diff --git a/.gitignore b/.gitignore index 86fd4562ab..3bab88f426 100644 --- a/.gitignore +++ b/.gitignore @@ -39,9 +39,7 @@ /qmp-introspect.[ch] /qmp-marshal.c /qemu-doc.html -/qemu-tech.html /qemu-doc.info -/qemu-tech.info /qemu-img /qemu-nbd /qemu-options.def @@ -53,7 +51,9 @@ /qemu-bridge-helper /qemu-monitor.texi /qemu-monitor-info.texi -/qmp-commands.txt +/qemu-version.h +/qemu-version.h.tmp +/module_block.h /vscclient /fsdev/virtfs-proxy-helper *.[1-9] diff --git a/.gitmodules b/.gitmodules index b642eee600..ca323b4d87 100644 --- a/.gitmodules +++ b/.gitmodules @@ -22,12 +22,15 @@ [submodule "roms/sgabios"] path = roms/sgabios url = git://git.qemu-project.org/sgabios.git -[submodule "roms/u-boot"] - path = roms/u-boot - url = git://git.qemu-project.org/u-boot.git -[submodule "dtc"] - path = dtc - url = git://git.qemu-project.org/dtc.git [submodule "pixman"] path = pixman url = git://anongit.freedesktop.org/pixman +[submodule "dtc"] + path = dtc + url = git://git.qemu-project.org/dtc.git +[submodule "roms/u-boot"] + path = roms/u-boot + url = git://git.qemu-project.org/u-boot.git +[submodule "roms/skiboot"] + path = roms/skiboot + url = git://git.qemu.org/skiboot.git diff --git a/.travis.yml b/.travis.yml index f30b10e4f7..9916178bf3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ cache: ccache addons: apt: packages: + # Build dependencies - libaio-dev - libattr1-dev - libbrlapi-dev @@ -89,6 +90,7 @@ matrix: - env: CONFIG="" os: osx compiler: clang + # Plain Trusty Build - env: CONFIG="" sudo: required addons: @@ -99,3 +101,46 @@ matrix: - sudo apt-get build-dep -qq qemu - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ - git submodule update --init --recursive + # Using newer GCC with sanitizers + - addons: + apt: + sources: + # PPAs for newer toolchains + - ubuntu-toolchain-r-test + packages: + # Extra toolchains + - gcc-5 + - g++-5 + # Build dependencies + - libaio-dev + - libattr1-dev + - libbrlapi-dev + - libcap-ng-dev + - libgnutls-dev + - libgtk-3-dev + - libiscsi-dev + - liblttng-ust-dev + - libnfs-dev + - libncurses5-dev + - libnss3-dev + - libpixman-1-dev + - libpng12-dev + - librados-dev + - libsdl1.2-dev + - libseccomp-dev + - libspice-protocol-dev + - libspice-server-dev + - libssh2-1-dev + - liburcu-dev + - libusb-1.0-0-dev + - libvte-2.90-dev + - sparse + - uuid-dev + language: generic + compiler: none + env: + - COMPILER_NAME=gcc CXX=g++-5 CC=gcc-5 + - CONFIG="--cc=gcc-5 --cxx=g++-5 --disable-pie --disable-linux-user --with-coroutine=gthread" + - TEST_CMD="" + before_script: + - ./configure ${CONFIG} --extra-cflags="-g3 -O0 -fsanitize=thread -fuse-ld=gold" || cat config.log diff --git a/CODING_STYLE b/CODING_STYLE index e7fde15003..f53180bf3f 100644 --- a/CODING_STYLE +++ b/CODING_STYLE @@ -9,7 +9,7 @@ patches before submitting. Of course, the most important aspect in any coding style is whitespace. Crusty old coders who have trouble spotting the glasses on their noses can tell the difference between a tab and eight spaces from a distance -of approximately fifteen parsecs. Many a flamewar have been fought and +of approximately fifteen parsecs. Many a flamewar has been fought and lost on this issue. QEMU indents are four spaces. Tabs are never used, except in Makefiles diff --git a/MAINTAINERS b/MAINTAINERS index b6fb84e826..4a605791fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -63,6 +63,17 @@ W: http://wiki.qemu.org/SecurityProcess M: Michael S. 
Tsirkin L: secalert@redhat.com +Trivial patches +--------------- +Trivial patches +M: Michael Tokarev +M: Laurent Vivier +S: Maintained +L: qemu-trivial@nongnu.org +K: ^Subject:.*(?i)trivial +T: git git://git.corpit.ru/qemu.git trivial-patches +T: git git://github.com/vivier/qemu.git trivial-patches + Guest CPU cores (TCG): ---------------------- Overall @@ -83,6 +94,7 @@ F: include/exec/cpu*.h F: include/exec/exec-all.h F: include/exec/helper*.h F: include/exec/tb-hash.h +F: include/sysemu/cpus.h FPU emulation M: Aurelien Jarno @@ -106,6 +118,7 @@ S: Maintained F: target-arm/ F: hw/arm/ F: hw/cpu/a*mpcore.c +F: include/hw/cpu/a*mpcore.h F: disas/arm.c F: disas/arm-a64.cc F: disas/libvixl/ @@ -115,6 +128,7 @@ M: Edgar E. Iglesias S: Maintained F: target-cris/ F: hw/cris/ +F: include/hw/cris/ F: tests/tcg/cris/ F: disas/cris.c @@ -131,9 +145,10 @@ F: include/hw/lm32/ F: tests/tcg/lm32/ M68K -S: Orphan +M: Laurent Vivier +S: Maintained F: target-m68k/ -F: hw/m68k/ +F: disas/m68k.c MicroBlaze M: Edgar E. Iglesias @@ -144,10 +159,17 @@ F: disas/microblaze.c MIPS M: Aurelien Jarno -M: Leon Alrae +M: Yongbok Kim S: Maintained F: target-mips/ F: hw/mips/ +F: hw/misc/mips_* +F: hw/intc/mips_gic.c +F: hw/timer/mips_gictimer.c +F: include/hw/mips/ +F: include/hw/misc/mips_* +F: include/hw/intc/mips_gic.h +F: include/hw/timer/mips_gictimer.h F: tests/tcg/mips/ F: disas/mips.c @@ -156,6 +178,8 @@ M: Anthony Green S: Maintained F: target-moxie/ F: disas/moxie.c +F: hw/moxie/ +F: default-configs/moxie-softmmu.mak OpenRISC M: Jia Liu @@ -171,6 +195,7 @@ L: qemu-ppc@nongnu.org S: Maintained F: target-ppc/ F: hw/ppc/ +F: include/hw/ppc/ F: disas/ppc.c S390 @@ -187,6 +212,7 @@ S: Odd Fixes F: target-sh4/ F: hw/sh4/ F: disas/sh4.c +F: include/hw/sh4/ SPARC M: Mark Cave-Ayland @@ -202,6 +228,7 @@ M: Guan Xuetao S: Maintained F: target-unicore32/ F: hw/unicore32/ +F: include/hw/unicore32/ X86 M: Paolo Bonzini @@ -225,6 +252,7 @@ M: Bastian Koppelmann S: Maintained F: target-tricore/ F: hw/tricore/ +F: include/hw/tricore/ Guest CPU Cores (KVM): ---------------------- @@ -314,6 +342,9 @@ L: qemu-devel@nongnu.org M: Stefan Weil S: Maintained F: *win32* +F: */*win32* +F: include/*/*win32* +X: qga/*win32* F: qemu.nsi ARM Machines @@ -390,6 +421,7 @@ M: Peter Chubb L: qemu-arm@nongnu.org S: Odd fixes F: hw/*/imx* +F: include/hw/*/imx* F: hw/arm/kzm.c F: include/hw/arm/fsl-imx31.h @@ -398,6 +430,7 @@ M: Peter Maydell L: qemu-arm@nongnu.org S: Maintained F: hw/arm/integratorcp.c +F: hw/misc/arm_integrator_debug.c Musicpal M: Jan Kiszka @@ -422,6 +455,7 @@ M: Peter Maydell L: qemu-arm@nongnu.org S: Maintained F: hw/arm/realview* +F: hw/cpu/realview_mpcore.c F: hw/intc/realview_gic.c F: include/hw/intc/realview_gic.h @@ -434,6 +468,7 @@ F: hw/arm/spitz.c F: hw/arm/tosa.c F: hw/arm/z2.c F: hw/*/pxa2xx* +F: hw/misc/mst_fpga.c F: include/hw/arm/pxa.h Stellaris @@ -455,8 +490,8 @@ L: qemu-arm@nongnu.org S: Maintained F: hw/*/xilinx_* F: hw/*/cadence_* -F: hw/misc/zynq_slcr.c -F: include/hw/xilinx.h +F: hw/misc/zynq* +F: include/hw/misc/zynq* X: hw/ssi/xilinx_* Xilinx ZynqMP @@ -465,7 +500,7 @@ M: Edgar E. 
Iglesias L: qemu-arm@nongnu.org S: Maintained F: hw/*/xlnx*.c -F: include/hw/*/xlnx*.c +F: include/hw/*/xlnx*.h ARM ACPI Subsystem M: Shannon Zhao @@ -475,6 +510,21 @@ S: Maintained F: hw/arm/virt-acpi-build.c F: include/hw/arm/virt-acpi-build.h +STM32F205 +M: Alistair Francis +S: Maintained +F: hw/arm/stm32f205_soc.c +F: hw/misc/stm32f2xx_syscfg.c +F: hw/char/stm32f2xx_usart.c +F: hw/timer/stm32f2xx_timer.c +F: hw/adc/* +F: hw/ssi/stm32f2xx_spi.c + +Netduino 2 +M: Alistair Francis +S: Maintained +F: hw/arm/netduino2.c + CRIS Machines ------------- Axis Dev88 @@ -500,6 +550,7 @@ M68K Machines an5206 S: Orphan F: hw/m68k/an5206.c +F: hw/m68k/mcf5206.c dummy_m68k S: Orphan @@ -508,6 +559,9 @@ F: hw/m68k/dummy_m68k.c mcf5208 S: Orphan F: hw/m68k/mcf5208.c +F: hw/m68k/mcf_intc.c +F: hw/char/mcf_uart.c +F: hw/net/mcf_fec.c MicroBlaze Machines ------------------- @@ -571,6 +625,9 @@ L: qemu-ppc@nongnu.org S: Supported F: hw/ppc/e500.[hc] F: hw/ppc/e500plat.c +F: include/hw/ppc/ppc_e500.h +F: include/hw/pci-host/ppce500.h +F: pc-bios/u-boot.e500 mpc8544ds M: Alexander Graf @@ -588,6 +645,8 @@ F: hw/ppc/mac_newworld.c F: hw/pci-host/uninorth.c F: hw/pci-bridge/dec.[hc] F: hw/misc/macio/ +F: include/hw/ppc/mac_dbdma.h +F: hw/nvram/mac_nvram.c Old World M: Alexander Graf @@ -596,6 +655,7 @@ S: Maintained F: hw/ppc/mac_oldworld.c F: hw/pci-host/grackle.c F: hw/misc/macio/ +F: hw/intc/heathrow_pic.c PReP L: qemu-devel@nongnu.org @@ -604,6 +664,7 @@ S: Odd Fixes F: hw/ppc/prep.c F: hw/pci-host/prep.[hc] F: hw/isa/pc87312.[hc] +F: pc-bios/ppc_rom.bin sPAPR M: David Gibson @@ -615,6 +676,15 @@ F: include/hw/*/spapr* F: hw/*/xics* F: include/hw/*/xics* F: pc-bios/spapr-rtas/* +F: pc-bios/spapr-rtas.bin +F: pc-bios/slof.bin +F: pc-bios/skiboot.lid +F: docs/specs/ppc-spapr-hcalls.txt +F: docs/specs/ppc-spapr-hotplug.txt +F: tests/spapr* +F: tests/libqos/*spapr* +F: tests/rtas* +F: tests/libqos/rtas* virtex_ml507 M: Edgar E. 
Iglesias @@ -628,31 +698,40 @@ R2D M: Magnus Damm S: Maintained F: hw/sh4/r2d.c +F: hw/intc/sh_intc.c +F: hw/timer/sh_timer.c Shix M: Magnus Damm -S: Orphan +S: Odd Fixes F: hw/sh4/shix.c SPARC Machines -------------- Sun4m -M: Blue Swirl M: Mark Cave-Ayland S: Maintained F: hw/sparc/sun4m.c +F: hw/dma/sparc32_dma.c +F: hw/dma/sun4m_iommu.c +F: hw/misc/eccmemctl.c +F: hw/misc/slavio_misc.c +F: include/hw/sparc/sparc32_dma.h +F: include/hw/sparc/sun4m.h +F: pc-bios/openbios-sparc32 Sun4u -M: Blue Swirl M: Mark Cave-Ayland S: Maintained F: hw/sparc64/sun4u.c +F: pc-bios/openbios-sparc64 Leon3 M: Fabien Chouteau S: Maintained F: hw/sparc/leon3.c F: hw/*/grlib* +F: include/hw/sparc/grlib.h S390 Machines ------------- @@ -666,6 +745,9 @@ F: hw/s390x/ F: include/hw/s390x/ F: pc-bios/s390-ccw/ F: hw/watchdog/wdt_diag288.c +F: include/hw/watchdog/wdt_diag288.h +F: pc-bios/s390-ccw.img +F: default-configs/s390x-softmmu.mak T: git git://github.com/cohuck/qemu.git s390-next T: git git://github.com/borntraeger/qemu.git s390-next @@ -695,7 +777,7 @@ F: hw/i2c/smbus_ich9.c F: hw/acpi/piix4.c F: hw/acpi/ich9.c F: include/hw/acpi/ich9.h -F: include/hw/acpi/piix.h +F: include/hw/acpi/piix4.h F: hw/misc/sga.c PC Chipset @@ -715,6 +797,10 @@ F: hw/misc/pc-testdev.c F: hw/timer/hpet* F: hw/timer/i8254* F: hw/timer/mc146818rtc* +F: include/hw/i2c/pm_smbus.h +F: include/hw/timer/hpet.h +F: include/hw/timer/i8254* +F: include/hw/timer/mc146818rtc* Machine core M: Eduardo Habkost @@ -748,6 +834,7 @@ M: John Snow L: qemu-block@nongnu.org S: Supported F: include/hw/ide.h +F: include/hw/ide/ F: hw/ide/ F: hw/block/block.c F: hw/block/cdrom.c @@ -797,16 +884,15 @@ F: hw/mem/* F: hw/acpi/* F: hw/smbios/* F: hw/i386/acpi-build.[hc] -F: hw/i386/*dsl F: hw/arm/virt-acpi-build.c F: include/hw/arm/virt-acpi-build.h -F: scripts/acpi*py ppc4xx M: Alexander Graf L: qemu-ppc@nongnu.org S: Odd Fixes F: hw/ppc/ppc4*.c +F: include/hw/ppc/ppc4xx.h ppce500 M: Alexander Graf @@ -826,13 +912,15 @@ Network devices M: Jason Wang S: Odd Fixes F: hw/net/ +F: tests/virtio-net-test.c T: git git://github.com/jasowang/qemu.git net SCSI M: Paolo Bonzini S: Supported -F: include/hw/scsi* +F: include/hw/scsi/* F: hw/scsi/* +F: tests/virtio-scsi-test.c T: git git://github.com/bonzini/qemu.git scsi-next LSI53C895A @@ -883,8 +971,11 @@ virtio M: Michael S. 
Tsirkin S: Supported F: hw/*/virtio* +F: hw/virtio/Makefile.objs +F: hw/virtio/trace-events F: net/vhost-user.c F: include/hw/virtio/ +F: tests/virtio-balloon-test.c virtio-9p M: Aneesh Kumar K.V @@ -902,7 +993,7 @@ L: qemu-block@nongnu.org S: Supported F: hw/block/virtio-blk.c F: hw/block/dataplane/* -F: hw/virtio/dataplane/* +F: tests/virtio-blk-test.c T: git git://github.com/stefanha/qemu.git block virtio-ccw @@ -925,6 +1016,8 @@ S: Supported F: hw/char/virtio-serial-bus.c F: hw/char/virtio-console.c F: include/hw/virtio/virtio-serial.h +F: tests/virtio-console-test.c +F: tests/virtio-serial-test.c virtio-rng M: Amit Shah @@ -933,6 +1026,14 @@ F: hw/virtio/virtio-rng.c F: include/hw/virtio/virtio-rng.h F: include/sysemu/rng*.h F: backends/rng*.c +F: tests/virtio-rng-test.c + +virtio-crypto +M: Gonglei +S: Supported +F: hw/virtio/virtio-crypto.c +F: hw/virtio/virtio-crypto-pci.c +F: include/hw/virtio/virtio-crypto.h nvme M: Keith Busch @@ -966,6 +1067,8 @@ Rocker M: Jiri Pirko S: Maintained F: hw/net/rocker/ +F: tests/rocker/ +F: docs/specs/rocker.txt NVDIMM M: Xiao Guangrong @@ -984,6 +1087,19 @@ M: Dmitry Fleytman S: Maintained F: hw/net/e1000e* +Generic Loader +M: Alistair Francis +S: Maintained +F: hw/core/generic-loader.c +F: include/hw/core/generic-loader.h + +CHRP NVRAM +M: Thomas Huth +S: Maintained +F: hw/nvram/chrp_nvram.c +F: include/hw/nvram/chrp_nvram.h +F: tests/prom-env-test.c + Subsystems ---------- Audio @@ -991,6 +1107,7 @@ M: Gerd Hoffmann S: Maintained F: audio/ F: hw/audio/ +F: include/hw/audio/ F: tests/ac97-test.c F: tests/es1370-test.c F: tests/intel-hda-test.c @@ -1041,6 +1158,20 @@ F: block/qapi.c F: qapi/block*.json T: git git://repo.or.cz/qemu/armbru.git block-next +Dirty Bitmaps +M: Fam Zheng +M: John Snow +L: qemu-block@nongnu.org +S: Supported +F: util/hbitmap.c +F: block/dirty-bitmap.c +F: include/qemu/hbitmap.h +F: include/block/dirty-bitmap.h +F: tests/test-hbitmap.c +F: docs/bitmaps.md +T: git git://github.com/famz/qemu.git bitmaps +T: git git://github.com/jnsnow/qemu.git bitmaps + Character device backends M: Paolo Bonzini S: Maintained @@ -1064,12 +1195,6 @@ S: Supported F: qom/cpu.c F: include/qom/cpu.h -ICC Bus -M: Igor Mammedov -S: Supported -F: include/hw/cpu/icc_bus.h -F: hw/cpu/icc_bus.c - Device Tree M: Peter Crosthwaite M: Alexander Graf @@ -1131,12 +1256,12 @@ F: qemu-timer.c F: vl.c Human Monitor (HMP) -M: Luiz Capitulino +M: Dr. 
David Alan Gilbert S: Maintained F: monitor.c -F: hmp.c -F: hmp-commands.hx -T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp +F: hmp.[ch] +F: hmp-commands*.hx +F: include/monitor/hmp-target.h Network device backends M: Jason Wang @@ -1176,6 +1301,12 @@ S: Maintained F: backends/hostmem*.c F: include/sysemu/hostmem.h +Cryptodev Backends +M: Gonglei +S: Maintained +F: include/sysemu/cryptodev*.h +F: backends/cryptodev*.c + QAPI M: Markus Armbruster M: Michael Roth @@ -1201,8 +1332,8 @@ F: qapi/*.json T: git git://repo.or.cz/qemu/armbru.git qapi-next QObject -M: Luiz Capitulino -S: Maintained +M: Markus Armbruster +S: Supported F: qobject/ F: include/qapi/qmp/ X: include/qapi/qmp/dispatch.h @@ -1212,7 +1343,7 @@ F: tests/check-qint.c F: tests/check-qjson.c F: tests/check-qlist.c F: tests/check-qstring.c -T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp +T: git git://repo.or.cz/qemu/armbru.git qapi-next QEMU Guest Agent M: Michael Roth @@ -1237,7 +1368,6 @@ M: Markus Armbruster S: Supported F: qmp.c F: monitor.c -F: qmp-commands.hx F: docs/*qmp-* F: scripts/qmp/ T: git git://repo.or.cz/qemu/armbru.git qapi-next @@ -1257,6 +1387,11 @@ F: net/slirp.c F: include/net/slirp.h T: git git://git.kiszka.org/qemu.git queues/slirp +Stubs +M: Paolo Bonzini +S: Maintained +F: stubs/ + Tracing M: Stefan Hajnoczi S: Maintained @@ -1330,6 +1465,30 @@ F: include/qemu/throttle.h F: util/throttle.c L: qemu-block@nongnu.org +UUID +M: Fam Zheng +S: Supported +F: util/uuid.c +F: include/qemu/uuid.h +F: tests/test-uuid.c + +COLO Framework +M: zhanghailiang +S: Maintained +F: migration/colo* +F: include/migration/colo.h +F: include/migration/failover.h +F: docs/COLO-FT.txt + +COLO Proxy +M: Zhang Chen +M: Li Zhijian +S: Supported +F: docs/colo-proxy.txt +F: net/colo* +F: net/filter-rewriter.c +F: net/filter-mirror.c + Usermode Emulation ------------------ Overall @@ -1341,11 +1500,13 @@ F: user-exec.c BSD user S: Orphan F: bsd-user/ +F: default-configs/*-bsd-user.mak Linux user M: Riku Voipio S: Maintained F: linux-user/ +F: default-configs/*-linux-user.mak Tiny Code Generator (TCG) ------------------------- @@ -1389,8 +1550,8 @@ F: tcg/mips/ F: disas/mips.c PPC -M: Vassili Karpov (malc) -S: Maintained +M: Richard Henderson +S: Odd Fixes F: tcg/ppc/ F: disas/ppc.c @@ -1413,28 +1574,6 @@ F: tcg/tci/ F: tci.c F: disas/tci.c -Stable branches ---------------- -Stable 1.0 -L: qemu-stable@nongnu.org -T: git git://git.qemu-project.org/qemu-stable-1.0.git -S: Orphan - -Stable 0.15 -L: qemu-stable@nongnu.org -T: git git://git.qemu-project.org/qemu-stable-0.15.git -S: Orphan - -Stable 0.14 -L: qemu-stable@nongnu.org -T: git git://git.qemu-project.org/qemu-stable-0.14.git -S: Orphan - -Stable 0.10 -L: qemu-stable@nongnu.org -T: git git://git.qemu-project.org/qemu-stable-0.10.git -S: Orphan - Block drivers ------------- VMDK @@ -1580,7 +1719,7 @@ M: Kevin Wolf L: qemu-block@nongnu.org S: Supported F: block/linux-aio.c -F: block/raw-aio.h +F: include/block/raw-aio.h F: block/raw-posix.c F: block/raw-win32.c F: block/raw_bsd.c @@ -1624,6 +1763,15 @@ L: qemu-block@nongnu.org S: Supported F: tests/image-fuzzer/ +Replication +M: Wen Congyang +M: Changlong Xie +S: Supported +F: replication* +F: block/replication.c +F: tests/test-replication.c +F: docs/block-replication.txt + Build and test automation ------------------------- M: Alex Bennée diff --git a/Makefile b/Makefile index d74f30ad27..3786932931 100644 --- a/Makefile +++ b/Makefile @@ -56,9 +56,6 @@ GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c 
qapi-event.c GENERATED_HEADERS += qmp-introspect.h GENERATED_SOURCES += qmp-introspect.c -GENERATED_HEADERS += trace/generated-events.h -GENERATED_SOURCES += trace/generated-events.c - GENERATED_HEADERS += trace/generated-tracers.h ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace) GENERATED_HEADERS += trace/generated-tracers-dtrace.h @@ -76,6 +73,8 @@ GENERATED_HEADERS += trace/generated-ust-provider.h GENERATED_SOURCES += trace/generated-ust.c endif +GENERATED_HEADERS += module_block.h + # Don't try to regenerate Makefile or configure # We don't generate any of them Makefile: ; @@ -91,8 +90,7 @@ LIBS+=-lz $(LIBS_TOOLS) HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF) ifdef BUILD_DOCS -DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 -DOCS+=qmp-commands.txt +DOCS=qemu-doc.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8 ifdef CONFIG_VIRTFS DOCS+=fsdev/virtfs-proxy-helper.1 endif @@ -106,20 +104,20 @@ SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %-config-devices.mak.d, $(TARGET_DIRS)) ifeq ($(SUBDIR_DEVICES_MAK),) config-all-devices.mak: - $(call quiet-command,echo '# no devices' > $@," GEN $@") + $(call quiet-command,echo '# no devices' > $@,"GEN","$@") else config-all-devices.mak: $(SUBDIR_DEVICES_MAK) $(call quiet-command, sed -n \ 's|^\([^=]*\)=\(.*\)$$|\1:=$$(findstring y,$$(\1)\2)|p' \ $(SUBDIR_DEVICES_MAK) | sort -u > $@, \ - " GEN $@") + "GEN","$@") endif -include $(SUBDIR_DEVICES_MAK_DEP) %/config-devices.mak: default-configs/%.mak $(SRC_PATH)/scripts/make_device_config.sh $(call quiet-command, \ - $(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $< $*-config-devices.mak.d $@ > $@.tmp, " GEN $@.tmp") + $(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $< $*-config-devices.mak.d $@ > $@.tmp,"GEN","$@.tmp") $(call quiet-command, if test -f $@; then \ if cmp -s $@.old $@; then \ mv $@.tmp $@; \ @@ -136,7 +134,7 @@ endif else \ mv $@.tmp $@; \ cp -p $@ $@.old; \ - fi, " GEN $@"); + fi,"GEN","$@"); defconfig: rm -f config-all-devices.mak $(SUBDIR_DEVICES_MAK) @@ -190,7 +188,7 @@ qemu-version.h: FORCE config-host.h: config-host.h-timestamp config-host.h-timestamp: config-host.mak qemu-options.def: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@") SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS)) SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES)) @@ -234,9 +232,9 @@ ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS)) recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES) $(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo - $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.o") + $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.o") $(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h - $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.lo") + $(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<,"RC","version.lo") Makefile: $(version-obj-y) $(version-lobj-y) @@ -246,9 +244,6 @@ Makefile: $(version-obj-y) $(version-lobj-y) libqemustub.a: $(stub-obj-y) libqemuutil.a: $(util-obj-y) -block-modules = $(foreach o,$(block-obj-m),"$(basename $(subst /,-,$o))",) NULL -util/module.o-cflags = -D'CONFIG_BLOCK_MODULES=$(block-modules)' - ###################################################################### qemu-img.o: qemu-img-cmds.h @@ -263,7 +258,7 @@ fsdev/virtfs-proxy-helper$(EXESUF): 
fsdev/virtfs-proxy-helper.o fsdev/9p-marshal fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$@") qemu-ga$(EXESUF): LIBS = $(LIBS_QGA) qemu-ga$(EXESUF): QEMU_CFLAGS += -I qga/qapi-generated @@ -276,17 +271,17 @@ qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \ $(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \ - " GEN $@") + "GEN","$@") qga/qapi-generated/qga-qapi-visit.c qga/qapi-generated/qga-qapi-visit.h :\ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \ $(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \ - " GEN $@") + "GEN","$@") qga/qapi-generated/qga-qmp-commands.h qga/qapi-generated/qga-qmp-marshal.c :\ $(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \ $(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \ - " GEN $@") + "GEN","$@") qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \ $(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \ @@ -298,27 +293,27 @@ qapi-types.c qapi-types.h :\ $(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \ $(gen-out-type) -o "." -b $<, \ - " GEN $@") + "GEN","$@") qapi-visit.c qapi-visit.h :\ $(qapi-modules) $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \ $(gen-out-type) -o "." -b $<, \ - " GEN $@") + "GEN","$@") qapi-event.c qapi-event.h :\ $(qapi-modules) $(SRC_PATH)/scripts/qapi-event.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-event.py \ $(gen-out-type) -o "." $<, \ - " GEN $@") + "GEN","$@") qmp-commands.h qmp-marshal.c :\ $(qapi-modules) $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \ - $(gen-out-type) -o "." -m $<, \ - " GEN $@") + $(gen-out-type) -o "." $<, \ + "GEN","$@") qmp-introspect.h qmp-introspect.c :\ $(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-introspect.py \ $(gen-out-type) -o "." 
$<, \ - " GEN $@") + "GEN","$@") QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h) $(qga-obj-y) qemu-ga.o: $(QGALIB_GEN) @@ -337,7 +332,7 @@ $(QEMU_GA_MSI): config-host.mak $(QEMU_GA_MSI): $(SRC_PATH)/qga/installer/qemu-ga.wxs $(call quiet-command,QEMU_GA_VERSION="$(QEMU_GA_VERSION)" QEMU_GA_MANUFACTURER="$(QEMU_GA_MANUFACTURER)" QEMU_GA_DISTRO="$(QEMU_GA_DISTRO)" BUILD_DIR="$(BUILD_DIR)" \ - wixl -o $@ $(QEMU_GA_MSI_ARCH) $(QEMU_GA_MSI_WITH_VSS) $(QEMU_GA_MSI_MINGW_DLL_PATH) $<, " WIXL $@") + wixl -o $@ $(QEMU_GA_MSI_ARCH) $(QEMU_GA_MSI_WITH_VSS) $(QEMU_GA_MSI_MINGW_DLL_PATH) $<,"WIXL","$@") else msi: @echo "MSI build not configured or dependency resolution failed (reconfigure with --enable-guest-agent-msi option)" @@ -353,6 +348,11 @@ ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a $(call LINK, $^) +module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak + $(call quiet-command,$(PYTHON) $< $@ \ + $(addprefix $(SRC_PATH)/,$(patsubst %.mo,%.c,$(block-obj-m))), \ + "GEN","$@") + clean: # avoid old build problems by removing potentially incorrect old files rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h @@ -395,7 +395,6 @@ distclean: clean rm -f qemu-doc.vr rm -f config.log rm -f linux-headers/asm - rm -f qemu-tech.info qemu-tech.aux qemu-tech.cp qemu-tech.dvi qemu-tech.fn qemu-tech.info qemu-tech.ky qemu-tech.log qemu-tech.pdf qemu-tech.pg qemu-tech.toc qemu-tech.tp qemu-tech.vr for d in $(TARGET_DIRS); do \ rm -rf $$d || exit 1 ; \ done @@ -422,7 +421,7 @@ qemu-icon.bmp qemu_logo_no_text.svg \ bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \ multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \ s390-ccw.img \ -spapr-rtas.bin slof.bin \ +spapr-rtas.bin slof.bin skiboot.lid \ palcode-clipper \ u-boot.e500 else @@ -431,8 +430,8 @@ endif install-doc: $(DOCS) $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) qemu-doc.html qemu-tech.html "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) qmp-commands.txt "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) qemu-doc.html "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) $(SRC_PATH)/docs/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)" ifdef CONFIG_POSIX $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" $(INSTALL_DATA) qemu.1 "$(DESTDIR)$(mandir)/man1" @@ -452,7 +451,7 @@ endif install-pdf: pdf $(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)" - $(INSTALL_DATA) qemu-doc.pdf qemu-tech.pdf "$(DESTDIR)$(qemu_docdir)" + $(INSTALL_DATA) qemu-doc.pdf "$(DESTDIR)$(qemu_docdir)" install-datadir: $(INSTALL_DIR) "$(DESTDIR)$(qemu_datadir)" @@ -522,13 +521,13 @@ ui/shader/%-vert.h: $(SRC_PATH)/ui/shader/%.vert $(SRC_PATH)/scripts/shaderinclu @mkdir -p $(dir $@) $(call quiet-command,\ perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\ - " VERT $@") + "VERT","$@") ui/shader/%-frag.h: $(SRC_PATH)/ui/shader/%.frag $(SRC_PATH)/scripts/shaderinclude.pl @mkdir -p $(dir $@) $(call quiet-command,\ perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\ - " FRAG $@") + "FRAG","$@") ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \ ui/shader/texture-blit-vert.h ui/shader/texture-blit-frag.h @@ -538,68 +537,65 @@ MAKEINFO=makeinfo MAKEINFOFLAGS=--no-headers --no-split --number-sections TEXIFLAG=$(if $(V),,--quiet) %.dvi: %.texi - $(call quiet-command,texi2dvi $(TEXIFLAG) -I . $<," GEN $@") + $(call quiet-command,texi2dvi $(TEXIFLAG) -I . 
$<,"GEN","$@") %.html: %.texi $(call quiet-command,LC_ALL=C $(MAKEINFO) $(MAKEINFOFLAGS) --html $< -o $@, \ - " GEN $@") + "GEN","$@") %.info: %.texi - $(call quiet-command,$(MAKEINFO) $< -o $@," GEN $@") + $(call quiet-command,$(MAKEINFO) $< -o $@,"GEN","$@") %.pdf: %.texi - $(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<," GEN $@") + $(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<,"GEN","$@") qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@") - -qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -q < $< > $@," GEN $@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@,"GEN","$@") qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi $(call quiet-command, \ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \ $(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \ - " GEN $@") + "GEN","$@") qemu.1: qemu-option-trace.texi qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi $(call quiet-command, \ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \ $(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \ - " GEN $@") + "GEN","$@") fsdev/virtfs-proxy-helper.1: fsdev/virtfs-proxy-helper.texi $(call quiet-command, \ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< fsdev/virtfs-proxy-helper.pod && \ $(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \ - " GEN $@") + "GEN","$@") qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi $(call quiet-command, \ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \ $(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \ - " GEN $@") + "GEN","$@") qemu-ga.8: qemu-ga.texi $(call quiet-command, \ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \ $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \ - " GEN $@") + "GEN","$@") -dvi: qemu-doc.dvi qemu-tech.dvi -html: qemu-doc.html qemu-tech.html -info: qemu-doc.info qemu-tech.info -pdf: qemu-doc.pdf qemu-tech.pdf +dvi: qemu-doc.dvi +html: qemu-doc.html +info: qemu-doc.info +pdf: qemu-doc.pdf qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \ @@ -673,3 +669,40 @@ endif -include $(wildcard *.d tests/*.d) include $(SRC_PATH)/tests/docker/Makefile.include + +.PHONY: help +help: + @echo 'Generic targets:' + @echo ' all - Build all' + @echo ' dir/file.o - Build specified target only' + @echo ' install - Install QEMU, documentation and tools' + @echo ' ctags/TAGS - Generate tags file for editors' + @echo ' cscope - Generate cscope index' + @echo '' + @$(if $(TARGET_DIRS), \ + echo 
'Architecture specific targets:'; \ + $(foreach t, $(TARGET_DIRS), \ + printf " %-30s - Build for %s\\n" $(patsubst %,subdir-%,$(t)) $(t);) \ + echo '') + @echo 'Cleaning targets:' + @echo ' clean - Remove most generated files but keep the config' + @echo ' distclean - Remove all generated files' + @echo ' dist - Build a distributable tarball' + @echo '' + @echo 'Test targets:' + @echo ' check - Run all tests (check-help for details)' + @echo ' docker - Help about targets running tests inside Docker containers' + @echo '' + @echo 'Documentation targets:' + @echo ' dvi html info pdf' + @echo ' - Build documentation in specified format' + @echo '' +ifdef CONFIG_WIN32 + @echo 'Windows targets:' + @echo ' installer - Build NSIS-based installer for QEMU' +ifdef QEMU_GA_MSI_ENABLED + @echo ' msi - Build MSI-based installer for qemu-ga' +endif + @echo '' +endif + @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build' diff --git a/Makefile.objs b/Makefile.objs index ecdc9452e1..ac70be0f0b 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -16,6 +16,7 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o block-obj-$(CONFIG_WIN32) += aio-win32.o block-obj-y += block/ block-obj-y += qemu-io-cmds.o +block-obj-$(CONFIG_REPLICATION) += replication.o block-obj-m = block/ @@ -97,7 +98,7 @@ endif ####################################################################### # Target-independent parts used in system and user emulation -common-obj-y += tcg-runtime.o +common-obj-y += cpus-common.o common-obj-y += hw/ common-obj-y += qom/ common-obj-y += disas/ @@ -150,6 +151,7 @@ trace-events-y += hw/dma/trace-events trace-events-y += hw/sparc/trace-events trace-events-y += hw/sd/trace-events trace-events-y += hw/isa/trace-events +trace-events-y += hw/mem/trace-events trace-events-y += hw/i386/trace-events trace-events-y += hw/9pfs/trace-events trace-events-y += hw/ppc/trace-events @@ -162,9 +164,11 @@ trace-events-y += hw/alpha/trace-events trace-events-y += ui/trace-events trace-events-y += audio/trace-events trace-events-y += net/trace-events +trace-events-y += target-arm/trace-events trace-events-y += target-i386/trace-events trace-events-y += target-sparc/trace-events trace-events-y += target-s390x/trace-events trace-events-y += target-ppc/trace-events trace-events-y += qom/trace-events trace-events-y += linux-user/trace-events +trace-events-y += qapi/trace-events diff --git a/Makefile.target b/Makefile.target index a440bcb5b8..7a5080e94a 100644 --- a/Makefile.target +++ b/Makefile.target @@ -26,7 +26,7 @@ ifneq (,$(findstring -mwindows,$(libs_softmmu))) # Terminate program name with a 'w' because the linker builds a windows executable. 
QEMU_PROGW=qemu-system-$(TARGET_NAME)w$(EXESUF) $(QEMU_PROG): $(QEMU_PROGW) - $(call quiet-command,$(OBJCOPY) --subsystem console $(QEMU_PROGW) $(QEMU_PROG)," GEN $(TARGET_DIR)$(QEMU_PROG)") + $(call quiet-command,$(OBJCOPY) --subsystem console $(QEMU_PROGW) $(QEMU_PROG),"GEN","$(TARGET_DIR)$(QEMU_PROG)") QEMU_PROG_BUILD = $(QEMU_PROGW) else QEMU_PROG_BUILD = $(QEMU_PROG) @@ -55,7 +55,7 @@ $(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all --binary=$(bindir)/$(QEMU_PROG) \ --target-name=$(TARGET_NAME) \ --target-type=$(TARGET_TYPE) \ - < $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp-installed") + $< > $@,"GEN","$(TARGET_DIR)$(QEMU_PROG).stp-installed") $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all $(call quiet-command,$(TRACETOOL) \ @@ -64,14 +64,14 @@ $(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all --binary=$(realpath .)/$(QEMU_PROG) \ --target-name=$(TARGET_NAME) \ --target-type=$(TARGET_TYPE) \ - < $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp") + $< > $@,"GEN","$(TARGET_DIR)$(QEMU_PROG).stp") $(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all $(call quiet-command,$(TRACETOOL) \ --format=simpletrace-stap \ --backends=$(TRACE_BACKENDS) \ --probe-prefix=qemu.$(TARGET_TYPE).$(TARGET_NAME) \ - < $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG)-simpletrace.stp") + $< > $@,"GEN","$(TARGET_DIR)$(QEMU_PROG)-simpletrace.stp") else stap: @@ -94,6 +94,7 @@ obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o obj-y += fpu/softfloat.o obj-y += target-$(TARGET_BASE_ARCH)/ obj-y += disas.o +obj-y += tcg-runtime.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o @@ -156,7 +157,7 @@ else obj-y += hw/$(TARGET_BASE_ARCH)/ endif -GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h +GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h endif # CONFIG_SOFTMMU @@ -196,26 +197,23 @@ $(QEMU_PROG_BUILD): config-devices.mak $(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a $(call LINK, $(filter-out %.mak, $^)) ifdef CONFIG_DARWIN - $(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@," REZ $(TARGET_DIR)$@") - $(call quiet-command,SetFile -a C $@," SETFILE $(TARGET_DIR)$@") + $(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@,"REZ","$(TARGET_DIR)$@") + $(call quiet-command,SetFile -a C $@,"SETFILE","$(TARGET_DIR)$@") endif gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh - $(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES)," GEN $(TARGET_DIR)$@") + $(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES),"GEN","$(TARGET_DIR)$@") hmp-commands.h: $(SRC_PATH)/hmp-commands.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$(TARGET_DIR)$@") hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@") + $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@,"GEN","$(TARGET_DIR)$@") -qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool - $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@") - -clean: +clean: clean-target rm -f *.a *~ $(PROGS) rm -f $(shell find . 
-name '*.[od]') - rm -f hmp-commands.h qmp-commands-old.h gdbstub-xml.c + rm -f hmp-commands.h gdbstub-xml.c ifdef CONFIG_TRACE_SYSTEMTAP rm -f *.stp endif diff --git a/README b/README index f38193fc67..bd8060a3ee 100644 --- a/README +++ b/README @@ -42,8 +42,6 @@ of other UNIX targets. The simple steps to build QEMU are: ../configure make -Complete details of the process for building and configuring QEMU for -all supported host platforms can be found in the qemu-tech.html file. Additional information can also be found online via the QEMU website: http://qemu-project.org/Hosts/Linux diff --git a/VERSION b/VERSION index 24ba9a38de..834f262953 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.7.0 +2.8.0 diff --git a/accel.c b/accel.c index 403eb5e94d..664bb88422 100644 --- a/accel.c +++ b/accel.c @@ -33,7 +33,6 @@ #include "sysemu/qtest.h" #include "hw/xen/xen.h" #include "qom/object.h" -#include "hw/boards.h" int tcg_tb_size; static bool tcg_allowed = true; diff --git a/aio-posix.c b/aio-posix.c index 43162a9f29..e13b9ab2b0 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -81,29 +81,22 @@ static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new) { struct epoll_event event; int r; + int ctl; if (!ctx->epoll_enabled) { return; } if (!node->pfd.events) { - r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event); - if (r) { - aio_epoll_disable(ctx); - } + ctl = EPOLL_CTL_DEL; } else { event.data.ptr = node; event.events = epoll_events_from_pfd(node->pfd.events); - if (is_new) { - r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event); - if (r) { - aio_epoll_disable(ctx); - } - } else { - r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event); - if (r) { - aio_epoll_disable(ctx); - } - } + ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD; + } + + r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event); + if (r) { + aio_epoll_disable(ctx); } } @@ -217,21 +210,23 @@ void aio_set_fd_handler(AioContext *ctx, /* Are we deleting the fd handler? */ if (!io_read && !io_write) { - if (node) { - g_source_remove_poll(&ctx->source, &node->pfd); - - /* If the lock is held, just mark the node as deleted */ - if (ctx->walking_handlers) { - node->deleted = 1; - node->pfd.revents = 0; - } else { - /* Otherwise, delete it for real. We can't just mark it as - * deleted because deleted nodes are only cleaned up after - * releasing the walking_handlers lock. - */ - QLIST_REMOVE(node, node); - deleted = true; - } + if (node == NULL) { + return; + } + + g_source_remove_poll(&ctx->source, &node->pfd); + + /* If the lock is held, just mark the node as deleted */ + if (ctx->walking_handlers) { + node->deleted = 1; + node->pfd.revents = 0; + } else { + /* Otherwise, delete it for real. We can't just mark it as + * deleted because deleted nodes are only cleaned up after + * releasing the walking_handlers lock. 
+ */ + QLIST_REMOVE(node, node); + deleted = true; } } else { if (node == NULL) { @@ -431,11 +426,13 @@ bool aio_poll(AioContext *ctx, bool blocking) assert(npfd == 0); /* fill pollfds */ - QLIST_FOREACH(node, &ctx->aio_handlers, node) { - if (!node->deleted && node->pfd.events - && !aio_epoll_enabled(ctx) - && aio_node_check(ctx, node->is_external)) { - add_pollfd(node); + + if (!aio_epoll_enabled(ctx)) { + QLIST_FOREACH(node, &ctx->aio_handlers, node) { + if (!node->deleted && node->pfd.events + && aio_node_check(ctx, node->is_external)) { + add_pollfd(node); + } } } diff --git a/arch_init.c b/arch_init.c index 09bdbd1f5e..8150a9a78a 100644 --- a/arch_init.c +++ b/arch_init.c @@ -243,25 +243,6 @@ void audio_init(void) } } -int qemu_uuid_parse(const char *str, uint8_t *uuid) -{ - int ret; - - if (strlen(str) != 36) { - return -1; - } - - ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3], - &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9], - &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14], - &uuid[15]); - - if (ret != 16) { - return -1; - } - return 0; -} - void do_acpitable_option(const QemuOpts *opts) { #ifdef TARGET_I386 diff --git a/async.c b/async.c index 3bca9b0adb..b2de360c23 100644 --- a/async.c +++ b/async.c @@ -44,6 +44,26 @@ struct QEMUBH { bool deleted; }; +void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) +{ + QEMUBH *bh; + bh = g_new(QEMUBH, 1); + *bh = (QEMUBH){ + .ctx = ctx, + .cb = cb, + .opaque = opaque, + }; + qemu_mutex_lock(&ctx->bh_lock); + bh->next = ctx->first_bh; + bh->scheduled = 1; + bh->deleted = 1; + /* Make sure that the members are ready before putting bh into list */ + smp_wmb(); + ctx->first_bh = bh; + qemu_mutex_unlock(&ctx->bh_lock); + aio_notify(ctx); +} + QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) { QEMUBH *bh; @@ -86,9 +106,9 @@ int aio_bh_poll(AioContext *ctx) * thread sees the zero before bh->cb has run, and thus will call * aio_notify again if necessary. 
*/ - if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) { - /* Idle BHs and the notify BH don't count as progress */ - if (!bh->idle && bh != ctx->notify_dummy_bh) { + if (atomic_xchg(&bh->scheduled, 0)) { + /* Idle BHs don't count as progress */ + if (!bh->idle) { ret = 1; } bh->idle = 0; @@ -104,7 +124,7 @@ int aio_bh_poll(AioContext *ctx) bhp = &ctx->first_bh; while (*bhp) { bh = *bhp; - if (bh->deleted) { + if (bh->deleted && !bh->scheduled) { *bhp = bh->next; g_free(bh); } else { @@ -168,7 +188,7 @@ aio_compute_timeout(AioContext *ctx) QEMUBH *bh; for (bh = ctx->first_bh; bh; bh = bh->next) { - if (!bh->deleted && bh->scheduled) { + if (bh->scheduled) { if (bh->idle) { /* idle bottom halves will be polled at least * every 10ms */ @@ -216,7 +236,7 @@ aio_ctx_check(GSource *source) aio_notify_accept(ctx); for (bh = ctx->first_bh; bh; bh = bh->next) { - if (!bh->deleted && bh->scheduled) { + if (bh->scheduled) { return true; } } @@ -240,7 +260,6 @@ aio_ctx_finalize(GSource *source) { AioContext *ctx = (AioContext *) source; - qemu_bh_delete(ctx->notify_dummy_bh); thread_pool_free(ctx->thread_pool); #ifdef CONFIG_LINUX_AIO @@ -265,7 +284,7 @@ aio_ctx_finalize(GSource *source) aio_set_event_notifier(ctx, &ctx->notifier, false, NULL); event_notifier_cleanup(&ctx->notifier); - rfifolock_destroy(&ctx->lock); + qemu_rec_mutex_destroy(&ctx->lock); qemu_mutex_destroy(&ctx->bh_lock); timerlistgroup_deinit(&ctx->tlg); } @@ -326,19 +345,6 @@ static void aio_timerlist_notify(void *opaque) aio_notify(opaque); } -static void aio_rfifolock_cb(void *opaque) -{ - AioContext *ctx = opaque; - - /* Kick owner thread in case they are blocked in aio_poll() */ - qemu_bh_schedule(ctx->notify_dummy_bh); -} - -static void notify_dummy_bh(void *opaque) -{ - /* Do nothing, we were invoked just to force the event loop to iterate */ -} - static void event_notifier_dummy_cb(EventNotifier *e) { } @@ -366,11 +372,9 @@ AioContext *aio_context_new(Error **errp) #endif ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); - rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); + qemu_rec_mutex_init(&ctx->lock); timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); - ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL); - return ctx; fail: g_source_destroy(&ctx->source); @@ -389,10 +393,10 @@ void aio_context_unref(AioContext *ctx) void aio_context_acquire(AioContext *ctx) { - rfifolock_lock(&ctx->lock); + qemu_rec_mutex_lock(&ctx->lock); } void aio_context_release(AioContext *ctx) { - rfifolock_unlock(&ctx->lock); + qemu_rec_mutex_unlock(&ctx->lock); } diff --git a/atomic_template.h b/atomic_template.h new file mode 100644 index 0000000000..b400b2a3d3 --- /dev/null +++ b/atomic_template.h @@ -0,0 +1,215 @@ +/* + * Atomic helper templates + * Included from tcg-runtime.c and cputlb.c. + * + * Copyright (c) 2016 Red Hat, Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#if DATA_SIZE == 16 +# define SUFFIX o +# define DATA_TYPE Int128 +# define BSWAP bswap128 +#elif DATA_SIZE == 8 +# define SUFFIX q +# define DATA_TYPE uint64_t +# define BSWAP bswap64 +#elif DATA_SIZE == 4 +# define SUFFIX l +# define DATA_TYPE uint32_t +# define BSWAP bswap32 +#elif DATA_SIZE == 2 +# define SUFFIX w +# define DATA_TYPE uint16_t +# define BSWAP bswap16 +#elif DATA_SIZE == 1 +# define SUFFIX b +# define DATA_TYPE uint8_t +# define BSWAP +#else +# error unsupported data size +#endif + +#if DATA_SIZE >= 4 +# define ABI_TYPE DATA_TYPE +#else +# define ABI_TYPE uint32_t +#endif + +/* Define host-endian atomic operations. Note that END is used within + the ATOMIC_NAME macro, and redefined below. */ +#if DATA_SIZE == 1 +# define END +#elif defined(HOST_WORDS_BIGENDIAN) +# define END _be +#else +# define END _le +#endif + +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return atomic_cmpxchg__nocheck(haddr, cmpv, newv); +} + +#if DATA_SIZE >= 16 +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +{ + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + __atomic_load(haddr, &val, __ATOMIC_RELAXED); + return val; +} + +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} +#else +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return atomic_xchg__nocheck(haddr, val); +} + +#define GEN_ATOMIC_HELPER(X) \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ + ABI_TYPE val EXTRA_ARGS) \ +{ \ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + return atomic_##X(haddr, val); \ +} \ + +GEN_ATOMIC_HELPER(fetch_add) +GEN_ATOMIC_HELPER(fetch_and) +GEN_ATOMIC_HELPER(fetch_or) +GEN_ATOMIC_HELPER(fetch_xor) +GEN_ATOMIC_HELPER(add_fetch) +GEN_ATOMIC_HELPER(and_fetch) +GEN_ATOMIC_HELPER(or_fetch) +GEN_ATOMIC_HELPER(xor_fetch) + +#undef GEN_ATOMIC_HELPER +#endif /* DATA SIZE >= 16 */ + +#undef END + +#if DATA_SIZE > 1 + +/* Define reverse-host-endian atomic operations. Note that END is used + within the ATOMIC_NAME macro. 
*/ +#ifdef HOST_WORDS_BIGENDIAN +# define END _le +#else +# define END _be +#endif + +ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE cmpv, ABI_TYPE newv EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv))); +} + +#if DATA_SIZE >= 16 +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +{ + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + __atomic_load(haddr, &val, __ATOMIC_RELAXED); + return BSWAP(val); +} + +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + val = BSWAP(val); + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} +#else +ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + return BSWAP(atomic_xchg__nocheck(haddr, BSWAP(val))); +} + +#define GEN_ATOMIC_HELPER(X) \ +ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \ + ABI_TYPE val EXTRA_ARGS) \ +{ \ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; \ + return BSWAP(atomic_##X(haddr, BSWAP(val))); \ +} + +GEN_ATOMIC_HELPER(fetch_and) +GEN_ATOMIC_HELPER(fetch_or) +GEN_ATOMIC_HELPER(fetch_xor) +GEN_ATOMIC_HELPER(and_fetch) +GEN_ATOMIC_HELPER(or_fetch) +GEN_ATOMIC_HELPER(xor_fetch) + +#undef GEN_ATOMIC_HELPER + +/* Note that for addition, we need to use a separate cmpxchg loop instead + of bswaps for the reverse-host-endian helpers. */ +ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE ldo, ldn, ret, sto; + + ldo = atomic_read__nocheck(haddr); + while (1) { + ret = BSWAP(ldo); + sto = BSWAP(ret + val); + ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto); + if (ldn == ldo) { + return ret; + } + ldo = ldn; + } +} + +ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + DATA_TYPE ldo, ldn, ret, sto; + + ldo = atomic_read__nocheck(haddr); + while (1) { + ret = BSWAP(ldo) + val; + sto = BSWAP(ret); + ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto); + if (ldn == ldo) { + return ret; + } + ldo = ldn; + } +} +#endif /* DATA_SIZE >= 16 */ + +#undef END +#endif /* DATA_SIZE > 1 */ + +#undef BSWAP +#undef ABI_TYPE +#undef DATA_TYPE +#undef SUFFIX +#undef DATA_SIZE diff --git a/backends/Makefile.objs b/backends/Makefile.objs index 31a3a894f5..18469980e6 100644 --- a/backends/Makefile.objs +++ b/backends/Makefile.objs @@ -9,3 +9,6 @@ common-obj-$(CONFIG_TPM) += tpm.o common-obj-y += hostmem.o hostmem-ram.o common-obj-$(CONFIG_LINUX) += hostmem-file.o + +common-obj-y += cryptodev.o +common-obj-y += cryptodev-builtin.o diff --git a/backends/baum.c b/backends/baum.c index c537141b22..b92369d840 100644 --- a/backends/baum.c +++ b/backends/baum.c @@ -1,7 +1,7 @@ /* * QEMU Baum Braille Device * - * Copyright (c) 2008 Samuel Thibault + * Copyright (c) 2008, 2010-2011, 2016 Samuel Thibault * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -92,6 +92,7 @@ typedef struct { brlapi_handle_t *brlapi; int brlapi_fd; unsigned int x, y; + bool deferred_init; uint8_t in_buf[BUF_SIZE]; uint8_t in_buf_used; @@ -102,8 +103,11 @@ typedef struct { } BaumDriverState; /* Let's assume NABCC by default */ -static const uint8_t nabcc_translation[256] = { - [0] = ' ', +enum way { + 
DOTS2ASCII, + ASCII2DOTS +}; +static const uint8_t nabcc_translation[2][256] = { #ifndef BRLAPI_DOTS #define BRLAPI_DOTS(d1,d2,d3,d4,d5,d6,d7,d8) \ ((d1?BRLAPI_DOT1:0)|\ @@ -115,107 +119,154 @@ static const uint8_t nabcc_translation[256] = { (d7?BRLAPI_DOT7:0)|\ (d8?BRLAPI_DOT8:0)) #endif - [BRLAPI_DOTS(1,0,0,0,0,0,0,0)] = 'a', - [BRLAPI_DOTS(1,1,0,0,0,0,0,0)] = 'b', - [BRLAPI_DOTS(1,0,0,1,0,0,0,0)] = 'c', - [BRLAPI_DOTS(1,0,0,1,1,0,0,0)] = 'd', - [BRLAPI_DOTS(1,0,0,0,1,0,0,0)] = 'e', - [BRLAPI_DOTS(1,1,0,1,0,0,0,0)] = 'f', - [BRLAPI_DOTS(1,1,0,1,1,0,0,0)] = 'g', - [BRLAPI_DOTS(1,1,0,0,1,0,0,0)] = 'h', - [BRLAPI_DOTS(0,1,0,1,0,0,0,0)] = 'i', - [BRLAPI_DOTS(0,1,0,1,1,0,0,0)] = 'j', - [BRLAPI_DOTS(1,0,1,0,0,0,0,0)] = 'k', - [BRLAPI_DOTS(1,1,1,0,0,0,0,0)] = 'l', - [BRLAPI_DOTS(1,0,1,1,0,0,0,0)] = 'm', - [BRLAPI_DOTS(1,0,1,1,1,0,0,0)] = 'n', - [BRLAPI_DOTS(1,0,1,0,1,0,0,0)] = 'o', - [BRLAPI_DOTS(1,1,1,1,0,0,0,0)] = 'p', - [BRLAPI_DOTS(1,1,1,1,1,0,0,0)] = 'q', - [BRLAPI_DOTS(1,1,1,0,1,0,0,0)] = 'r', - [BRLAPI_DOTS(0,1,1,1,0,0,0,0)] = 's', - [BRLAPI_DOTS(0,1,1,1,1,0,0,0)] = 't', - [BRLAPI_DOTS(1,0,1,0,0,1,0,0)] = 'u', - [BRLAPI_DOTS(1,1,1,0,0,1,0,0)] = 'v', - [BRLAPI_DOTS(0,1,0,1,1,1,0,0)] = 'w', - [BRLAPI_DOTS(1,0,1,1,0,1,0,0)] = 'x', - [BRLAPI_DOTS(1,0,1,1,1,1,0,0)] = 'y', - [BRLAPI_DOTS(1,0,1,0,1,1,0,0)] = 'z', - - [BRLAPI_DOTS(1,0,0,0,0,0,1,0)] = 'A', - [BRLAPI_DOTS(1,1,0,0,0,0,1,0)] = 'B', - [BRLAPI_DOTS(1,0,0,1,0,0,1,0)] = 'C', - [BRLAPI_DOTS(1,0,0,1,1,0,1,0)] = 'D', - [BRLAPI_DOTS(1,0,0,0,1,0,1,0)] = 'E', - [BRLAPI_DOTS(1,1,0,1,0,0,1,0)] = 'F', - [BRLAPI_DOTS(1,1,0,1,1,0,1,0)] = 'G', - [BRLAPI_DOTS(1,1,0,0,1,0,1,0)] = 'H', - [BRLAPI_DOTS(0,1,0,1,0,0,1,0)] = 'I', - [BRLAPI_DOTS(0,1,0,1,1,0,1,0)] = 'J', - [BRLAPI_DOTS(1,0,1,0,0,0,1,0)] = 'K', - [BRLAPI_DOTS(1,1,1,0,0,0,1,0)] = 'L', - [BRLAPI_DOTS(1,0,1,1,0,0,1,0)] = 'M', - [BRLAPI_DOTS(1,0,1,1,1,0,1,0)] = 'N', - [BRLAPI_DOTS(1,0,1,0,1,0,1,0)] = 'O', - [BRLAPI_DOTS(1,1,1,1,0,0,1,0)] = 'P', - [BRLAPI_DOTS(1,1,1,1,1,0,1,0)] = 'Q', - [BRLAPI_DOTS(1,1,1,0,1,0,1,0)] = 'R', - [BRLAPI_DOTS(0,1,1,1,0,0,1,0)] = 'S', - [BRLAPI_DOTS(0,1,1,1,1,0,1,0)] = 'T', - [BRLAPI_DOTS(1,0,1,0,0,1,1,0)] = 'U', - [BRLAPI_DOTS(1,1,1,0,0,1,1,0)] = 'V', - [BRLAPI_DOTS(0,1,0,1,1,1,1,0)] = 'W', - [BRLAPI_DOTS(1,0,1,1,0,1,1,0)] = 'X', - [BRLAPI_DOTS(1,0,1,1,1,1,1,0)] = 'Y', - [BRLAPI_DOTS(1,0,1,0,1,1,1,0)] = 'Z', - - [BRLAPI_DOTS(0,0,1,0,1,1,0,0)] = '0', - [BRLAPI_DOTS(0,1,0,0,0,0,0,0)] = '1', - [BRLAPI_DOTS(0,1,1,0,0,0,0,0)] = '2', - [BRLAPI_DOTS(0,1,0,0,1,0,0,0)] = '3', - [BRLAPI_DOTS(0,1,0,0,1,1,0,0)] = '4', - [BRLAPI_DOTS(0,1,0,0,0,1,0,0)] = '5', - [BRLAPI_DOTS(0,1,1,0,1,0,0,0)] = '6', - [BRLAPI_DOTS(0,1,1,0,1,1,0,0)] = '7', - [BRLAPI_DOTS(0,1,1,0,0,1,0,0)] = '8', - [BRLAPI_DOTS(0,0,1,0,1,0,0,0)] = '9', - - [BRLAPI_DOTS(0,0,0,1,0,1,0,0)] = '.', - [BRLAPI_DOTS(0,0,1,1,0,1,0,0)] = '+', - [BRLAPI_DOTS(0,0,1,0,0,1,0,0)] = '-', - [BRLAPI_DOTS(1,0,0,0,0,1,0,0)] = '*', - [BRLAPI_DOTS(0,0,1,1,0,0,0,0)] = '/', - [BRLAPI_DOTS(1,1,1,0,1,1,0,0)] = '(', - [BRLAPI_DOTS(0,1,1,1,1,1,0,0)] = ')', - - [BRLAPI_DOTS(1,1,1,1,0,1,0,0)] = '&', - [BRLAPI_DOTS(0,0,1,1,1,1,0,0)] = '#', - - [BRLAPI_DOTS(0,0,0,0,0,1,0,0)] = ',', - [BRLAPI_DOTS(0,0,0,0,1,1,0,0)] = ';', - [BRLAPI_DOTS(1,0,0,0,1,1,0,0)] = ':', - [BRLAPI_DOTS(0,1,1,1,0,1,0,0)] = '!', - [BRLAPI_DOTS(1,0,0,1,1,1,0,0)] = '?', - [BRLAPI_DOTS(0,0,0,0,1,0,0,0)] = '"', - [BRLAPI_DOTS(0,0,1,0,0,0,0,0)] ='\'', - [BRLAPI_DOTS(0,0,0,1,0,0,0,0)] = '`', - [BRLAPI_DOTS(0,0,0,1,1,0,1,0)] = '^', - [BRLAPI_DOTS(0,0,0,1,1,0,0,0)] = '~', - 
[BRLAPI_DOTS(0,1,0,1,0,1,1,0)] = '[', - [BRLAPI_DOTS(1,1,0,1,1,1,1,0)] = ']', - [BRLAPI_DOTS(0,1,0,1,0,1,0,0)] = '{', - [BRLAPI_DOTS(1,1,0,1,1,1,0,0)] = '}', - [BRLAPI_DOTS(1,1,1,1,1,1,0,0)] = '=', - [BRLAPI_DOTS(1,1,0,0,0,1,0,0)] = '<', - [BRLAPI_DOTS(0,0,1,1,1,0,0,0)] = '>', - [BRLAPI_DOTS(1,1,0,1,0,1,0,0)] = '$', - [BRLAPI_DOTS(1,0,0,1,0,1,0,0)] = '%', - [BRLAPI_DOTS(0,0,0,1,0,0,1,0)] = '@', - [BRLAPI_DOTS(1,1,0,0,1,1,0,0)] = '|', - [BRLAPI_DOTS(1,1,0,0,1,1,1,0)] ='\\', - [BRLAPI_DOTS(0,0,0,1,1,1,0,0)] = '_', +#define DO(dots, ascii) \ + [DOTS2ASCII][dots] = ascii, \ + [ASCII2DOTS][ascii] = dots + DO(0, ' '), + DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 0, 0, 0), 'a'), + DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 0, 0, 0), 'b'), + DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 0, 0, 0), 'c'), + DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 0, 0, 0), 'd'), + DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 0, 0, 0), 'e'), + DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 0, 0, 0), 'f'), + DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 0, 0, 0), 'g'), + DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 0, 0, 0), 'h'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 0, 0, 0), 'i'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 0, 0, 0), 'j'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 0, 0, 0), 'k'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 0, 0, 0), 'l'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 0, 0, 0), 'm'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 0, 0, 0), 'n'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 0, 0, 0), 'o'), + DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 0, 0, 0), 'p'), + DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 0, 0, 0), 'q'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 0, 0, 0), 'r'), + DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 0, 0, 0), 's'), + DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 0, 0, 0), 't'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 1, 0, 0), 'u'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 1, 0, 0), 'v'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 1, 0, 0), 'w'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 1, 0, 0), 'x'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 1, 0, 0), 'y'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 1, 0, 0), 'z'), + + DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 0, 1, 0), 'A'), + DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 0, 1, 0), 'B'), + DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 0, 1, 0), 'C'), + DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 0, 1, 0), 'D'), + DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 0, 1, 0), 'E'), + DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 0, 1, 0), 'F'), + DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 0, 1, 0), 'G'), + DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 0, 1, 0), 'H'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 0, 1, 0), 'I'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 0, 1, 0), 'J'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 0, 1, 0), 'K'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 0, 1, 0), 'L'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 0, 1, 0), 'M'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 0, 1, 0), 'N'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 0, 1, 0), 'O'), + DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 0, 1, 0), 'P'), + DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 0, 1, 0), 'Q'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 0, 1, 0), 'R'), + DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 0, 1, 0), 'S'), + DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 0, 1, 0), 'T'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 0, 1, 1, 0), 'U'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 0, 1, 1, 0), 'V'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 1, 1, 1, 0), 'W'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 0, 1, 1, 0), 'X'), + DO(BRLAPI_DOTS(1, 0, 1, 1, 1, 1, 1, 0), 'Y'), + DO(BRLAPI_DOTS(1, 0, 1, 0, 1, 1, 1, 0), 'Z'), + + DO(BRLAPI_DOTS(0, 0, 1, 0, 1, 1, 0, 0), '0'), + DO(BRLAPI_DOTS(0, 1, 0, 0, 0, 0, 0, 0), '1'), + DO(BRLAPI_DOTS(0, 1, 1, 0, 0, 0, 0, 0), '2'), + DO(BRLAPI_DOTS(0, 1, 0, 0, 1, 0, 0, 0), '3'), + DO(BRLAPI_DOTS(0, 1, 0, 0, 1, 1, 0, 0), '4'), + DO(BRLAPI_DOTS(0, 1, 0, 0, 0, 1, 0, 0), '5'), + DO(BRLAPI_DOTS(0, 1, 1, 0, 1, 0, 0, 0), '6'), + DO(BRLAPI_DOTS(0, 1, 1, 0, 1, 1, 0, 0), '7'), + DO(BRLAPI_DOTS(0, 1, 1, 0, 0, 1, 0, 0), '8'), + 
DO(BRLAPI_DOTS(0, 0, 1, 0, 1, 0, 0, 0), '9'), + + DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 1, 0, 0), '.'), + DO(BRLAPI_DOTS(0, 0, 1, 1, 0, 1, 0, 0), '+'), + DO(BRLAPI_DOTS(0, 0, 1, 0, 0, 1, 0, 0), '-'), + DO(BRLAPI_DOTS(1, 0, 0, 0, 0, 1, 0, 0), '*'), + DO(BRLAPI_DOTS(0, 0, 1, 1, 0, 0, 0, 0), '/'), + DO(BRLAPI_DOTS(1, 1, 1, 0, 1, 1, 0, 0), '('), + DO(BRLAPI_DOTS(0, 1, 1, 1, 1, 1, 0, 0), ')'), + + DO(BRLAPI_DOTS(1, 1, 1, 1, 0, 1, 0, 0), '&'), + DO(BRLAPI_DOTS(0, 0, 1, 1, 1, 1, 0, 0), '#'), + + DO(BRLAPI_DOTS(0, 0, 0, 0, 0, 1, 0, 0), ','), + DO(BRLAPI_DOTS(0, 0, 0, 0, 1, 1, 0, 0), ';'), + DO(BRLAPI_DOTS(1, 0, 0, 0, 1, 1, 0, 0), ':'), + DO(BRLAPI_DOTS(0, 1, 1, 1, 0, 1, 0, 0), '!'), + DO(BRLAPI_DOTS(1, 0, 0, 1, 1, 1, 0, 0), '?'), + DO(BRLAPI_DOTS(0, 0, 0, 0, 1, 0, 0, 0), '"'), + DO(BRLAPI_DOTS(0, 0, 1, 0, 0, 0, 0, 0), '\''), + DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 0, 0, 0), '`'), + DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 0, 1, 0), '^'), + DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 0, 0, 0), '~'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 1, 1, 0), '['), + DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 1, 1, 0), ']'), + DO(BRLAPI_DOTS(0, 1, 0, 1, 0, 1, 0, 0), '{'), + DO(BRLAPI_DOTS(1, 1, 0, 1, 1, 1, 0, 0), '}'), + DO(BRLAPI_DOTS(1, 1, 1, 1, 1, 1, 0, 0), '='), + DO(BRLAPI_DOTS(1, 1, 0, 0, 0, 1, 0, 0), '<'), + DO(BRLAPI_DOTS(0, 0, 1, 1, 1, 0, 0, 0), '>'), + DO(BRLAPI_DOTS(1, 1, 0, 1, 0, 1, 0, 0), '$'), + DO(BRLAPI_DOTS(1, 0, 0, 1, 0, 1, 0, 0), '%'), + DO(BRLAPI_DOTS(0, 0, 0, 1, 0, 0, 1, 0), '@'), + DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 1, 0, 0), '|'), + DO(BRLAPI_DOTS(1, 1, 0, 0, 1, 1, 1, 0), '\\'), + DO(BRLAPI_DOTS(0, 0, 0, 1, 1, 1, 0, 0), '_'), }; +/* The guest OS has started discussing with us, finish initializing BrlAPI */ +static int baum_deferred_init(BaumDriverState *baum) +{ +#if defined(CONFIG_SDL) +#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0) + SDL_SysWMinfo info; +#endif +#endif + int tty; + + if (baum->deferred_init) { + return 1; + } + + if (brlapi__getDisplaySize(baum->brlapi, &baum->x, &baum->y) == -1) { + brlapi_perror("baum: brlapi__getDisplaySize"); + return 0; + } + +#if defined(CONFIG_SDL) +#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0) + memset(&info, 0, sizeof(info)); + SDL_VERSION(&info.version); + if (SDL_GetWMInfo(&info)) { + tty = info.info.x11.wmwindow; + } else { +#endif +#endif + tty = BRLAPI_TTY_DEFAULT; +#if defined(CONFIG_SDL) +#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0) + } +#endif +#endif + + if (brlapi__enterTtyMode(baum->brlapi, tty, NULL) == -1) { + brlapi_perror("baum: brlapi__enterTtyMode"); + return 0; + } + baum->deferred_init = 1; + return 1; +} + /* The serial port can receive more of our data */ static void baum_accept_input(struct CharDriverState *chr) { @@ -346,8 +397,10 @@ static int baum_eat_packet(BaumDriverState *baum, const uint8_t *buf, int len) cursor = i + 1; c &= ~(BRLAPI_DOT7|BRLAPI_DOT8); } - if (!(c = nabcc_translation[c])) + c = nabcc_translation[DOTS2ASCII][c]; + if (!c) { c = '?'; + } text[i] = c; } timer_del(baum->cellCount_timer); @@ -440,6 +493,8 @@ static int baum_write(CharDriverState *chr, const uint8_t *buf, int len) return 0; if (!baum->brlapi) return len; + if (!baum_deferred_init(baum)) + return len; while (len) { /* Complete our buffer as much as possible */ @@ -476,6 +531,13 @@ static void baum_send_key(BaumDriverState *baum, uint8_t type, uint8_t value) { baum_write_packet(baum, packet, sizeof(packet)); } +static void baum_send_key2(BaumDriverState *baum, uint8_t type, uint8_t value, + uint8_t value2) { + uint8_t packet[] = { type, value, value2 }; + DPRINTF("writing key %x %x\n", type, 
value); + baum_write_packet(baum, packet, sizeof(packet)); +} + /* We got some data on the BrlAPI socket */ static void baum_chr_read(void *opaque) { @@ -484,6 +546,8 @@ static void baum_chr_read(void *opaque) int ret; if (!baum->brlapi) return; + if (!baum_deferred_init(baum)) + return; while ((ret = brlapi__readKey(baum->brlapi, 0, &code)) == 1) { DPRINTF("got key %"BRLAPI_PRIxKEYCODE"\n", code); /* Emulate */ @@ -540,7 +604,17 @@ static void baum_chr_read(void *opaque) } break; case BRLAPI_KEY_TYPE_SYM: - break; + { + brlapi_keyCode_t keysym = code & BRLAPI_KEY_CODE_MASK; + if (keysym < 0x100) { + uint8_t dots = nabcc_translation[ASCII2DOTS][keysym]; + if (dots) { + baum_send_key2(baum, BAUM_RSP_EntryKeys, 0, dots); + baum_send_key2(baum, BAUM_RSP_EntryKeys, 0, 0); + } + } + break; + } } } if (ret == -1 && (brlapi_errno != BRLAPI_ERROR_LIBCERR || errno != EINTR)) { @@ -551,7 +625,7 @@ static void baum_chr_read(void *opaque) } } -static void baum_close(struct CharDriverState *chr) +static void baum_free(struct CharDriverState *chr) { BaumDriverState *baum = chr->opaque; @@ -566,18 +640,13 @@ static void baum_close(struct CharDriverState *chr) static CharDriverState *chr_baum_init(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevCommon *common = backend->u.braille.data; BaumDriverState *baum; CharDriverState *chr; brlapi_handle_t *handle; -#if defined(CONFIG_SDL) -#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0) - SDL_SysWMinfo info; -#endif -#endif - int tty; chr = qemu_chr_alloc(common, errp); if (!chr) { @@ -589,7 +658,7 @@ static CharDriverState *chr_baum_init(const char *id, chr->opaque = baum; chr->chr_write = baum_write; chr->chr_accept_input = baum_accept_input; - chr->chr_close = baum_close; + chr->chr_free = baum_free; handle = g_malloc0(brlapi_getHandleSize()); baum->brlapi = handle; @@ -600,39 +669,14 @@ static CharDriverState *chr_baum_init(const char *id, brlapi_strerror(brlapi_error_location())); goto fail_handle; } + baum->deferred_init = 0; baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum); - if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) { - error_setg(errp, "brlapi__getDisplaySize: %s", - brlapi_strerror(brlapi_error_location())); - goto fail; - } - -#if defined(CONFIG_SDL) -#if SDL_COMPILEDVERSION < SDL_VERSIONNUM(2, 0, 0) - memset(&info, 0, sizeof(info)); - SDL_VERSION(&info.version); - if (SDL_GetWMInfo(&info)) - tty = info.info.x11.wmwindow; - else -#endif -#endif - tty = BRLAPI_TTY_DEFAULT; - - if (brlapi__enterTtyMode(handle, tty, NULL) == -1) { - error_setg(errp, "brlapi__enterTtyMode: %s", - brlapi_strerror(brlapi_error_location())); - goto fail; - } - qemu_set_fd_handler(baum->brlapi_fd, baum_chr_read, NULL, baum); return chr; -fail: - timer_free(baum->cellCount_timer); - brlapi__closeConnection(handle); fail_handle: g_free(handle); g_free(chr); diff --git a/backends/cryptodev-builtin.c b/backends/cryptodev-builtin.c new file mode 100644 index 0000000000..eda954b2a2 --- /dev/null +++ b/backends/cryptodev-builtin.c @@ -0,0 +1,361 @@ +/* + * QEMU Cryptodev backend for QEMU cipher APIs + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "sysemu/cryptodev.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "standard-headers/linux/virtio_crypto.h" +#include "crypto/cipher.h" + + +/** + * @TYPE_CRYPTODEV_BACKEND_BUILTIN: + * name of backend that uses QEMU cipher API + */ +#define TYPE_CRYPTODEV_BACKEND_BUILTIN "cryptodev-backend-builtin" + +#define CRYPTODEV_BACKEND_BUILTIN(obj) \ + OBJECT_CHECK(CryptoDevBackendBuiltin, \ + (obj), TYPE_CRYPTODEV_BACKEND_BUILTIN) + +typedef struct CryptoDevBackendBuiltin + CryptoDevBackendBuiltin; + +typedef struct CryptoDevBackendBuiltinSession { + QCryptoCipher *cipher; + uint8_t direction; /* encryption or decryption */ + uint8_t type; /* cipher? hash? aead? */ + QTAILQ_ENTRY(CryptoDevBackendBuiltinSession) next; +} CryptoDevBackendBuiltinSession; + +/* Max number of symmetric sessions */ +#define MAX_NUM_SESSIONS 256 + +#define CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN 512 +#define CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN 64 + +struct CryptoDevBackendBuiltin { + CryptoDevBackend parent_obj; + + CryptoDevBackendBuiltinSession *sessions[MAX_NUM_SESSIONS]; +}; + +static void cryptodev_builtin_init( + CryptoDevBackend *backend, Error **errp) +{ + /* Only support one queue */ + int queues = backend->conf.peers.queues; + CryptoDevBackendClient *cc; + + if (queues != 1) { + error_setg(errp, + "Only support one queue in cryptdov-builtin backend"); + return; + } + + cc = cryptodev_backend_new_client( + "cryptodev-builtin", NULL); + cc->info_str = g_strdup_printf("cryptodev-builtin0"); + cc->queue_index = 0; + backend->conf.peers.ccs[0] = cc; + + backend->conf.crypto_services = + 1u << VIRTIO_CRYPTO_SERVICE_CIPHER | + 1u << VIRTIO_CRYPTO_SERVICE_HASH | + 1u << VIRTIO_CRYPTO_SERVICE_MAC; + backend->conf.cipher_algo_l = 1u << VIRTIO_CRYPTO_CIPHER_AES_CBC; + backend->conf.hash_algo = 1u << VIRTIO_CRYPTO_HASH_SHA1; + /* + * Set the Maximum length of crypto request. + * Why this value? Just avoid to overflow when + * memory allocation for each crypto request. 
+ */ + backend->conf.max_size = LONG_MAX - sizeof(CryptoDevBackendSymOpInfo); + backend->conf.max_cipher_key_len = CRYPTODEV_BUITLIN_MAX_CIPHER_KEY_LEN; + backend->conf.max_auth_key_len = CRYPTODEV_BUITLIN_MAX_AUTH_KEY_LEN; +} + +static int +cryptodev_builtin_get_unused_session_index( + CryptoDevBackendBuiltin *builtin) +{ + size_t i; + + for (i = 0; i < MAX_NUM_SESSIONS; i++) { + if (builtin->sessions[i] == NULL) { + return i; + } + } + + return -1; +} + +static int +cryptodev_builtin_get_aes_algo(uint32_t key_len, Error **errp) +{ + int algo; + + if (key_len == 128 / 8) { + algo = QCRYPTO_CIPHER_ALG_AES_128; + } else if (key_len == 192 / 8) { + algo = QCRYPTO_CIPHER_ALG_AES_192; + } else if (key_len == 256 / 8) { + algo = QCRYPTO_CIPHER_ALG_AES_256; + } else { + error_setg(errp, "Unsupported key length :%u", key_len); + return -1; + } + + return algo; +} + +static int cryptodev_builtin_create_cipher_session( + CryptoDevBackendBuiltin *builtin, + CryptoDevBackendSymSessionInfo *sess_info, + Error **errp) +{ + int algo; + int mode; + QCryptoCipher *cipher; + int index; + CryptoDevBackendBuiltinSession *sess; + + if (sess_info->op_type != VIRTIO_CRYPTO_SYM_OP_CIPHER) { + error_setg(errp, "Unsupported optype :%u", sess_info->op_type); + return -1; + } + + index = cryptodev_builtin_get_unused_session_index(builtin); + if (index < 0) { + error_setg(errp, "Total number of sessions created exceeds %u", + MAX_NUM_SESSIONS); + return -1; + } + + switch (sess_info->cipher_alg) { + case VIRTIO_CRYPTO_CIPHER_AES_ECB: + algo = cryptodev_builtin_get_aes_algo(sess_info->key_len, + errp); + if (algo < 0) { + return -1; + } + mode = QCRYPTO_CIPHER_MODE_ECB; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CBC: + algo = cryptodev_builtin_get_aes_algo(sess_info->key_len, + errp); + if (algo < 0) { + return -1; + } + mode = QCRYPTO_CIPHER_MODE_CBC; + break; + case VIRTIO_CRYPTO_CIPHER_AES_CTR: + algo = cryptodev_builtin_get_aes_algo(sess_info->key_len, + errp); + if (algo < 0) { + return -1; + } + mode = QCRYPTO_CIPHER_MODE_CTR; + break; + case VIRTIO_CRYPTO_CIPHER_DES_ECB: + algo = QCRYPTO_CIPHER_ALG_DES_RFB; + mode = QCRYPTO_CIPHER_MODE_ECB; + break; + default: + error_setg(errp, "Unsupported cipher alg :%u", + sess_info->cipher_alg); + return -1; + } + + cipher = qcrypto_cipher_new(algo, mode, + sess_info->cipher_key, + sess_info->key_len, + errp); + if (!cipher) { + return -1; + } + + sess = g_new0(CryptoDevBackendBuiltinSession, 1); + sess->cipher = cipher; + sess->direction = sess_info->direction; + sess->type = sess_info->op_type; + + builtin->sessions[index] = sess; + + return index; +} + +static int64_t cryptodev_builtin_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + int64_t session_id = -1; + int ret; + + switch (sess_info->op_code) { + case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: + ret = cryptodev_builtin_create_cipher_session( + builtin, sess_info, errp); + if (ret < 0) { + return ret; + } else { + session_id = ret; + } + break; + case VIRTIO_CRYPTO_HASH_CREATE_SESSION: + case VIRTIO_CRYPTO_MAC_CREATE_SESSION: + default: + error_setg(errp, "Unsupported opcode :%" PRIu32 "", + sess_info->op_code); + return -1; + } + + return session_id; +} + +static int cryptodev_builtin_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + 
CRYPTODEV_BACKEND_BUILTIN(backend); + + if (session_id >= MAX_NUM_SESSIONS || + builtin->sessions[session_id] == NULL) { + error_setg(errp, "Cannot find a valid session id: %" PRIu64 "", + session_id); + return -1; + } + + qcrypto_cipher_free(builtin->sessions[session_id]->cipher); + g_free(builtin->sessions[session_id]); + builtin->sessions[session_id] = NULL; + return 0; +} + +static int cryptodev_builtin_sym_operation( + CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + CryptoDevBackendBuiltinSession *sess; + int ret; + + if (op_info->session_id >= MAX_NUM_SESSIONS || + builtin->sessions[op_info->session_id] == NULL) { + error_setg(errp, "Cannot find a valid session id: %" PRIu64 "", + op_info->session_id); + return -VIRTIO_CRYPTO_INVSESS; + } + + if (op_info->op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + error_setg(errp, + "Algorithm chain is unsupported for cryptdoev-builtin"); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + sess = builtin->sessions[op_info->session_id]; + + ret = qcrypto_cipher_setiv(sess->cipher, op_info->iv, + op_info->iv_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } + + if (sess->direction == VIRTIO_CRYPTO_OP_ENCRYPT) { + ret = qcrypto_cipher_encrypt(sess->cipher, op_info->src, + op_info->dst, op_info->src_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } + } else { + ret = qcrypto_cipher_decrypt(sess->cipher, op_info->src, + op_info->dst, op_info->src_len, errp); + if (ret < 0) { + return -VIRTIO_CRYPTO_ERR; + } + } + return VIRTIO_CRYPTO_OK; +} + +static void cryptodev_builtin_cleanup( + CryptoDevBackend *backend, + Error **errp) +{ + CryptoDevBackendBuiltin *builtin = + CRYPTODEV_BACKEND_BUILTIN(backend); + size_t i; + int queues = backend->conf.peers.queues; + CryptoDevBackendClient *cc; + + for (i = 0; i < MAX_NUM_SESSIONS; i++) { + if (builtin->sessions[i] != NULL) { + cryptodev_builtin_sym_close_session( + backend, i, 0, errp); + } + } + + assert(queues == 1); + + for (i = 0; i < queues; i++) { + cc = backend->conf.peers.ccs[i]; + if (cc) { + cryptodev_backend_free_client(cc); + backend->conf.peers.ccs[i] = NULL; + } + } +} + +static void +cryptodev_builtin_class_init(ObjectClass *oc, void *data) +{ + CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_CLASS(oc); + + bc->init = cryptodev_builtin_init; + bc->cleanup = cryptodev_builtin_cleanup; + bc->create_session = cryptodev_builtin_sym_create_session; + bc->close_session = cryptodev_builtin_sym_close_session; + bc->do_sym_op = cryptodev_builtin_sym_operation; +} + +static const TypeInfo cryptodev_builtin_info = { + .name = TYPE_CRYPTODEV_BACKEND_BUILTIN, + .parent = TYPE_CRYPTODEV_BACKEND, + .class_init = cryptodev_builtin_class_init, + .instance_size = sizeof(CryptoDevBackendBuiltin), +}; + +static void +cryptodev_builtin_register_types(void) +{ + type_register_static(&cryptodev_builtin_info); +} + +type_init(cryptodev_builtin_register_types); diff --git a/backends/cryptodev.c b/backends/cryptodev.c new file mode 100644 index 0000000000..4a49f9762f --- /dev/null +++ b/backends/cryptodev.c @@ -0,0 +1,245 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 
+ * + * Authors: + * Gonglei + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "sysemu/cryptodev.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qapi-types.h" +#include "qapi-visit.h" +#include "qemu/config-file.h" +#include "qom/object_interfaces.h" +#include "hw/virtio/virtio-crypto.h" + + +static QTAILQ_HEAD(, CryptoDevBackendClient) crypto_clients; + + +CryptoDevBackendClient * +cryptodev_backend_new_client(const char *model, + const char *name) +{ + CryptoDevBackendClient *cc; + + cc = g_malloc0(sizeof(CryptoDevBackendClient)); + cc->model = g_strdup(model); + if (name) { + cc->name = g_strdup(name); + } + + QTAILQ_INSERT_TAIL(&crypto_clients, cc, next); + + return cc; +} + +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc) +{ + QTAILQ_REMOVE(&crypto_clients, cc, next); + g_free(cc->name); + g_free(cc->model); + g_free(cc->info_str); + g_free(cc); +} + +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->cleanup) { + bc->cleanup(backend, errp); + } + + backend->ready = false; +} + +int64_t cryptodev_backend_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->create_session) { + return bc->create_session(backend, sess_info, queue_index, errp); + } + + return -1; +} + +int cryptodev_backend_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->close_session) { + return bc->close_session(backend, session_id, queue_index, errp); + } + + return -1; +} + +static int cryptodev_backend_sym_operation( + CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp) +{ + CryptoDevBackendClass *bc = + CRYPTODEV_BACKEND_GET_CLASS(backend); + + if (bc->do_sym_op) { + return bc->do_sym_op(backend, op_info, queue_index, errp); + } + + return -VIRTIO_CRYPTO_ERR; +} + +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + void *opaque, + uint32_t queue_index, Error **errp) +{ + VirtIOCryptoReq *req = opaque; + + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + CryptoDevBackendSymOpInfo *op_info; + op_info = req->u.sym_op_info; + + return cryptodev_backend_sym_operation(backend, + op_info, queue_index, errp); + } else { + error_setg(errp, "Unsupported cryptodev alg type: %" PRIu32 "", + req->flags); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + return -VIRTIO_CRYPTO_ERR; +} + +static void +cryptodev_backend_get_queues(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + 
uint32_t value = backend->conf.peers.queues; + + visit_type_uint32(v, name, &value, errp); +} + +static void +cryptodev_backend_set_queues(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' doesn't take value '%" + PRIu32 "'", object_get_typename(obj), name, value); + goto out; + } + backend->conf.peers.queues = value; +out: + error_propagate(errp, local_err); +} + +static void +cryptodev_backend_complete(UserCreatable *uc, Error **errp) +{ + CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc); + CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc); + Error *local_err = NULL; + + if (bc->init) { + bc->init(backend, &local_err); + if (local_err) { + goto out; + } + } + backend->ready = true; + return; + +out: + backend->ready = false; + error_propagate(errp, local_err); +} + +static void cryptodev_backend_instance_init(Object *obj) +{ + object_property_add(obj, "queues", "int", + cryptodev_backend_get_queues, + cryptodev_backend_set_queues, + NULL, NULL, NULL); + /* Initialize devices' queues property to 1 */ + object_property_set_int(obj, 1, "queues", NULL); +} + +static void cryptodev_backend_finalize(Object *obj) +{ + +} + +static void +cryptodev_backend_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = cryptodev_backend_complete; + + QTAILQ_INIT(&crypto_clients); +} + +static const TypeInfo cryptodev_backend_info = { + .name = TYPE_CRYPTODEV_BACKEND, + .parent = TYPE_OBJECT, + .instance_size = sizeof(CryptoDevBackend), + .instance_init = cryptodev_backend_instance_init, + .instance_finalize = cryptodev_backend_finalize, + .class_size = sizeof(CryptoDevBackendClass), + .class_init = cryptodev_backend_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void +cryptodev_backend_register_types(void) +{ + type_register_static(&cryptodev_backend_info); +} + +type_init(cryptodev_backend_register_types); diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 5c4b808c1a..42efb2f28a 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -64,14 +64,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) #endif } -static void -file_backend_class_init(ObjectClass *oc, void *data) -{ - HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); - - bc->alloc = file_backend_memory_alloc; -} - static char *get_mem_path(Object *o, Error **errp) { HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o); @@ -112,13 +104,18 @@ static void file_memory_backend_set_share(Object *o, bool value, Error **errp) } static void -file_backend_instance_init(Object *o) +file_backend_class_init(ObjectClass *oc, void *data) { - object_property_add_bool(o, "share", - file_memory_backend_get_share, - file_memory_backend_set_share, NULL); - object_property_add_str(o, "mem-path", get_mem_path, - set_mem_path, NULL); + HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc); + + bc->alloc = file_backend_memory_alloc; + + object_class_property_add_bool(oc, "share", + file_memory_backend_get_share, file_memory_backend_set_share, + &error_abort); + object_class_property_add_str(oc, "mem-path", + get_mem_path, set_mem_path, + &error_abort); } static void file_backend_instance_finalize(Object *o) @@ -132,7 +129,6 @@ 
static const TypeInfo file_backend_info = { .name = TYPE_MEMORY_BACKEND_FILE, .parent = TYPE_MEMORY_BACKEND, .class_init = file_backend_class_init, - .instance_init = file_backend_instance_init, .instance_finalize = file_backend_instance_finalize, .instance_size = sizeof(HostMemoryBackendFile), }; diff --git a/backends/hostmem.c b/backends/hostmem.c index b7a208d0da..4256d24acb 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -241,26 +241,6 @@ static void host_memory_backend_init(Object *obj) backend->merge = machine_mem_merge(machine); backend->dump = machine_dump_guest_core(machine); backend->prealloc = mem_prealloc; - - object_property_add_bool(obj, "merge", - host_memory_backend_get_merge, - host_memory_backend_set_merge, NULL); - object_property_add_bool(obj, "dump", - host_memory_backend_get_dump, - host_memory_backend_set_dump, NULL); - object_property_add_bool(obj, "prealloc", - host_memory_backend_get_prealloc, - host_memory_backend_set_prealloc, NULL); - object_property_add(obj, "size", "int", - host_memory_backend_get_size, - host_memory_backend_set_size, NULL, NULL, NULL); - object_property_add(obj, "host-nodes", "int", - host_memory_backend_get_host_nodes, - host_memory_backend_set_host_nodes, NULL, NULL, NULL); - object_property_add_enum(obj, "policy", "HostMemPolicy", - HostMemPolicy_lookup, - host_memory_backend_get_policy, - host_memory_backend_set_policy, NULL); } MemoryRegion * @@ -375,6 +355,28 @@ host_memory_backend_class_init(ObjectClass *oc, void *data) ucc->complete = host_memory_backend_memory_complete; ucc->can_be_deleted = host_memory_backend_can_be_deleted; + + object_class_property_add_bool(oc, "merge", + host_memory_backend_get_merge, + host_memory_backend_set_merge, &error_abort); + object_class_property_add_bool(oc, "dump", + host_memory_backend_get_dump, + host_memory_backend_set_dump, &error_abort); + object_class_property_add_bool(oc, "prealloc", + host_memory_backend_get_prealloc, + host_memory_backend_set_prealloc, &error_abort); + object_class_property_add(oc, "size", "int", + host_memory_backend_get_size, + host_memory_backend_set_size, + NULL, NULL, &error_abort); + object_class_property_add(oc, "host-nodes", "int", + host_memory_backend_get_host_nodes, + host_memory_backend_set_host_nodes, + NULL, NULL, &error_abort); + object_class_property_add_enum(oc, "policy", "HostMemPolicy", + HostMemPolicy_lookup, + host_memory_backend_get_policy, + host_memory_backend_set_policy, &error_abort); } static const TypeInfo host_memory_backend_info = { diff --git a/backends/msmouse.c b/backends/msmouse.c index aeb905562d..733ca80f48 100644 --- a/backends/msmouse.c +++ b/backends/msmouse.c @@ -133,13 +133,12 @@ static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int return len; } -static void msmouse_chr_close (struct CharDriverState *chr) +static void msmouse_chr_free(struct CharDriverState *chr) { MouseState *mouse = chr->opaque; qemu_input_handler_unregister(mouse->hs); g_free(mouse); - g_free(chr); } static QemuInputHandler msmouse_handler = { @@ -152,6 +151,7 @@ static QemuInputHandler msmouse_handler = { static CharDriverState *qemu_chr_open_msmouse(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevCommon *common = backend->u.msmouse.data; @@ -159,10 +159,13 @@ static CharDriverState *qemu_chr_open_msmouse(const char *id, CharDriverState *chr; chr = qemu_chr_alloc(common, errp); + if (!chr) { + return NULL; + } chr->chr_write = msmouse_chr_write; - chr->chr_close = 
msmouse_chr_close; + chr->chr_free = msmouse_chr_free; chr->chr_accept_input = msmouse_chr_accept_input; - chr->explicit_be_open = true; + *be_opened = false; mouse = g_new0(MouseState, 1); mouse->hs = qemu_input_handler_register((DeviceState *)mouse, diff --git a/backends/rng-egd.c b/backends/rng-egd.c index 7a1b9242d8..69c04b1b26 100644 --- a/backends/rng-egd.c +++ b/backends/rng-egd.c @@ -15,7 +15,6 @@ #include "sysemu/char.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" -#include "hw/qdev.h" /* just for DEFINE_PROP_CHR */ #define TYPE_RNG_EGD "rng-egd" #define RNG_EGD(obj) OBJECT_CHECK(RngEgd, (obj), TYPE_RNG_EGD) @@ -24,7 +23,7 @@ typedef struct RngEgd { RngBackend parent; - CharDriverState *chr; + CharBackend chr; char *chr_name; } RngEgd; @@ -41,7 +40,9 @@ static void rng_egd_request_entropy(RngBackend *b, RngRequest *req) header[0] = 0x02; header[1] = len; - qemu_chr_fe_write(s->chr, header, sizeof(header)); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, header, sizeof(header)); size -= len; } @@ -85,6 +86,7 @@ static void rng_egd_chr_read(void *opaque, const uint8_t *buf, int size) static void rng_egd_opened(RngBackend *b, Error **errp) { RngEgd *s = RNG_EGD(b); + CharDriverState *chr; if (s->chr_name == NULL) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, @@ -92,21 +94,19 @@ static void rng_egd_opened(RngBackend *b, Error **errp) return; } - s->chr = qemu_chr_find(s->chr_name); - if (s->chr == NULL) { + chr = qemu_chr_find(s->chr_name); + if (chr == NULL) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", s->chr_name); return; } - - if (qemu_chr_fe_claim(s->chr) != 0) { - error_setg(errp, QERR_DEVICE_IN_USE, s->chr_name); + if (!qemu_chr_fe_init(&s->chr, chr, errp)) { return; } /* FIXME we should resubmit pending requests when the CDS reconnects. 
*/ - qemu_chr_add_handlers(s->chr, rng_egd_chr_can_read, rng_egd_chr_read, - NULL, s); + qemu_chr_fe_set_handlers(&s->chr, rng_egd_chr_can_read, + rng_egd_chr_read, NULL, s, NULL, true); } static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp) @@ -125,9 +125,10 @@ static void rng_egd_set_chardev(Object *obj, const char *value, Error **errp) static char *rng_egd_get_chardev(Object *obj, Error **errp) { RngEgd *s = RNG_EGD(obj); + CharDriverState *chr = qemu_chr_fe_get_driver(&s->chr); - if (s->chr && s->chr->label) { - return g_strdup(s->chr->label); + if (chr && chr->label) { + return g_strdup(chr->label); } return NULL; @@ -144,11 +145,7 @@ static void rng_egd_finalize(Object *obj) { RngEgd *s = RNG_EGD(obj); - if (s->chr) { - qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL); - qemu_chr_fe_release(s->chr); - } - + qemu_chr_fe_deinit(&s->chr); g_free(s->chr_name); } diff --git a/backends/testdev.c b/backends/testdev.c index 3ab1c90c1c..60156e320e 100644 --- a/backends/testdev.c +++ b/backends/testdev.c @@ -102,7 +102,7 @@ static int testdev_write(CharDriverState *chr, const uint8_t *buf, int len) return orig_len; } -static void testdev_close(struct CharDriverState *chr) +static void testdev_free(struct CharDriverState *chr) { TestdevCharState *testdev = chr->opaque; @@ -112,6 +112,7 @@ static void testdev_close(struct CharDriverState *chr) static CharDriverState *chr_testdev_init(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { TestdevCharState *testdev; @@ -122,7 +123,7 @@ static CharDriverState *chr_testdev_init(const char *id, chr->opaque = testdev; chr->chr_write = testdev_write; - chr->chr_close = testdev_close; + chr->chr_free = testdev_free; return chr; } diff --git a/block.c b/block.c index 2ac5e1a6bf..5f9e5125c3 100644 --- a/block.c +++ b/block.c @@ -28,7 +28,9 @@ #include "trace.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "block/nbd.h" #include "qemu/error-report.h" +#include "module_block.h" #include "qemu/module.h" #include "qapi/qmp/qerror.h" #include "qapi/qmp/qbool.h" @@ -43,6 +45,7 @@ #include "qapi-event.h" #include "qemu/cutils.h" #include "qemu/id.h" +#include "qapi/util.h" #ifdef CONFIG_BSD #include @@ -244,17 +247,40 @@ BlockDriverState *bdrv_new(void) return bs; } -BlockDriver *bdrv_find_format(const char *format_name) +static BlockDriver *bdrv_do_find_format(const char *format_name) { BlockDriver *drv1; + QLIST_FOREACH(drv1, &bdrv_drivers, list) { if (!strcmp(drv1->format_name, format_name)) { return drv1; } } + return NULL; } +BlockDriver *bdrv_find_format(const char *format_name) +{ + BlockDriver *drv1; + int i; + + drv1 = bdrv_do_find_format(format_name); + if (drv1) { + return drv1; + } + + /* The driver isn't registered, maybe we need to load a module */ + for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { + if (!strcmp(block_driver_modules[i].format_name, format_name)) { + block_module_load_one(block_driver_modules[i].library_name); + break; + } + } + + return bdrv_do_find_format(format_name); +} + static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only) { static const char *whitelist_rw[] = { @@ -463,6 +489,19 @@ static BlockDriver *find_hdev_driver(const char *filename) return drv; } +static BlockDriver *bdrv_do_find_protocol(const char *protocol) +{ + BlockDriver *drv1; + + QLIST_FOREACH(drv1, &bdrv_drivers, list) { + if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) { + return drv1; + } + } + + return NULL; +} + 
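The shape shared by bdrv_find_format() above and bdrv_find_protocol() below is the same: search the drivers that are already registered, and only on a miss scan the block_driver_modules[] table generated into module_block.h, load the matching module with block_module_load_one(), then repeat the search once. A minimal, self-contained sketch of that lookup/load/retry pattern follows; the types and names in it (Driver, module_table, load_module, find_driver) are illustrative stand-ins, not the QEMU definitions:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins: the real code walks bdrv_drivers and the
 * block_driver_modules[] table generated into module_block.h. */
typedef struct Driver {
    const char *format_name;
} Driver;

static Driver drivers[] = { { "qcow2" }, { "raw" } };
static size_t nr_registered = 1;   /* pretend only "qcow2" is registered so far */

static const struct {
    const char *format_name;
    const char *library_name;
} module_table[] = {
    { "raw", "block-raw" },
};

static Driver *do_find(const char *format_name)
{
    for (size_t i = 0; i < nr_registered; i++) {
        if (!strcmp(drivers[i].format_name, format_name)) {
            return &drivers[i];
        }
    }
    return NULL;
}

/* Stand-in for block_module_load_one(): loading a module registers its driver. */
static void load_module(const char *library_name)
{
    printf("loading %s\n", library_name);
    nr_registered = 2;
}

static Driver *find_driver(const char *format_name)
{
    Driver *drv = do_find(format_name);

    if (drv) {
        return drv;
    }
    /* Not registered yet: load the module that provides it, then retry once. */
    for (size_t i = 0; i < sizeof(module_table) / sizeof(module_table[0]); i++) {
        if (!strcmp(module_table[i].format_name, format_name)) {
            load_module(module_table[i].library_name);
            break;
        }
    }
    return do_find(format_name);   /* still NULL if no module matched */
}

int main(void)
{
    printf("%s\n", find_driver("raw") ? "raw: found after module load" : "raw: not found");
    return 0;
}

The retry is unconditional, so a driver whose module fails to load (or has no table entry) simply stays unknown; that is why, in the hunk below, bdrv_find_protocol() only reports "Unknown protocol" after the second bdrv_do_find_protocol() lookup.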
BlockDriver *bdrv_find_protocol(const char *filename, bool allow_protocol_prefix, Error **errp) @@ -471,6 +510,7 @@ BlockDriver *bdrv_find_protocol(const char *filename, char protocol[128]; int len; const char *p; + int i; /* TODO Drivers without bdrv_file_open must be specified explicitly */ @@ -497,15 +537,25 @@ BlockDriver *bdrv_find_protocol(const char *filename, len = sizeof(protocol) - 1; memcpy(protocol, filename, len); protocol[len] = '\0'; - QLIST_FOREACH(drv1, &bdrv_drivers, list) { - if (drv1->protocol_name && - !strcmp(drv1->protocol_name, protocol)) { - return drv1; + + drv1 = bdrv_do_find_protocol(protocol); + if (drv1) { + return drv1; + } + + for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) { + if (block_driver_modules[i].protocol_name && + !strcmp(block_driver_modules[i].protocol_name, protocol)) { + block_module_load_one(block_driver_modules[i].library_name); + break; } } - error_setg(errp, "Unknown protocol '%s'", protocol); - return NULL; + drv1 = bdrv_do_find_protocol(protocol); + if (!drv1) { + error_setg(errp, "Unknown protocol '%s'", protocol); + } + return drv1; } /* @@ -687,6 +737,9 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on"); + /* Copy the read-only option from the parent */ + qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + /* aio=native doesn't work for cache.direct=off, so disable it for the * temporary snapshot */ *child_flags &= ~BDRV_O_NATIVE_AIO; @@ -709,10 +762,13 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); + /* Inherit the read-only option from the parent if it's not set */ + qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); + /* Our block drivers take care to send flushes and respect unmap policy, * so we can default to enable both on lower layers regardless of the * corresponding parent options. 
*/ - flags |= BDRV_O_UNMAP; + qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); /* Clear flags that only apply to the top layer */ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ | @@ -762,7 +818,8 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options, qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); /* backing files always opened read-only */ - flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); + qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); + flags &= ~BDRV_O_COPY_ON_READ; /* snapshot=on is handled on the top layer */ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); @@ -809,6 +866,14 @@ static void update_flags_from_options(int *flags, QemuOpts *opts) if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) { *flags |= BDRV_O_NOCACHE; } + + *flags &= ~BDRV_O_RDWR; + + assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY)); + if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) { + *flags |= BDRV_O_RDWR; + } + } static void update_options_from_flags(QDict *options, int flags) @@ -821,6 +886,10 @@ static void update_options_from_flags(QDict *options, int flags) qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH, qbool_from_bool(flags & BDRV_O_NO_FLUSH)); } + if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) { + qdict_put(options, BDRV_OPT_READ_ONLY, + qbool_from_bool(!(flags & BDRV_O_RDWR))); + } } static void bdrv_assign_node_name(BlockDriverState *bs, @@ -860,7 +929,7 @@ static void bdrv_assign_node_name(BlockDriverState *bs, g_free(gen_node_name); } -static QemuOptsList bdrv_runtime_opts = { +QemuOptsList bdrv_runtime_opts = { .name = "bdrv_common", .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), .desc = { @@ -884,6 +953,21 @@ static QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_BOOL, .help = "Ignore flush requests", }, + { + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, + .help = "Node is opened in read-only mode", + }, + { + .name = "detect-zeroes", + .type = QEMU_OPT_STRING, + .help = "try to optimize zero writes (off, on, unmap)", + }, + { + .name = "discard", + .type = QEMU_OPT_STRING, + .help = "discard operation (ignore/off, unmap/on)", + }, { /* end of list */ } }, }; @@ -900,6 +984,8 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, const char *filename; const char *driver_name = NULL; const char *node_name = NULL; + const char *discard; + const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; @@ -915,6 +1001,8 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, goto fail_opts; } + update_flags_from_options(&bs->open_flags, opts); + driver_name = qemu_opt_get(opts, "driver"); drv = bdrv_find_format(driver_name); assert(drv != NULL); @@ -966,6 +1054,41 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } } + discard = qemu_opt_get(opts, "discard"); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto fail_opts; + } + } + + detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); + if (detect_zeroes) { + BlockdevDetectZeroesOptions value = + qapi_enum_parse(BlockdevDetectZeroesOptions_lookup, + detect_zeroes, + BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX, + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; + } + + if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + !(bs->open_flags & BDRV_O_UNMAP)) + { 
+ error_setg(errp, "setting detect-zeroes to unmap is not allowed " + "without setting discard operation to unmap"); + ret = -EINVAL; + goto fail_opts; + } + + bs->detect_zeroes = value; + } + if (filename != NULL) { pstrcpy(bs->filename, sizeof(bs->filename), filename); } else { @@ -976,9 +1099,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, bs->drv = drv; bs->opaque = g_malloc0(drv->instance_size); - /* Apply cache mode options */ - update_flags_from_options(&bs->open_flags, opts); - /* Open the image, either directly or using a protocol */ open_flags = bdrv_open_flags(bs, bs->open_flags); if (drv->bdrv_file_open) { @@ -1311,9 +1431,28 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) backing_hd->drv ? backing_hd->drv->format_name : ""); bdrv_op_block_all(backing_hd, bs->backing_blocker); - /* Otherwise we won't be able to commit due to check in bdrv_commit */ + /* Otherwise we won't be able to commit or stream */ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, bs->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM, + bs->backing_blocker); + /* + * We do backup in 3 ways: + * 1. drive backup + * The target bs is new opened, and the source is top BDS + * 2. blockdev backup + * Both the source and the target are top BDSes. + * 3. internal backup(used for block replication) + * Both the source and the target are backing file + * + * In case 1 and 2, neither the source nor the target is the backing file. + * In case 3, we will block the top BDS, so there is only one block job + * for the top BDS and its backing chain. + */ + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE, + bs->backing_blocker); + bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, + bs->backing_blocker); out: bdrv_refresh_limits(bs, NULL); } @@ -1612,6 +1751,25 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, goto fail; } + /* Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags. + * FIXME: we're parsing the QDict to avoid having to create a + * QemuOpts just for this, but neither option is optimal. 
*/ + if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") && + !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) { + flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR); + } else { + flags &= ~BDRV_O_RDWR; + } + + if (flags & BDRV_O_SNAPSHOT) { + snapshot_options = qdict_new(); + bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, + flags, options); + /* Let bdrv_backing_options() override "read-only" */ + qdict_del(options, BDRV_OPT_READ_ONLY); + bdrv_backing_options(&flags, options, flags, options); + } + bs->open_flags = flags; bs->options = options; options = qdict_clone_shallow(options); @@ -1636,18 +1794,6 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, /* Open image file without format layer */ if ((flags & BDRV_O_PROTOCOL) == 0) { - if (flags & BDRV_O_RDWR) { - flags |= BDRV_O_ALLOW_RDWR; - } - if (flags & BDRV_O_SNAPSHOT) { - snapshot_options = qdict_new(); - bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options, - flags, options); - bdrv_backing_options(&flags, options, flags, options); - } - - bs->open_flags = flags; - file = bdrv_open_child(filename, options, "file", bs, &child_file, true, &local_err); if (local_err) { @@ -1832,6 +1978,13 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, options = qdict_new(); } + /* Check if this BlockDriverState is already in the queue */ + QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { + if (bs == bs_entry->state.bs) { + break; + } + } + /* * Precedence of options: * 1. Explicitly passed in options (highest) @@ -1852,7 +2005,11 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, } /* Old explicitly set values (don't overwrite by inherited value) */ - old_options = qdict_clone_shallow(bs->explicit_options); + if (bs_entry) { + old_options = qdict_clone_shallow(bs_entry->state.explicit_options); + } else { + old_options = qdict_clone_shallow(bs->explicit_options); + } bdrv_join_options(bs, options, old_options); QDECREF(old_options); @@ -1891,8 +2048,13 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, child->role, options, flags); } - bs_entry = g_new0(BlockReopenQueueEntry, 1); - QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); + if (!bs_entry) { + bs_entry = g_new0(BlockReopenQueueEntry, 1); + QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry); + } else { + QDECREF(bs_entry->state.options); + QDECREF(bs_entry->state.explicit_options); + } bs_entry->state.bs = bs; bs_entry->state.options = options; @@ -1925,7 +2087,7 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, * to all devices. 
* */ -int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) +int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp) { int ret = -1; BlockReopenQueueEntry *bs_entry, *next; @@ -1933,7 +2095,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) assert(bs_queue != NULL); - bdrv_drain_all(); + aio_context_release(ctx); + bdrv_drain_all_begin(); + aio_context_acquire(ctx); QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) { if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) { @@ -1963,6 +2127,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) g_free(bs_entry); } g_free(bs_queue); + + bdrv_drain_all_end(); + return ret; } @@ -1974,7 +2141,7 @@ int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp) Error *local_err = NULL; BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags); - ret = bdrv_reopen_multiple(queue, &local_err); + ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); } @@ -2209,6 +2376,7 @@ static void bdrv_close(BlockDriverState *bs) void bdrv_close_all(void) { block_job_cancel_sync_all(); + nbd_export_close_all(); /* Drop references from requests still in flight, such as canceled block * jobs whose AIO context has not been polled yet */ @@ -2631,7 +2799,7 @@ const char *bdrv_get_format_name(BlockDriverState *bs) static int qsort_strcmp(const void *a, const void *b) { - return strcmp(a, b); + return strcmp(*(char *const *)a, *(char *const *)b); } void bdrv_iterate_format(void (*it)(void *opaque, const char *name), @@ -2657,6 +2825,24 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), } } + for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) { + const char *format_name = block_driver_modules[i].format_name; + + if (format_name) { + bool found = false; + int j = count; + + while (formats && j && !found) { + found = !strcmp(formats[--j], format_name); + } + + if (!found) { + formats = g_renew(const char *, formats, count + 1); + formats[count++] = format_name; + } + } + } + qsort(formats, count, sizeof(formats[0]), qsort_strcmp); for (i = 0; i < count; i++) { @@ -2949,11 +3135,6 @@ bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) return false; } -int bdrv_is_snapshot(BlockDriverState *bs) -{ - return !!(bs->open_flags & BDRV_O_SNAPSHOT); -} - /* backing_file can either be relative, or absolute, or a protocol. If it is * relative, it must be relative to the chain. 
So, passing in bs->filename * from a BDS as backing_file should not be done, as that may be relative to @@ -3207,17 +3388,10 @@ int bdrv_media_changed(BlockDriverState *bs) void bdrv_eject(BlockDriverState *bs, bool eject_flag) { BlockDriver *drv = bs->drv; - const char *device_name; if (drv && drv->bdrv_eject) { drv->bdrv_eject(bs, eject_flag); } - - device_name = bdrv_get_device_name(bs); - if (device_name[0] != '\0') { - qapi_event_send_device_tray_moved(device_name, - eject_flag, &error_abort); - } } /** diff --git a/block/Makefile.objs b/block/Makefile.objs index 2593a2f8a6..67a036a1df 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,8 +1,8 @@ -block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o +block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o dmg.o block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o -block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o +block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o block-obj-y += block-backend.o snapshot.o qapi.o @@ -22,12 +22,14 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o +block-obj-y += backup.o +block-obj-$(CONFIG_REPLICATION) += replication.o block-obj-y += crypto.o common-obj-y += stream.o -common-obj-y += backup.o +nfs.o-libs := $(LIBNFS_LIBS) iscsi.o-cflags := $(LIBISCSI_CFLAGS) iscsi.o-libs := $(LIBISCSI_LIBS) curl.o-cflags := $(CURL_CFLAGS) @@ -39,7 +41,7 @@ gluster.o-libs := $(GLUSTERFS_LIBS) ssh.o-cflags := $(LIBSSH2_CFLAGS) ssh.o-libs := $(LIBSSH2_LIBS) archipelago.o-libs := $(ARCHIPELAGO_LIBS) -block-obj-m += dmg.o -dmg.o-libs := $(BZIP2_LIBS) +block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o +dmg-bz2.o-libs := $(BZIP2_LIBS) qcow.o-libs := -lz linux-aio.o-libs := -laio diff --git a/block/archipelago.c b/block/archipelago.c index 37b8aca78d..2449cfc702 100644 --- a/block/archipelago.c +++ b/block/archipelago.c @@ -87,7 +87,6 @@ typedef enum { typedef struct ArchipelagoAIOCB { BlockAIOCB common; - QEMUBH *bh; struct BDRVArchipelagoState *s; QEMUIOVector *qiov; ARCHIPCmd cmd; @@ -154,11 +153,10 @@ static void archipelago_finish_aiocb(AIORequestData *reqdata) } else if (reqdata->aio_cb->ret == reqdata->segreq->total) { reqdata->aio_cb->ret = 0; } - reqdata->aio_cb->bh = aio_bh_new( + aio_bh_schedule_oneshot( bdrv_get_aio_context(reqdata->aio_cb->common.bs), qemu_archipelago_complete_aio, reqdata ); - qemu_bh_schedule(reqdata->aio_cb->bh); } static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port, @@ -313,7 +311,6 @@ static void qemu_archipelago_complete_aio(void *opaque) AIORequestData *reqdata = (AIORequestData *) opaque; ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb; - qemu_bh_delete(aio_cb->bh); aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret); aio_cb->status = 0; diff --git a/block/backup.c b/block/backup.c index 2c0532314f..ea38733849 100644 --- a/block/backup.c +++ b/block/backup.c @@ -16,7 +16,8 @@ #include "trace.h" #include "block/block.h" #include "block/block_int.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" +#include "block/block_backup.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" @@ -27,13 +28,6 @@ #define 
BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) #define SLICE_TIME 100000000ULL /* ns */ -typedef struct CowRequest { - int64_t start; - int64_t end; - QLIST_ENTRY(CowRequest) list; - CoQueue wait_queue; /* coroutines blocked on this request */ -} CowRequest; - typedef struct BackupBlockJob { BlockJob common; BlockBackend *target; @@ -47,6 +41,7 @@ typedef struct BackupBlockJob { uint64_t sectors_read; unsigned long *done_bitmap; int64_t cluster_size; + bool compress; NotifierWithReturn before_write; QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; @@ -154,7 +149,8 @@ static int coroutine_fn backup_do_cow(BackupBlockJob *job, bounce_qiov.size, BDRV_REQ_MAY_UNMAP); } else { ret = blk_co_pwritev(job->target, start * job->cluster_size, - bounce_qiov.size, &bounce_qiov, 0); + bounce_qiov.size, &bounce_qiov, + job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0); } if (ret < 0) { trace_backup_do_cow_write_fail(job, start, ret); @@ -246,6 +242,14 @@ static void backup_abort(BlockJob *job) } } +static void backup_clean(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + assert(s->target); + blk_unref(s->target); + s->target = NULL; +} + static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context) { BackupBlockJob *s = container_of(job, BackupBlockJob, common); @@ -253,14 +257,71 @@ static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context) blk_set_aio_context(s->target, aio_context); } -static const BlockJobDriver backup_job_driver = { - .instance_size = sizeof(BackupBlockJob), - .job_type = BLOCK_JOB_TYPE_BACKUP, - .set_speed = backup_set_speed, - .commit = backup_commit, - .abort = backup_abort, - .attached_aio_context = backup_attached_aio_context, -}; +void backup_do_checkpoint(BlockJob *job, Error **errp) +{ + BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); + int64_t len; + + assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); + + if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) { + error_setg(errp, "The backup job only supports block checkpoint in" + " sync=none mode"); + return; + } + + len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size); + bitmap_zero(backup_job->done_bitmap, len); +} + +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, + int nb_sectors) +{ + BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); + int64_t sectors_per_cluster = cluster_size_sectors(backup_job); + int64_t start, end; + + assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); + + start = sector_num / sectors_per_cluster; + end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); + wait_for_overlapping_requests(backup_job, start, end); +} + +void backup_cow_request_begin(CowRequest *req, BlockJob *job, + int64_t sector_num, + int nb_sectors) +{ + BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common); + int64_t sectors_per_cluster = cluster_size_sectors(backup_job); + int64_t start, end; + + assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP); + + start = sector_num / sectors_per_cluster; + end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); + cow_request_begin(req, backup_job, start, end); +} + +void backup_cow_request_end(CowRequest *req) +{ + cow_request_end(req); +} + +static void backup_drain(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + + /* Need to keep a reference in case blk_drain triggers execution + * of backup_complete... 
+ */ + if (s->target) { + BlockBackend *target = s->target; + blk_ref(target); + blk_drain(target); + blk_unref(target); + } +} static BlockErrorAction backup_error_action(BackupBlockJob *job, bool read, int error) @@ -280,11 +341,8 @@ typedef struct { static void backup_complete(BlockJob *job, void *opaque) { - BackupBlockJob *s = container_of(job, BackupBlockJob, common); BackupCompleteData *data = opaque; - blk_unref(s->target); - block_job_completed(job, data->ret); g_free(data); } @@ -325,14 +383,14 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) int64_t end; int64_t last_cluster = -1; int64_t sectors_per_cluster = cluster_size_sectors(job); - HBitmapIter hbi; + BdrvDirtyBitmapIter *dbi; granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); clusters_per_iter = MAX((granularity / job->cluster_size), 1); - bdrv_dirty_iter_init(job->sync_bitmap, &hbi); + dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0); /* Find the next dirty sector(s) */ - while ((sector = hbitmap_iter_next(&hbi)) != -1) { + while ((sector = bdrv_dirty_iter_next(dbi)) != -1) { cluster = sector / sectors_per_cluster; /* Fake progress updates for any clusters we skipped */ @@ -344,7 +402,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) for (end = cluster + clusters_per_iter; cluster < end; cluster++) { do { if (yield_and_check(job)) { - return ret; + goto out; } ret = backup_do_cow(job, cluster * sectors_per_cluster, sectors_per_cluster, &error_is_read, @@ -352,7 +410,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) { - return ret; + goto out; } } while (ret < 0); } @@ -360,7 +418,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. 
*/ if (granularity < job->cluster_size) { - bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster); + bdrv_set_dirty_iter(dbi, cluster * sectors_per_cluster); } last_cluster = cluster - 1; @@ -372,6 +430,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) job->common.offset += ((end - last_cluster - 1) * job->cluster_size); } +out: + bdrv_dirty_iter_free(dbi); return ret; } @@ -380,7 +440,6 @@ static void coroutine_fn backup_run(void *opaque) BackupBlockJob *job = opaque; BackupCompleteData *data; BlockDriverState *bs = blk_bs(job->common.blk); - BlockBackend *target = job->target; int64_t start, end; int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; @@ -467,18 +526,30 @@ static void coroutine_fn backup_run(void *opaque) qemu_co_rwlock_unlock(&job->flush_rwlock); g_free(job->done_bitmap); - bdrv_op_unblock_all(blk_bs(target), job->common.blocker); - data = g_malloc(sizeof(*data)); data->ret = ret; block_job_defer_to_main_loop(&job->common, backup_complete, data); } -void backup_start(const char *job_id, BlockDriverState *bs, +static const BlockJobDriver backup_job_driver = { + .instance_size = sizeof(BackupBlockJob), + .job_type = BLOCK_JOB_TYPE_BACKUP, + .start = backup_run, + .set_speed = backup_set_speed, + .commit = backup_commit, + .abort = backup_abort, + .clean = backup_clean, + .attached_aio_context = backup_attached_aio_context, + .drain = backup_drain, +}; + +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, BlockDriverState *target, int64_t speed, MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, + bool compress, BlockdevOnError on_source_error, BlockdevOnError on_target_error, + int creation_flags, BlockCompletionFunc *cb, void *opaque, BlockJobTxn *txn, Error **errp) { @@ -492,46 +563,52 @@ void backup_start(const char *job_id, BlockDriverState *bs, if (bs == target) { error_setg(errp, "Source and target cannot be the same"); - return; + return NULL; } if (!bdrv_is_inserted(bs)) { error_setg(errp, "Device is not inserted: %s", bdrv_get_device_name(bs)); - return; + return NULL; } if (!bdrv_is_inserted(target)) { error_setg(errp, "Device is not inserted: %s", bdrv_get_device_name(target)); - return; + return NULL; + } + + if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) { + error_setg(errp, "Compression is not supported for this drive %s", + bdrv_get_device_name(target)); + return NULL; } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { - return; + return NULL; } if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) { - return; + return NULL; } if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) { if (!sync_bitmap) { error_setg(errp, "must provide a valid bitmap name for " "\"incremental\" sync mode"); - return; + return NULL; } /* Create a new bitmap, and freeze/disable this one. 
*/ if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) { - return; + return NULL; } } else if (sync_bitmap) { error_setg(errp, "a sync_bitmap was provided to backup_run, " "but received an incompatible sync_mode (%s)", MirrorSyncMode_lookup[sync_mode]); - return; + return NULL; } len = bdrv_getlength(bs); @@ -542,7 +619,7 @@ void backup_start(const char *job_id, BlockDriverState *bs, } job = block_job_create(job_id, &backup_job_driver, bs, speed, - cb, opaque, errp); + creation_flags, cb, opaque, errp); if (!job) { goto error; } @@ -555,6 +632,7 @@ void backup_start(const char *job_id, BlockDriverState *bs, job->sync_mode = sync_mode; job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? sync_bitmap : NULL; + job->compress = compress; /* If there is no backing file on the target, we cannot rely on COW if our * backup cluster size is smaller than the target cluster size. Even for @@ -574,19 +652,20 @@ void backup_start(const char *job_id, BlockDriverState *bs, job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); } - bdrv_op_block_all(target, job->common.blocker); + block_job_add_bdrv(&job->common, target); job->common.len = len; - job->common.co = qemu_coroutine_create(backup_run, job); block_job_txn_add_job(txn, &job->common); - qemu_coroutine_enter(job->common.co); - return; + + return &job->common; error: if (sync_bitmap) { bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL); } if (job) { - blk_unref(job->target); + backup_clean(&job->common); block_job_unref(&job->common); } + + return NULL; } diff --git a/block/blkdebug.c b/block/blkdebug.c index d5db166815..4127571454 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -49,7 +49,6 @@ typedef struct BDRVBlkdebugState { typedef struct BlkdebugAIOCB { BlockAIOCB common; - QEMUBH *bh; int ret; } BlkdebugAIOCB; @@ -410,7 +409,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, static void error_callback_bh(void *opaque) { struct BlkdebugAIOCB *acb = opaque; - qemu_bh_delete(acb->bh); acb->common.cb(acb->common.opaque, acb->ret); qemu_aio_unref(acb); } @@ -421,7 +419,6 @@ static BlockAIOCB *inject_error(BlockDriverState *bs, BDRVBlkdebugState *s = bs->opaque; int error = rule->options.inject.error; struct BlkdebugAIOCB *acb; - QEMUBH *bh; bool immediately = rule->options.inject.immediately; if (rule->options.inject.once) { @@ -436,9 +433,7 @@ static BlockAIOCB *inject_error(BlockDriverState *bs, acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque); acb->ret = -error; - bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb); - acb->bh = bh; - qemu_bh_schedule(bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), error_callback_bh, acb); return &acb->common; } diff --git a/block/blkreplay.c b/block/blkreplay.c index 30f9d5ff6c..a741654d35 100755 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -20,11 +20,6 @@ typedef struct Request { QEMUBH *bh; } Request; -/* Next request id. - This counter is global, because requests from different - block devices should not get overlapping ids. 
*/ -static uint64_t request_id; - static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -84,7 +79,7 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs, static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - uint64_t reqid = request_id++; + uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); @@ -95,7 +90,7 @@ static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs, static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - uint64_t reqid = request_id++; + uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); @@ -106,7 +101,7 @@ static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs, static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int count, BdrvRequestFlags flags) { - uint64_t reqid = request_id++; + uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); @@ -117,7 +112,7 @@ static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs, static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { - uint64_t reqid = request_id++; + uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_pdiscard(bs->file->bs, offset, count); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); @@ -127,7 +122,7 @@ static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs, static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs) { - uint64_t reqid = request_id++; + uint64_t reqid = blkreplay_next_id(); int ret = bdrv_co_flush(bs->file->bs); block_request_create(reqid, bs, qemu_coroutine_self()); qemu_coroutine_yield(); diff --git a/block/blkverify.c b/block/blkverify.c index da62d75969..28f9af6dba 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -22,7 +22,6 @@ typedef struct { typedef struct BlkverifyAIOCB BlkverifyAIOCB; struct BlkverifyAIOCB { BlockAIOCB common; - QEMUBH *bh; /* Request metadata */ bool is_write; @@ -175,7 +174,6 @@ static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, { BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque); - acb->bh = NULL; acb->is_write = is_write; acb->sector_num = sector_num; acb->nb_sectors = nb_sectors; @@ -191,7 +189,6 @@ static void blkverify_aio_bh(void *opaque) { BlkverifyAIOCB *acb = opaque; - qemu_bh_delete(acb->bh); if (acb->buf) { qemu_iovec_destroy(&acb->raw_qiov); qemu_vfree(acb->buf); @@ -218,9 +215,8 @@ static void blkverify_aio_cb(void *opaque, int ret) acb->verify(acb); } - acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), - blkverify_aio_bh, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), + blkverify_aio_bh, acb); break; } } diff --git a/block/block-backend.c b/block/block-backend.c index effa038924..efbf398bb5 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -38,6 +38,7 @@ struct BlockBackend { BlockBackendPublic public; void *dev; /* attached device model, if any */ + bool legacy_dev; 
/* true if dev is not a DeviceState */ /* TODO change to DeviceState when all users are qdevified */ const BlockDevOps *dev_ops; void *dev_opaque; @@ -65,7 +66,6 @@ struct BlockBackend { typedef struct BlockBackendAIOCB { BlockAIOCB common; - QEMUBH *bh; BlockBackend *blk; int ret; } BlockBackendAIOCB; @@ -409,6 +409,22 @@ bool bdrv_has_blk(BlockDriverState *bs) return bdrv_first_blk(bs) != NULL; } +/* + * Returns true if @bs has only BlockBackends as parents. + */ +bool bdrv_is_root_node(BlockDriverState *bs) +{ + BdrvChild *c; + + QLIST_FOREACH(c, &bs->parents, next_parent) { + if (c->role != &child_root) { + return false; + } + } + + return true; +} + /* * Return @blk's DriveInfo if any, else null. */ @@ -491,32 +507,38 @@ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) } } -/* - * Attach device model @dev to @blk. - * Return 0 on success, -EBUSY when a device model is attached already. - */ -int blk_attach_dev(BlockBackend *blk, void *dev) -/* TODO change to DeviceState *dev when all users are qdevified */ +static int blk_do_attach_dev(BlockBackend *blk, void *dev) { if (blk->dev) { return -EBUSY; } blk_ref(blk); blk->dev = dev; + blk->legacy_dev = false; blk_iostatus_reset(blk); return 0; } +/* + * Attach device model @dev to @blk. + * Return 0 on success, -EBUSY when a device model is attached already. + */ +int blk_attach_dev(BlockBackend *blk, DeviceState *dev) +{ + return blk_do_attach_dev(blk, dev); +} + /* * Attach device model @dev to @blk. * @blk must not have a device model attached already. * TODO qdevified devices don't use this, remove when devices are qdevified */ -void blk_attach_dev_nofail(BlockBackend *blk, void *dev) +void blk_attach_dev_legacy(BlockBackend *blk, void *dev) { - if (blk_attach_dev(blk, dev) < 0) { + if (blk_do_attach_dev(blk, dev) < 0) { abort(); } + blk->legacy_dev = true; } /* @@ -543,6 +565,42 @@ void *blk_get_attached_dev(BlockBackend *blk) return blk->dev; } +/* Return the qdev ID, or if no ID is assigned the QOM path, of the block + * device attached to the BlockBackend. */ +static char *blk_get_attached_dev_id(BlockBackend *blk) +{ + DeviceState *dev; + + assert(!blk->legacy_dev); + dev = blk->dev; + + if (!dev) { + return g_strdup(""); + } else if (dev->id) { + return g_strdup(dev->id); + } + return object_get_canonical_path(OBJECT(dev)); +} + +/* + * Return the BlockBackend which has the device model @dev attached if it + * exists, else null. + * + * @dev must not be null. + */ +BlockBackend *blk_by_dev(void *dev) +{ + BlockBackend *blk = NULL; + + assert(dev != NULL); + while ((blk = blk_all_next(blk)) != NULL) { + if (blk->dev == dev) { + return blk; + } + } + return NULL; +} + /* * Set @blk's device model callbacks to @ops. * @opaque is the opaque argument to pass to the callbacks. @@ -551,6 +609,11 @@ void *blk_get_attached_dev(BlockBackend *blk) void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque) { + /* All drivers that use blk_set_dev_ops() are qdevified and we want to keep + * it that way, so we can assume blk->dev is a DeviceState if blk->dev_ops + * is set. 
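/* Minimal sketch of the qdevified attach path that the assertion below relies on;
 * the device model my_device_realize() and its BlockDevOps table my_dev_ops are
 * hypothetical and only illustrate the calls added in this hunk. Legacy callers
 * instead go through blk_attach_dev_legacy() and never install dev_ops. */
static void my_device_realize(DeviceState *dev, BlockBackend *blk, Error **errp)
{
    if (blk_attach_dev(blk, dev) < 0) {
        error_setg(errp, "a device model is already attached to this backend");
        return;
    }
    /* Only reached with a real DeviceState, so blk->legacy_dev stays false and
     * installing device callbacks is permitted. */
    blk_set_dev_ops(blk, &my_dev_ops, dev);
}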
*/ + assert(!blk->legacy_dev); + blk->dev_ops = ops; blk->dev_opaque = opaque; } @@ -566,13 +629,17 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load) if (blk->dev_ops && blk->dev_ops->change_media_cb) { bool tray_was_open, tray_is_open; + assert(!blk->legacy_dev); + tray_was_open = blk_dev_is_tray_open(blk); blk->dev_ops->change_media_cb(blk->dev_opaque, load); tray_is_open = blk_dev_is_tray_open(blk); if (tray_was_open != tray_is_open) { - qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open, + char *id = blk_get_attached_dev_id(blk); + qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open, &error_abort); + g_free(id); } } } @@ -727,40 +794,30 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return 0; } -static int blk_check_request(BlockBackend *blk, int64_t sector_num, - int nb_sectors) -{ - if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) { - return -EIO; - } - - if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { - return -EIO; - } - - return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); -} - int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { int ret; + BlockDriverState *bs = blk_bs(blk); - trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags); + trace_blk_co_preadv(blk, bs, offset, bytes, flags); ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } + bdrv_inc_in_flight(bs); + /* throttling disk I/O */ if (blk->public.throttle_state) { throttle_group_co_io_limits_intercept(blk, bytes, false); } - return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); + ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags); + bdrv_dec_in_flight(bs); + return ret; } int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, @@ -768,14 +825,17 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, BdrvRequestFlags flags) { int ret; + BlockDriverState *bs = blk_bs(blk); - trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags); + trace_blk_co_pwritev(blk, bs, offset, bytes, flags); ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } + bdrv_inc_in_flight(bs); + /* throttling disk I/O */ if (blk->public.throttle_state) { throttle_group_co_io_limits_intercept(blk, bytes, true); @@ -785,7 +845,9 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, flags |= BDRV_REQ_FUA; } - return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags); + ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags); + bdrv_dec_in_flight(bs); + return ret; } typedef struct BlkRwCo { @@ -816,7 +878,6 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, int64_t bytes, CoroutineEntry co_entry, BdrvRequestFlags flags) { - AioContext *aio_context; QEMUIOVector qiov; struct iovec iov; Coroutine *co; @@ -838,11 +899,7 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, co = qemu_coroutine_create(co_entry, &rwco); qemu_coroutine_enter(co); - - aio_context = blk_get_aio_context(blk); - while (rwco.ret == NOT_DONE) { - aio_poll(aio_context, true); - } + BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE); return rwco.ret; } @@ -878,7 +935,8 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags) static void error_callback_bh(void *opaque) { struct BlockBackendAIOCB *acb = opaque; - qemu_bh_delete(acb->bh); + + bdrv_dec_in_flight(acb->common.bs); 
acb->common.cb(acb->common.opaque, acb->ret); qemu_aio_unref(acb); } @@ -888,16 +946,13 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, void *opaque, int ret) { struct BlockBackendAIOCB *acb; - QEMUBH *bh; + bdrv_inc_in_flight(blk_bs(blk)); acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); acb->blk = blk; acb->ret = ret; - bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb); - acb->bh = bh; - qemu_bh_schedule(bh); - + aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb); return &acb->common; } @@ -906,7 +961,6 @@ typedef struct BlkAioEmAIOCB { BlkRwCo rwco; int bytes; bool has_returned; - QEMUBH* bh; } BlkAioEmAIOCB; static const AIOCBInfo blk_aio_em_aiocb_info = { @@ -915,11 +969,8 @@ static const AIOCBInfo blk_aio_em_aiocb_info = { static void blk_aio_complete(BlkAioEmAIOCB *acb) { - if (acb->bh) { - assert(acb->has_returned); - qemu_bh_delete(acb->bh); - } if (acb->has_returned) { + bdrv_dec_in_flight(acb->common.bs); acb->common.cb(acb->common.opaque, acb->rwco.ret); qemu_aio_unref(acb); } @@ -927,7 +978,10 @@ static void blk_aio_complete(BlkAioEmAIOCB *acb) static void blk_aio_complete_bh(void *opaque) { - blk_aio_complete(opaque); + BlkAioEmAIOCB *acb = opaque; + + assert(acb->has_returned); + blk_aio_complete(acb); } static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, @@ -938,6 +992,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, BlkAioEmAIOCB *acb; Coroutine *co; + bdrv_inc_in_flight(blk_bs(blk)); acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); acb->rwco = (BlkRwCo) { .blk = blk, @@ -947,7 +1002,6 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, .ret = NOT_DONE, }; acb->bytes = bytes; - acb->bh = NULL; acb->has_returned = false; co = qemu_coroutine_create(co_entry, acb); @@ -955,8 +1009,8 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, acb->has_returned = true; if (acb->rwco.ret != NOT_DONE) { - acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(blk_get_aio_context(blk), + blk_aio_complete_bh, acb); } return &acb->common; @@ -1055,26 +1109,36 @@ BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset, blk_aio_write_entry, flags, cb, opaque); } +static void blk_aio_flush_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_flush(rwco->blk); + blk_aio_complete(acb); +} + BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { - if (!blk_is_available(blk)) { - return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); - } + return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque); +} + +static void blk_aio_pdiscard_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; - return bdrv_aio_flush(blk_bs(blk), cb, opaque); + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes); + blk_aio_complete(acb); } BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int count, BlockCompletionFunc *cb, void *opaque) { - int ret = blk_check_byte_request(blk, offset, count); - if (ret < 0) { - return blk_abort_aio_request(blk, cb, opaque, ret); - } - - return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque); + return blk_aio_prwv(blk, offset, count, NULL, blk_aio_pdiscard_entry, 0, + cb, opaque); } void blk_aio_cancel(BlockAIOCB *acb) @@ -1087,23 +1151,50 @@ void 
blk_aio_cancel_async(BlockAIOCB *acb) bdrv_aio_cancel_async(acb); } -int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { if (!blk_is_available(blk)) { return -ENOMEDIUM; } - return bdrv_ioctl(blk_bs(blk), req, buf); + return bdrv_co_ioctl(blk_bs(blk), req, buf); +} + +static void blk_ioctl_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, + rwco->qiov->iov[0].iov_base); +} + +int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) +{ + return blk_prw(blk, req, buf, 0, blk_ioctl_entry, 0); +} + +static void blk_aio_ioctl_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, + rwco->qiov->iov[0].iov_base); + blk_aio_complete(acb); } BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { - if (!blk_is_available(blk)) { - return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); - } + QEMUIOVector qiov; + struct iovec iov; - return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque); + iov = (struct iovec) { + .iov_base = buf, + .iov_len = 0, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque); } int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count) @@ -1125,13 +1216,15 @@ int blk_co_flush(BlockBackend *blk) return bdrv_co_flush(blk_bs(blk)); } -int blk_flush(BlockBackend *blk) +static void blk_flush_entry(void *opaque) { - if (!blk_is_available(blk)) { - return -ENOMEDIUM; - } + BlkRwCo *rwco = opaque; + rwco->ret = blk_co_flush(rwco->blk); +} - return bdrv_flush(blk_bs(blk)); +int blk_flush(BlockBackend *blk) +{ + return blk_prw(blk, 0, NULL, 0, blk_flush_entry, 0); } void blk_drain(BlockBackend *blk) @@ -1186,8 +1279,9 @@ static void send_qmp_error_event(BlockBackend *blk, IoOperationType optype; optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; - qapi_event_send_block_io_error(blk_name(blk), optype, action, - blk_iostatus_is_enabled(blk), + qapi_event_send_block_io_error(blk_name(blk), + bdrv_get_node_name(blk_bs(blk)), optype, + action, blk_iostatus_is_enabled(blk), error == ENOSPC, strerror(error), &error_abort); } @@ -1292,10 +1386,21 @@ void blk_lock_medium(BlockBackend *blk, bool locked) void blk_eject(BlockBackend *blk, bool eject_flag) { BlockDriverState *bs = blk_bs(blk); + char *id; + + /* blk_eject is only called by qdevified devices */ + assert(!blk->legacy_dev); if (bs) { bdrv_eject(bs, eject_flag); } + + /* Whether or not we ejected on the backend, + * the frontend experienced a tray event. 
*/ + id = blk_get_attached_dev_id(blk); + qapi_event_send_device_tray_moved(blk_name(blk), id, + eject_flag, &error_abort); + g_free(id); } int blk_get_flags(BlockBackend *blk) @@ -1484,15 +1589,11 @@ int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, flags | BDRV_REQ_ZERO_WRITE); } -int blk_write_compressed(BlockBackend *blk, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + int count) { - int ret = blk_check_request(blk, sector_num, nb_sectors); - if (ret < 0) { - return ret; - } - - return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors); + return blk_prw(blk, offset, (void *) buf, count, blk_write_entry, + BDRV_REQ_WRITE_COMPRESSED); } int blk_truncate(BlockBackend *blk, int64_t offset) @@ -1504,14 +1605,15 @@ int blk_truncate(BlockBackend *blk, int64_t offset) return bdrv_truncate(blk_bs(blk), offset); } -int blk_pdiscard(BlockBackend *blk, int64_t offset, int count) +static void blk_pdiscard_entry(void *opaque) { - int ret = blk_check_byte_request(blk, offset, count); - if (ret < 0) { - return ret; - } + BlkRwCo *rwco = opaque; + rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size); +} - return bdrv_pdiscard(blk_bs(blk), offset, count); +int blk_pdiscard(BlockBackend *blk, int64_t offset, int count) +{ + return blk_prw(blk, offset, NULL, count, blk_pdiscard_entry, 0); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, @@ -1576,13 +1678,12 @@ void blk_update_root_state(BlockBackend *blk) } /* - * Applies the information in the root state to the given BlockDriverState. This - * does not include the flags which have to be specified for bdrv_open(), use - * blk_get_open_flags_from_root_state() to inquire them. + * Returns the detect-zeroes setting to be used for bdrv_open() of a + * BlockDriverState which is supposed to inherit the root state. 
*/ -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs) +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { - bs->detect_zeroes = blk->root_state.detect_zeroes; + return blk->root_state.detect_zeroes; } /* @@ -1624,28 +1725,6 @@ int blk_commit_all(void) return 0; } -int blk_flush_all(void) -{ - BlockBackend *blk = NULL; - int result = 0; - - while ((blk = blk_all_next(blk)) != NULL) { - AioContext *aio_context = blk_get_aio_context(blk); - int ret; - - aio_context_acquire(aio_context); - if (blk_is_inserted(blk)) { - ret = blk_flush(blk); - if (ret < 0 && !result) { - result = ret; - } - } - aio_context_release(aio_context); - } - - return result; -} - /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) diff --git a/block/commit.c b/block/commit.c index 553e18da52..c284e8535d 100644 --- a/block/commit.c +++ b/block/commit.c @@ -15,7 +15,7 @@ #include "qemu/osdep.h" #include "trace.h" #include "block/block_int.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" @@ -83,7 +83,7 @@ static void commit_complete(BlockJob *job, void *opaque) BlockDriverState *active = s->active; BlockDriverState *top = blk_bs(s->top); BlockDriverState *base = blk_bs(s->base); - BlockDriverState *overlay_bs; + BlockDriverState *overlay_bs = bdrv_find_overlay(active, top); int ret = data->ret; if (!block_job_is_cancelled(&s->common) && ret == 0) { @@ -97,7 +97,6 @@ static void commit_complete(BlockJob *job, void *opaque) if (s->base_flags != bdrv_get_flags(base)) { bdrv_reopen(base, s->base_flags, NULL); } - overlay_bs = bdrv_find_overlay(active, top); if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } @@ -206,17 +205,19 @@ static const BlockJobDriver commit_job_driver = { .instance_size = sizeof(CommitBlockJob), .job_type = BLOCK_JOB_TYPE_COMMIT, .set_speed = commit_set_speed, + .start = commit_run, }; void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, - BlockdevOnError on_error, BlockCompletionFunc *cb, - void *opaque, const char *backing_file_str, Error **errp) + BlockdevOnError on_error, const char *backing_file_str, + Error **errp) { CommitBlockJob *s; BlockReopenQueue *reopen_queue = NULL; int orig_overlay_flags; int orig_base_flags; + BlockDriverState *iter; BlockDriverState *overlay_bs; Error *local_err = NULL; @@ -234,7 +235,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, } s = block_job_create(job_id, &commit_job_driver, bs, speed, - cb, opaque, errp); + BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } @@ -243,16 +244,16 @@ void commit_start(const char *job_id, BlockDriverState *bs, orig_overlay_flags = bdrv_get_flags(overlay_bs); /* convert base & overlay_bs to r/w, if necessary */ - if (!(orig_overlay_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, - orig_overlay_flags | BDRV_O_RDWR); - } if (!(orig_base_flags & BDRV_O_RDWR)) { reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, orig_base_flags | BDRV_O_RDWR); } + if (!(orig_overlay_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, + orig_overlay_flags | BDRV_O_RDWR); + } if (reopen_queue) { - bdrv_reopen_multiple(reopen_queue, &local_err); + bdrv_reopen_multiple(bdrv_get_aio_context(bs), reopen_queue, &local_err); if (local_err 
!= NULL) { error_propagate(errp, local_err); block_job_unref(&s->common); @@ -261,6 +262,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, } + /* Block all nodes between top and base, because they will + * disappear from the chain after this operation. */ + assert(bdrv_chain_contains(top, base)); + for (iter = top; iter != backing_bs(base); iter = backing_bs(iter)) { + block_job_add_bdrv(&s->common, iter); + } + /* overlay_bs must be blocked because it needs to be modified to + * update the backing image string, but if it's the root node then + * don't block it again */ + if (bs != overlay_bs) { + block_job_add_bdrv(&s->common, overlay_bs); + } + s->base = blk_new(); blk_insert_bs(s->base, base); @@ -275,10 +289,9 @@ void commit_start(const char *job_id, BlockDriverState *bs, s->backing_file_str = g_strdup(backing_file_str); s->on_error = on_error; - s->common.co = qemu_coroutine_create(commit_run, s); - trace_commit_start(bs, base, top, s, s->common.co, opaque); - qemu_coroutine_enter(s->common.co); + trace_commit_start(bs, base, top, s); + block_job_start(&s->common); } diff --git a/block/crypto.c b/block/crypto.c index 7f61e12686..7aa7eb553e 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -33,6 +33,7 @@ #define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg" #define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg" #define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg" +#define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time" typedef struct BlockCrypto BlockCrypto; @@ -183,6 +184,11 @@ static QemuOptsList block_crypto_create_opts_luks = { .type = QEMU_OPT_STRING, .help = "Name of encryption hash algorithm", }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_ITER_TIME, + .type = QEMU_OPT_NUMBER, + .help = "Time to spend in PBKDF in milliseconds", + }, { /* end of list */ } }, }; diff --git a/block/curl.c b/block/curl.c index 426fb4d674..0404c1b5fa 100644 --- a/block/curl.c +++ b/block/curl.c @@ -68,12 +68,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle, #endif #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \ - CURLPROTO_FTP | CURLPROTO_FTPS | \ - CURLPROTO_TFTP) + CURLPROTO_FTP | CURLPROTO_FTPS) #define CURL_NUM_STATES 8 #define CURL_NUM_ACB 8 -#define SECTOR_SIZE 512 #define READ_AHEAD_DEFAULT (256 * 1024) #define CURL_TIMEOUT_DEFAULT 5 #define CURL_TIMEOUT_MAX 10000 @@ -96,7 +94,6 @@ struct BDRVCURLState; typedef struct CURLAIOCB { BlockAIOCB common; - QEMUBH *bh; QEMUIOVector *qiov; int64_t sector_num; @@ -106,12 +103,17 @@ typedef struct CURLAIOCB { size_t end; } CURLAIOCB; +typedef struct CURLSocket { + int fd; + QLIST_ENTRY(CURLSocket) next; +} CURLSocket; + typedef struct CURLState { struct BDRVCURLState *s; CURLAIOCB *acb[CURL_NUM_ACB]; CURL *curl; - curl_socket_t sock_fd; + QLIST_HEAD(, CURLSocket) sockets; char *orig_buf; size_t buf_start; size_t buf_off; @@ -165,10 +167,27 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, { BDRVCURLState *s; CURLState *state = NULL; + CURLSocket *socket; + curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state); - state->sock_fd = fd; s = state->s; + QLIST_FOREACH(socket, &state->sockets, next) { + if (socket->fd == fd) { + if (action == CURL_POLL_REMOVE) { + QLIST_REMOVE(socket, next); + g_free(socket); + } + break; + } + } + if (!socket) { + socket = g_new0(CURLSocket, 1); + socket->fd = fd; + QLIST_INSERT_HEAD(&state->sockets, socket, next); + } + socket = NULL; + DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd); switch (action) { case CURL_POLL_IN: @@ -214,12 +233,13 @@ 
static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) DPRINTF("CURL: Just reading %zd bytes\n", realsize); - if (!s || !s->orig_buf) - return 0; + if (!s || !s->orig_buf) { + goto read_end; + } if (s->buf_off >= s->buf_len) { /* buffer full, read nothing */ - return 0; + goto read_end; } realsize = MIN(realsize, s->buf_len - s->buf_off); memcpy(s->orig_buf + s->buf_off, ptr, realsize); @@ -232,15 +252,26 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) continue; if ((s->buf_off >= acb->end)) { + size_t request_length = acb->nb_sectors * BDRV_SECTOR_SIZE; + qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start, acb->end - acb->start); + + if (acb->end - acb->start < request_length) { + size_t offset = acb->end - acb->start; + qemu_iovec_memset(acb->qiov, offset, 0, + request_length - offset); + } + acb->common.cb(acb->common.opaque, 0); qemu_aio_unref(acb); s->acb[i] = NULL; } } - return realsize; +read_end: + /* curl will error out if we do not return this value */ + return size * nmemb; } static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, @@ -248,6 +279,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, { int i; size_t end = start + len; + size_t clamped_end = MIN(end, s->len); + size_t clamped_len = clamped_end - start; for (i=0; i<CURL_NUM_STATES; i++) { CURLState *state = &s->states[i]; // Does the existing buffer cover our section? if ((start >= state->buf_start) && (start <= buf_end) && - (end >= state->buf_start) && - (end <= buf_end)) + (clamped_end >= state->buf_start) && + (clamped_end <= buf_end)) { char *buf = state->orig_buf + (start - state->buf_start); - qemu_iovec_from_buf(acb->qiov, 0, buf, len); + qemu_iovec_from_buf(acb->qiov, 0, buf, clamped_len); + if (clamped_len < len) { + qemu_iovec_memset(acb->qiov, clamped_len, 0, len - clamped_len); + } acb->common.cb(acb->common.opaque, 0); return FIND_RET_OK; @@ -277,13 +313,13 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, if (state->in_use && (start >= state->buf_start) && (start <= buf_fend) && - (end >= state->buf_start) && - (end <= buf_fend)) + (clamped_end >= state->buf_start) && + (clamped_end <= buf_fend)) { int j; acb->start = start - state->buf_start; - acb->end = acb->start + len; + acb->end = acb->start + clamped_len; for (j=0; j<CURL_NUM_ACB; j++) { if (!state->acb[j]) { @@ -353,6 +389,7 @@ static void curl_multi_check_completion(BDRVCURLState *s) static void curl_multi_do(void *arg) { CURLState *s = (CURLState *)arg; + CURLSocket *socket, *next_socket; int running; int r; @@ -360,10 +397,13 @@ static void curl_multi_do(void *arg) return; } - do { - r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running); - } while(r == CURLM_CALL_MULTI_PERFORM); - + /* Need to use _SAFE because curl_multi_socket_action() may trigger + * curl_sock_cb() which might modify this list */ + QLIST_FOREACH_SAFE(socket, &s->sockets, next, next_socket) { + do { + r = curl_multi_socket_action(s->s->multi, socket->fd, 0, &running); + } while (r == CURLM_CALL_MULTI_PERFORM); + } } static void curl_multi_read(void *arg) @@ -467,6 +507,7 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s) #endif } + QLIST_INIT(&state->sockets); state->s = s; return state; @@ -476,6 +517,14 @@ static void curl_clean_state(CURLState *s) { if (s->s->multi) curl_multi_remove_handle(s->s->multi, s->curl); + + while (!QLIST_EMPTY(&s->sockets)) { + CURLSocket *socket = QLIST_FIRST(&s->sockets); +
+ QLIST_REMOVE(socket, next); + g_free(socket); + } + s->in_use = 0; } @@ -675,11 +724,28 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s); if (curl_easy_perform(state->curl)) goto out; - curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); - if (d) - s->len = (size_t)d; - else if(!s->len) + if (curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d)) { + goto out; + } + /* Prior CURL 7.19.4 return value of 0 could mean that the file size is not + * know or the size is zero. From 7.19.4 CURL returns -1 if size is not + * known and zero if it is realy zero-length file. */ +#if LIBCURL_VERSION_NUM >= 0x071304 + if (d < 0) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "Server didn't report file size."); + goto out; + } +#else + if (d <= 0) { + pstrcpy(state->errmsg, CURL_ERROR_SIZE, + "Unknown file size or zero-length file."); goto out; + } +#endif + + s->len = (size_t)d; + if ((!strncasecmp(s->url, "http://", strlen("http://")) || !strncasecmp(s->url, "https://", strlen("https://"))) && !s->accept_range) { @@ -722,15 +788,12 @@ static void curl_readv_bh_cb(void *p) CURLAIOCB *acb = p; BDRVCURLState *s = acb->common.bs->opaque; - qemu_bh_delete(acb->bh); - acb->bh = NULL; - - size_t start = acb->sector_num * SECTOR_SIZE; + size_t start = acb->sector_num * BDRV_SECTOR_SIZE; size_t end; // In case we have the requested data already (e.g. read-ahead), // we can just call the callback and be done. - switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) { + switch (curl_find_buf(s, start, acb->nb_sectors * BDRV_SECTOR_SIZE, acb)) { case FIND_RET_OK: qemu_aio_unref(acb); // fall through @@ -749,13 +812,13 @@ static void curl_readv_bh_cb(void *p) } acb->start = 0; - acb->end = (acb->nb_sectors * SECTOR_SIZE); + acb->end = MIN(acb->nb_sectors * BDRV_SECTOR_SIZE, s->len - start); state->buf_off = 0; g_free(state->orig_buf); state->buf_start = start; - state->buf_len = acb->end + s->readahead_size; - end = MIN(start + state->buf_len, s->len) - 1; + state->buf_len = MIN(acb->end + s->readahead_size, s->len - start); + end = start + state->buf_len - 1; state->orig_buf = g_try_malloc(state->buf_len); if (state->buf_len && state->orig_buf == NULL) { curl_clean_state(state); @@ -766,8 +829,8 @@ static void curl_readv_bh_cb(void *p) state->acb[0] = acb; snprintf(state->range, 127, "%zd-%zd", start, end); - DPRINTF("CURL (AIO): Reading %d at %zd (%s)\n", - (acb->nb_sectors * SECTOR_SIZE), start, state->range); + DPRINTF("CURL (AIO): Reading %llu at %zd (%s)\n", + (acb->nb_sectors * BDRV_SECTOR_SIZE), start, state->range); curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range); curl_multi_add_handle(s->multi, state->curl); @@ -788,8 +851,7 @@ static BlockAIOCB *curl_aio_readv(BlockDriverState *bs, acb->sector_num = sector_num; acb->nb_sectors = nb_sectors; - acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb); return &acb->common; } @@ -874,29 +936,12 @@ static BlockDriver bdrv_ftps = { .bdrv_attach_aio_context = curl_attach_aio_context, }; -static BlockDriver bdrv_tftp = { - .format_name = "tftp", - .protocol_name = "tftp", - - .instance_size = sizeof(BDRVCURLState), - .bdrv_parse_filename = curl_parse_filename, - .bdrv_file_open = curl_open, - .bdrv_close = curl_close, - .bdrv_getlength = curl_getlength, - - .bdrv_aio_readv = curl_aio_readv, - - 
.bdrv_detach_aio_context = curl_detach_aio_context, - .bdrv_attach_aio_context = curl_attach_aio_context, -}; - static void curl_block_init(void) { bdrv_register(&bdrv_http); bdrv_register(&bdrv_https); bdrv_register(&bdrv_ftp); bdrv_register(&bdrv_ftps); - bdrv_register(&bdrv_tftp); } block_init(curl_block_init); diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index f2bfdcfdea..519737c8d3 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -38,13 +38,20 @@ */ struct BdrvDirtyBitmap { HBitmap *bitmap; /* Dirty sector bitmap implementation */ + HBitmap *meta; /* Meta dirty bitmap */ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ char *name; /* Optional non-empty unique ID */ int64_t size; /* Size of the bitmap (Number of sectors) */ bool disabled; /* Bitmap is read-only */ + int active_iterators; /* How many iterators are active */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; +struct BdrvDirtyBitmapIter { + HBitmapIter hbi; + BdrvDirtyBitmap *bitmap; +}; + BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) { BdrvDirtyBitmap *bm; @@ -97,6 +104,66 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, return bitmap; } +/* bdrv_create_meta_dirty_bitmap + * + * Create a meta dirty bitmap that tracks the changes of bits in @bitmap. I.e. + * when a dirty status bit in @bitmap is changed (either from reset to set or + * the other way around), its respective meta dirty bitmap bit will be marked + * dirty as well. + * + * @bitmap: the block dirty bitmap for which to create a meta dirty bitmap. + * @chunk_size: how many bytes of bitmap data does each bit in the meta bitmap + * track. + */ +void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int chunk_size) +{ + assert(!bitmap->meta); + bitmap->meta = hbitmap_create_meta(bitmap->bitmap, + chunk_size * BITS_PER_BYTE); +} + +void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(bitmap->meta); + hbitmap_free_meta(bitmap->bitmap); + bitmap->meta = NULL; +} + +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + uint64_t i; + int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta); + + /* To optimize: we can make hbitmap to internally check the range in a + * coarse level, or at least do it word by word. 
*/ + for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) { + if (hbitmap_get(bitmap->meta, i)) { + return true; + } + } + return false; +} + +void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors) +{ + hbitmap_reset(bitmap->meta, sector, nb_sectors); +} + +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->size; +} + +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap) +{ + return bitmap->name; +} + bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) { return bitmap->successor; @@ -212,6 +279,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { assert(!bdrv_dirty_bitmap_frozen(bitmap)); + assert(!bitmap->active_iterators); hbitmap_truncate(bitmap->bitmap, size); bitmap->size = size; } @@ -224,7 +292,9 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bm, *next; QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + assert(!bm->active_iterators); assert(!bdrv_dirty_bitmap_frozen(bm)); + assert(!bm->meta); QLIST_REMOVE(bm, list); hbitmap_free(bm->bitmap); g_free(bm->name); @@ -235,6 +305,9 @@ static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, } } } + if (bitmap) { + abort(); + } } void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) @@ -320,9 +393,43 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap) +{ + return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta); +} + +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, + uint64_t first_sector) { - hbitmap_iter_init(hbi, bitmap->bitmap, 0); + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->bitmap, first_sector); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + +BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap) +{ + BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1); + hbitmap_iter_init(&iter->hbi, bitmap->meta, 0); + iter->bitmap = bitmap; + bitmap->active_iterators++; + return iter; +} + +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter) +{ + if (!iter) { + return; + } + assert(iter->bitmap->active_iterators > 0); + iter->bitmap->active_iterators--; + g_free(iter); +} + +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter) +{ + return hbitmap_iter_next(&iter->hbi); } void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, @@ -360,6 +467,43 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) hbitmap_free(tmp); } +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count) +{ + return hbitmap_serialization_size(bitmap->bitmap, start, count); +} + +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap) +{ + return hbitmap_serialization_granularity(bitmap->bitmap); +} + +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count) +{ + hbitmap_serialize_part(bitmap->bitmap, buf, start, count); +} + +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count, bool finish) +{ 
+ hbitmap_deserialize_part(bitmap->bitmap, buf, start, count, finish); +} + +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish) +{ + hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish); +} + +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap) +{ + hbitmap_deserialize_finish(bitmap->bitmap); +} + void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sectors) { @@ -373,15 +517,19 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, } /** - * Advance an HBitmapIter to an arbitrary offset. + * Advance a BdrvDirtyBitmapIter to an arbitrary offset. */ -void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num) { - assert(hbi->hb); - hbitmap_iter_init(hbi, hbi->hb, offset); + hbitmap_iter_init(&iter->hbi, iter->hbi.hb, sector_num); } int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { return hbitmap_count(bitmap->bitmap); } + +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->meta); +} diff --git a/block/dmg-bz2.c b/block/dmg-bz2.c new file mode 100644 index 0000000000..9059492a9f --- /dev/null +++ b/block/dmg-bz2.c @@ -0,0 +1,61 @@ +/* + * DMG bzip2 uncompression + * + * Copyright (c) 2004 Johannes E. Schindelin + * Copyright (c) 2016 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "dmg.h" +#include + +static int dmg_uncompress_bz2_do(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out) +{ + int ret; + uint64_t total_out; + bz_stream bzstream = {}; + + ret = BZ2_bzDecompressInit(&bzstream, 0, 0); + if (ret != BZ_OK) { + return -1; + } + bzstream.next_in = next_in; + bzstream.avail_in = avail_in; + bzstream.next_out = next_out; + bzstream.avail_out = avail_out; + ret = BZ2_bzDecompress(&bzstream); + total_out = ((uint64_t)bzstream.total_out_hi32 << 32) + + bzstream.total_out_lo32; + BZ2_bzDecompressEnd(&bzstream); + if (ret != BZ_STREAM_END || + total_out != avail_out) { + return -1; + } + return 0; +} + +__attribute__((constructor)) +static void dmg_bz2_init(void) +{ + assert(!dmg_uncompress_bz2); + dmg_uncompress_bz2 = dmg_uncompress_bz2_do; +} diff --git a/block/dmg.c b/block/dmg.c index b0ed89baa7..58a3ae86c1 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -28,10 +28,10 @@ #include "qemu/bswap.h" #include "qemu/error-report.h" #include "qemu/module.h" -#include -#ifdef CONFIG_BZIP2 -#include -#endif +#include "dmg.h" + +int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in, + char *next_out, unsigned int avail_out); enum { /* Limit chunk sizes to prevent unreasonable amounts of memory being used @@ -41,31 +41,6 @@ enum { DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512, }; -typedef struct BDRVDMGState { - CoMutex lock; - /* each chunk contains a certain number of sectors, - * offsets[i] is the offset in the .dmg file, - * lengths[i] is the length of the compressed chunk, - * sectors[i] is the sector beginning at offsets[i], - * sectorcounts[i] is the number of sectors in that chunk, - * the sectors array is ordered - * 0<=iread_only = true; s->n_chunks = 0; @@ -587,9 +562,6 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) { int ret; uint32_t chunk = search_chunk(s, sector_num); -#ifdef CONFIG_BZIP2 - uint64_t total_out; -#endif if (chunk >= s->n_chunks) { return -1; @@ -620,8 +592,10 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) return -1; } break; } -#ifdef CONFIG_BZIP2 case 0x80000006: /* bzip2 compressed */ + if (!dmg_uncompress_bz2) { + break; + } /* we need to buffer, because only the chunk as whole can be * inflated. 
*/ ret = bdrv_pread(bs->file, s->offsets[chunk], @@ -630,24 +604,15 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) return -1; } - ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0); - if (ret != BZ_OK) { - return -1; - } - s->bzstream.next_in = (char *)s->compressed_chunk; - s->bzstream.avail_in = (unsigned int) s->lengths[chunk]; - s->bzstream.next_out = (char *)s->uncompressed_chunk; - s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk]; - ret = BZ2_bzDecompress(&s->bzstream); - total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) + - s->bzstream.total_out_lo32; - BZ2_bzDecompressEnd(&s->bzstream); - if (ret != BZ_STREAM_END || - total_out != 512 * s->sectorcounts[chunk]) { - return -1; + ret = dmg_uncompress_bz2((char *)s->compressed_chunk, + (unsigned int) s->lengths[chunk], + (char *)s->uncompressed_chunk, + (unsigned int) + (512 * s->sectorcounts[chunk])); + if (ret < 0) { + return ret; } break; -#endif /* CONFIG_BZIP2 */ case 1: /* copy */ ret = bdrv_pread(bs->file, s->offsets[chunk], s->uncompressed_chunk, s->lengths[chunk]); diff --git a/block/dmg.h b/block/dmg.h new file mode 100644 index 0000000000..b592d6fa8b --- /dev/null +++ b/block/dmg.h @@ -0,0 +1,59 @@ +/* + * Header for DMG driver + * + * Copyright (c) 2004-2006 Fabrice Bellard + * Copyright (c) 2016 Red hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef BLOCK_DMG_H +#define BLOCK_DMG_H + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "block/block_int.h" +#include + +typedef struct BDRVDMGState { + CoMutex lock; + /* each chunk contains a certain number of sectors, + * offsets[i] is the offset in the .dmg file, + * lengths[i] is the length of the compressed chunk, + * sectors[i] is the sector beginning at offsets[i], + * sectorcounts[i] is the number of sectors in that chunk, + * the sectors array is ordered + * 0<=isaved.volume = g_strdup(volume); + + entry->saved.fs = fs; + entry->saved.ref = 1; + + QLIST_INSERT_HEAD(&glfs_list, entry, list); +} + +static glfs_t *glfs_find_preopened(const char *volume) +{ + ListElement *entry = NULL; + + QLIST_FOREACH(entry, &glfs_list, list) { + if (strcmp(entry->saved.volume, volume) == 0) { + entry->saved.ref++; + return entry->saved.fs; + } + } + + return NULL; +} + +static void glfs_clear_preopened(glfs_t *fs) +{ + ListElement *entry = NULL; + ListElement *next; + + if (fs == NULL) { + return; + } + + QLIST_FOREACH_SAFE(entry, &glfs_list, list, next) { + if (entry->saved.fs == fs) { + if (--entry->saved.ref) { + return; + } + + QLIST_REMOVE(entry, list); + + glfs_fini(entry->saved.fs); + g_free(entry->saved.volume); + g_free(entry); + } + } +} + static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path) { char *p, *q; @@ -318,22 +396,37 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, int ret; int old_errno; GlusterServerList *server; + unsigned long long port; + + glfs = glfs_find_preopened(gconf->volume); + if (glfs) { + return glfs; + } glfs = glfs_new(gconf->volume); if (!glfs) { goto out; } + glfs_set_preopened(gconf->volume, glfs); + for (server = gconf->server; server; server = server->next) { if (server->value->type == GLUSTER_TRANSPORT_UNIX) { ret = glfs_set_volfile_server(glfs, GlusterTransport_lookup[server->value->type], server->value->u.q_unix.path, 0); } else { + if (parse_uint_full(server->value->u.tcp.port, &port, 10) < 0 || + port > 65535) { + error_setg(errp, "'%s' is not a valid port number", + server->value->u.tcp.port); + errno = EINVAL; + goto out; + } ret = glfs_set_volfile_server(glfs, GlusterTransport_lookup[server->value->type], server->value->u.tcp.host, - atoi(server->value->u.tcp.port)); + (int)port); } if (ret < 0) { @@ -341,7 +434,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, } } - ret = glfs_set_logging(glfs, "-", gconf->debug_level); + ret = glfs_set_logging(glfs, gconf->logfile, gconf->debug); if (ret < 0) { goto out; } @@ -375,7 +468,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf, out: if (glfs) { old_errno = errno; - glfs_fini(glfs); + glfs_clear_preopened(glfs); errno = old_errno; } return NULL; @@ -576,7 +669,9 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, if (ret < 0) { error_setg(errp, "invalid URI"); error_append_hint(errp, "Usage: file=gluster[+transport]://" - "[host[:port]]/volume/path[?socket=...]\n"); + "[host[:port]]volume/path[?socket=...]" + "[,file.debug=N]" + "[,file.logfile=/path/filename.log]\n"); errno = -ret; return NULL; } @@ -586,7 +681,9 @@ static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf, error_append_hint(errp, "Usage: " "-drive driver=qcow2,file.driver=gluster," "file.volume=testvol,file.path=/path/a.qcow2" - "[,file.debug=9],file.server.0.type=tcp," + "[,file.debug=9]" + "[,file.logfile=/path/filename.log]," + "file.server.0.type=tcp," 
"file.server.0.host=1.2.3.4," "file.server.0.port=24007," "file.server.1.transport=unix," @@ -605,8 +702,6 @@ static void qemu_gluster_complete_aio(void *opaque) { GlusterAIOCB *acb = (GlusterAIOCB *)opaque; - qemu_bh_delete(acb->bh); - acb->bh = NULL; qemu_coroutine_enter(acb->coroutine); } @@ -625,8 +720,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) acb->ret = -EIO; /* Partial read/write - fail it */ } - acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(acb->aio_context, qemu_gluster_complete_aio, acb); } static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) @@ -655,7 +749,10 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags) */ static bool qemu_gluster_test_seek(struct glfs_fd *fd) { - off_t ret, eof; + off_t ret = 0; + +#if defined SEEK_HOLE && defined SEEK_DATA + off_t eof; eof = glfs_lseek(fd, 0, SEEK_END); if (eof < 0) { @@ -665,6 +762,8 @@ static bool qemu_gluster_test_seek(struct glfs_fd *fd) /* this should always fail with ENXIO if SEEK_DATA is supported */ ret = glfs_lseek(fd, eof, SEEK_DATA); +#endif + return (ret < 0) && (errno == ENXIO); } @@ -677,7 +776,7 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, BlockdevOptionsGluster *gconf = NULL; QemuOpts *opts; Error *local_err = NULL; - const char *filename; + const char *filename, *logfile; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -689,17 +788,24 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME); - s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG, - GLUSTER_DEBUG_DEFAULT); - if (s->debug_level < 0) { - s->debug_level = 0; - } else if (s->debug_level > GLUSTER_DEBUG_MAX) { - s->debug_level = GLUSTER_DEBUG_MAX; + s->debug = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (s->debug < 0) { + s->debug = 0; + } else if (s->debug > GLUSTER_DEBUG_MAX) { + s->debug = GLUSTER_DEBUG_MAX; } gconf = g_new0(BlockdevOptionsGluster, 1); - gconf->debug_level = s->debug_level; - gconf->has_debug_level = true; + gconf->debug = s->debug; + gconf->has_debug = true; + + logfile = qemu_opt_get(opts, GLUSTER_OPT_LOGFILE); + s->logfile = g_strdup(logfile ? 
logfile : GLUSTER_LOGFILE_DEFAULT); + + gconf->logfile = g_strdup(s->logfile); + gconf->has_logfile = true; + s->glfs = qemu_gluster_init(gconf, filename, options, errp); if (!s->glfs) { ret = -errno; @@ -738,12 +844,13 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, if (!ret) { return ret; } + g_free(s->logfile); if (s->fd) { glfs_close(s->fd); } - if (s->glfs) { - glfs_fini(s->glfs); - } + + glfs_clear_preopened(s->glfs); + return ret; } @@ -767,8 +874,10 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, qemu_gluster_parse_flags(state->flags, &open_flags); gconf = g_new0(BlockdevOptionsGluster, 1); - gconf->debug_level = s->debug_level; - gconf->has_debug_level = true; + gconf->debug = s->debug; + gconf->has_debug = true; + gconf->logfile = g_strdup(s->logfile); + gconf->has_logfile = true; reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp); if (reop_s->glfs == NULL) { ret = -errno; @@ -808,9 +917,8 @@ static void qemu_gluster_reopen_commit(BDRVReopenState *state) if (s->fd) { glfs_close(s->fd); } - if (s->glfs) { - glfs_fini(s->glfs); - } + + glfs_clear_preopened(s->glfs); /* use the newly opened image / connection */ s->fd = reop_s->fd; @@ -835,9 +943,7 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state) glfs_close(reop_s->fd); } - if (reop_s->glfs) { - glfs_fini(reop_s->glfs); - } + glfs_clear_preopened(reop_s->glfs); g_free(state->opaque); state->opaque = NULL; @@ -905,14 +1011,20 @@ static int qemu_gluster_create(const char *filename, char *tmp = NULL; gconf = g_new0(BlockdevOptionsGluster, 1); - gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, - GLUSTER_DEBUG_DEFAULT); - if (gconf->debug_level < 0) { - gconf->debug_level = 0; - } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) { - gconf->debug_level = GLUSTER_DEBUG_MAX; + gconf->debug = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG, + GLUSTER_DEBUG_DEFAULT); + if (gconf->debug < 0) { + gconf->debug = 0; + } else if (gconf->debug > GLUSTER_DEBUG_MAX) { + gconf->debug = GLUSTER_DEBUG_MAX; + } + gconf->has_debug = true; + + gconf->logfile = qemu_opt_get_del(opts, GLUSTER_OPT_LOGFILE); + if (!gconf->logfile) { + gconf->logfile = g_strdup(GLUSTER_LOGFILE_DEFAULT); } - gconf->has_debug_level = true; + gconf->has_logfile = true; glfs = qemu_gluster_init(gconf, filename, NULL, errp); if (!glfs) { @@ -955,9 +1067,7 @@ static int qemu_gluster_create(const char *filename, out: g_free(tmp); qapi_free_BlockdevOptionsGluster(gconf); - if (glfs) { - glfs_fini(glfs); - } + glfs_clear_preopened(glfs); return ret; } @@ -1025,11 +1135,12 @@ static void qemu_gluster_close(BlockDriverState *bs) { BDRVGlusterState *s = bs->opaque; + g_free(s->logfile); if (s->fd) { glfs_close(s->fd); s->fd = NULL; } - glfs_fini(s->glfs); + glfs_clear_preopened(s->glfs); } static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) @@ -1148,12 +1259,14 @@ static int find_allocation(BlockDriverState *bs, off_t start, off_t *data, off_t *hole) { BDRVGlusterState *s = bs->opaque; - off_t offs; if (!s->supports_seek_data) { - return -ENOTSUP; + goto exit; } +#if defined SEEK_HOLE && defined SEEK_DATA + off_t offs; + /* * SEEK_DATA cases: * D1. 
offs == start: start is in data @@ -1217,6 +1330,10 @@ static int find_allocation(BlockDriverState *bs, off_t start, /* D1 and H1 */ return -EBUSY; +#endif + +exit: + return -ENOTSUP; } /* diff --git a/block/io.c b/block/io.c index 420944d80d..4f005623f7 100644 --- a/block/io.c +++ b/block/io.c @@ -143,7 +143,7 @@ bool bdrv_requests_pending(BlockDriverState *bs) { BdrvChild *child; - if (!QLIST_EMPTY(&bs->tracked_requests)) { + if (atomic_read(&bs->in_flight)) { return true; } @@ -156,43 +156,38 @@ bool bdrv_requests_pending(BlockDriverState *bs) return false; } -static void bdrv_drain_recurse(BlockDriverState *bs) +static bool bdrv_drain_recurse(BlockDriverState *bs) { BdrvChild *child; + bool waited; + + waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0); if (bs->drv && bs->drv->bdrv_drain) { bs->drv->bdrv_drain(bs); } + QLIST_FOREACH(child, &bs->children, next) { - bdrv_drain_recurse(child->bs); + waited |= bdrv_drain_recurse(child->bs); } + + return waited; } typedef struct { Coroutine *co; BlockDriverState *bs; - QEMUBH *bh; bool done; } BdrvCoDrainData; -static void bdrv_drain_poll(BlockDriverState *bs) -{ - bool busy = true; - - while (busy) { - /* Keep iterating */ - busy = bdrv_requests_pending(bs); - busy |= aio_poll(bdrv_get_aio_context(bs), busy); - } -} - static void bdrv_co_drain_bh_cb(void *opaque) { BdrvCoDrainData *data = opaque; Coroutine *co = data->co; + BlockDriverState *bs = data->bs; - qemu_bh_delete(data->bh); - bdrv_drain_poll(data->bs); + bdrv_dec_in_flight(bs); + bdrv_drained_begin(bs); data->done = true; qemu_coroutine_enter(co); } @@ -210,9 +205,10 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs) .co = qemu_coroutine_self(), .bs = bs, .done = false, - .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data), }; - qemu_bh_schedule(data.bh); + bdrv_inc_in_flight(bs); + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), + bdrv_co_drain_bh_cb, &data); qemu_coroutine_yield(); /* If we are resumed from some other event (such as an aio completion or a @@ -222,6 +218,11 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs) void bdrv_drained_begin(BlockDriverState *bs) { + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs); + return; + } + if (!bs->quiesce_counter++) { aio_disable_external(bdrv_get_aio_context(bs)); bdrv_parent_drained_begin(bs); @@ -229,11 +230,6 @@ void bdrv_drained_begin(BlockDriverState *bs) bdrv_io_unplugged_begin(bs); bdrv_drain_recurse(bs); - if (qemu_in_coroutine()) { - bdrv_co_yield_to_drain(bs); - } else { - bdrv_drain_poll(bs); - } bdrv_io_unplugged_end(bs); } @@ -277,11 +273,17 @@ void bdrv_drain(BlockDriverState *bs) * * This function does not flush data to disk, use bdrv_flush_all() for that * after calling this function. + * + * This pauses all block jobs and disables external clients. It must + * be paired with bdrv_drain_all_end(). + * + * NOTE: no new block jobs or BlockDriverStates can be created between + * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls. 
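+ *
+ * [Editorial sketch, not part of the original patch: a caller is expected
+ *  to bracket its critical section with the new pair, e.g.
+ *
+ *      bdrv_drain_all_begin();
+ *      ... operate while every BlockDriverState is quiesced ...
+ *      bdrv_drain_all_end();
+ *
+ *  and the reworked bdrv_drain_all() below is exactly these two calls
+ *  back to back.]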
*/ -void bdrv_drain_all(void) +void bdrv_drain_all_begin(void) { /* Always run first iteration so any pending completion BHs run */ - bool busy = true; + bool waited = true; BlockDriverState *bs; BdrvNextIterator it; BlockJob *job = NULL; @@ -301,7 +303,7 @@ void bdrv_drain_all(void) aio_context_acquire(aio_context); bdrv_parent_drained_begin(bs); bdrv_io_unplugged_begin(bs); - bdrv_drain_recurse(bs); + aio_disable_external(aio_context); aio_context_release(aio_context); if (!g_slist_find(aio_ctxs, aio_context)) { @@ -315,8 +317,8 @@ void bdrv_drain_all(void) * request completion. Therefore we must keep looping until there was no * more activity rather than simply draining each device independently. */ - while (busy) { - busy = false; + while (waited) { + waited = false; for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { AioContext *aio_context = ctx->data; @@ -324,28 +326,32 @@ void bdrv_drain_all(void) aio_context_acquire(aio_context); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { if (aio_context == bdrv_get_aio_context(bs)) { - if (bdrv_requests_pending(bs)) { - busy = true; - aio_poll(aio_context, busy); - } + waited |= bdrv_drain_recurse(bs); } } - busy |= aio_poll(aio_context, false); aio_context_release(aio_context); } } + g_slist_free(aio_ctxs); +} + +void bdrv_drain_all_end(void) +{ + BlockDriverState *bs; + BdrvNextIterator it; + BlockJob *job = NULL; + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { AioContext *aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); + aio_enable_external(aio_context); bdrv_io_unplugged_end(bs); bdrv_parent_drained_end(bs); aio_context_release(aio_context); } - g_slist_free(aio_ctxs); - job = NULL; while ((job = block_job_next(job))) { AioContext *aio_context = blk_get_aio_context(job->blk); @@ -355,6 +361,12 @@ void bdrv_drain_all(void) } } +void bdrv_drain_all(void) +{ + bdrv_drain_all_begin(); + bdrv_drain_all_end(); +} + /** * Remove an active request from the tracked requests list * @@ -478,6 +490,28 @@ static bool tracked_request_overlaps(BdrvTrackedRequest *req, return true; } +void bdrv_inc_in_flight(BlockDriverState *bs) +{ + atomic_inc(&bs->in_flight); +} + +static void dummy_bh_cb(void *opaque) +{ +} + +void bdrv_wakeup(BlockDriverState *bs) +{ + if (bs->wakeup) { + aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL); + } +} + +void bdrv_dec_in_flight(BlockDriverState *bs) +{ + atomic_dec(&bs->in_flight); + bdrv_wakeup(bs); +} + static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) { BlockDriverState *bs = self->bs; @@ -540,17 +574,6 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, return 0; } -static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) -{ - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EIO; - } - - return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); -} - typedef struct RwCo { BdrvChild *child; int64_t offset; @@ -596,13 +619,9 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset, /* Fast-path if already in coroutine context */ bdrv_rw_co_entry(&rwco); } else { - AioContext *aio_context = bdrv_get_aio_context(child->bs); - co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco); qemu_coroutine_enter(co); - while (rwco.ret == NOT_DONE) { - aio_poll(aio_context, true); - } + BDRV_POLL_WHILE(child->bs, rwco.ret == NOT_DONE); } return rwco.ret; } @@ -897,6 +916,19 @@ static int coroutine_fn 
bdrv_driver_pwritev(BlockDriverState *bs, return ret; } +static int coroutine_fn +bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov) +{ + BlockDriver *drv = bs->drv; + + if (!drv->bdrv_co_pwritev_compressed) { + return -ENOTSUP; + } + + return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); +} + static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, int64_t offset, unsigned int bytes, QEMUIOVector *qiov) { @@ -1097,6 +1129,8 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, return ret; } + bdrv_inc_in_flight(bs); + /* Don't do copy-on-read if we read data before write operation */ if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { flags |= BDRV_REQ_COPY_ON_READ; @@ -1132,6 +1166,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, use_local_qiov ? &local_qiov : qiov, flags); tracked_request_end(&req); + bdrv_dec_in_flight(bs); if (use_local_qiov) { qemu_iovec_destroy(&local_qiov); @@ -1179,6 +1214,8 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX); int alignment = MAX(bs->bl.pwrite_zeroes_alignment, bs->bl.request_alignment); + int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, + MAX_WRITE_ZEROES_BOUNCE_BUFFER); assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; @@ -1194,9 +1231,12 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, * boundaries. */ if (head) { - /* Make a small request up to the first aligned sector. */ - num = MIN(count, alignment - head); - head = 0; + /* Make a small request up to the first aligned sector. For + * convenience, limit this request to max_transfer even if + * we don't need to fall back to writes. */ + num = MIN(MIN(count, max_transfer), alignment - head); + head = (head + num) % alignment; + assert(num < max_write_zeroes); } else if (tail && num > alignment) { /* Shorten the request to the last aligned sector. */ num -= tail; @@ -1222,8 +1262,6 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, if (ret == -ENOTSUP) { /* Fall back to bounce buffer if write zeroes is unsupported */ - int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, - MAX_WRITE_ZEROES_BOUNCE_BUFFER); BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; if ((flags & BDRV_REQ_FUA) && @@ -1315,6 +1353,8 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, } else if (flags & BDRV_REQ_ZERO_WRITE) { bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); + } else if (flags & BDRV_REQ_WRITE_COMPRESSED) { + ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov); } else if (bytes <= max_transfer) { bdrv_debug_event(bs, BLKDBG_PWRITEV); ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); @@ -1478,6 +1518,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, return ret; } + bdrv_inc_in_flight(bs); /* * Align write if necessary by performing a read-modify-write cycle. * Pad qiov with the read parts and be sure to have a tracked request not @@ -1579,6 +1620,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, qemu_vfree(tail_buf); out: tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -1615,6 +1657,31 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, BDRV_REQ_ZERO_WRITE | flags); } +/* + * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not. 
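+ *
+ * [Editorial note, not part of the original patch: the implementation below
+ *  walks every node with bdrv_first()/bdrv_next(), takes the node's
+ *  AioContext around the flush and keeps the first error it sees:
+ *
+ *      aio_context_acquire(aio_context);
+ *      ret = bdrv_flush(bs);
+ *      if (ret < 0 && !result) { result = ret; }
+ *      aio_context_release(aio_context);
+ *  ]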
+ */ +int bdrv_flush_all(void) +{ + BdrvNextIterator it; + BlockDriverState *bs = NULL; + int result = 0; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_flush(bs); + if (ret < 0 && !result) { + result = ret; + } + aio_context_release(aio_context); + } + + return result; +} + + typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; @@ -1678,17 +1745,19 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } *file = NULL; + bdrv_inc_in_flight(bs); ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum, file); if (ret < 0) { *pnum = 0; - return ret; + goto out; } if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); - return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, - *pnum, pnum, file); + ret = bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, + *pnum, pnum, file); + goto out; } if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { @@ -1730,6 +1799,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } } +out: + bdrv_dec_in_flight(bs); return ret; } @@ -1795,14 +1866,10 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, /* Fast-path if already in coroutine context */ bdrv_get_block_status_above_co_entry(&data); } else { - AioContext *aio_context = bdrv_get_aio_context(bs); - co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry, &data); qemu_coroutine_enter(co); - while (!data.done) { - aio_poll(aio_context, true); - } + BDRV_POLL_WHILE(bs, !data.done); } return data.ret; } @@ -1879,28 +1946,6 @@ int bdrv_is_allocated_above(BlockDriverState *top, return 0; } -int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BlockDriver *drv = bs->drv; - int ret; - - if (!drv) { - return -ENOMEDIUM; - } - if (!drv->bdrv_write_compressed) { - return -ENOTSUP; - } - ret = bdrv_check_request(bs, sector_num, nb_sectors); - if (ret < 0) { - return ret; - } - - assert(QLIST_EMPTY(&bs->dirty_bitmaps)); - - return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); -} - typedef struct BdrvVmstateCo { BlockDriverState *bs; QEMUIOVector *qiov; @@ -2088,7 +2133,6 @@ typedef struct BlockAIOCBCoroutine { bool is_write; bool need_bh; bool *done; - QEMUBH* bh; } BlockAIOCBCoroutine; static const AIOCBInfo bdrv_em_co_aiocb_info = { @@ -2098,6 +2142,7 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = { static void bdrv_co_complete(BlockAIOCBCoroutine *acb) { if (!acb->need_bh) { + bdrv_dec_in_flight(acb->common.bs); acb->common.cb(acb->common.opaque, acb->req.error); qemu_aio_unref(acb); } @@ -2108,7 +2153,6 @@ static void bdrv_co_em_bh(void *opaque) BlockAIOCBCoroutine *acb = opaque; assert(!acb->need_bh); - qemu_bh_delete(acb->bh); bdrv_co_complete(acb); } @@ -2118,8 +2162,7 @@ static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) if (acb->req.error != -EINPROGRESS) { BlockDriverState *bs = acb->common.bs; - acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); } } @@ -2150,6 +2193,9 @@ static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child, Coroutine *co; BlockAIOCBCoroutine *acb; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. 
*/ + bdrv_inc_in_flight(child->bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque); acb->child = child; acb->need_bh = true; @@ -2183,6 +2229,9 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, Coroutine *co; BlockAIOCBCoroutine *acb; + /* Matched by bdrv_co_complete's bdrv_dec_in_flight. */ + bdrv_inc_in_flight(bs); + acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->need_bh = true; acb->req.error = -EINPROGRESS; @@ -2194,35 +2243,6 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, return &acb->common; } -static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - BlockDriverState *bs = acb->common.bs; - - acb->req.error = bdrv_co_pdiscard(bs, acb->req.offset, acb->req.bytes); - bdrv_co_complete(acb); -} - -BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count, - BlockCompletionFunc *cb, void *opaque) -{ - Coroutine *co; - BlockAIOCBCoroutine *acb; - - trace_bdrv_aio_pdiscard(bs, offset, count, opaque); - - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - acb->req.offset = offset; - acb->req.bytes = count; - co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb); - qemu_coroutine_enter(co); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; -} - void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { @@ -2271,23 +2291,22 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; - BdrvTrackedRequest req; if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) { return 0; } - tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); + bdrv_inc_in_flight(bs); int current_gen = bs->write_gen; /* Wait until any previous flushes are completed */ - while (bs->active_flush_req != NULL) { + while (bs->active_flush_req) { qemu_co_queue_wait(&bs->flush_queue); } - bs->active_flush_req = &req; + bs->active_flush_req = true; /* Write back all layers by calling one driver function */ if (bs->drv->bdrv_co_flush) { @@ -2356,12 +2375,14 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) ret = bs->file ? 
bdrv_co_flush(bs->file->bs) : 0; out: /* Notify any pending flushes that we have completed */ - bs->flushed_gen = current_gen; - bs->active_flush_req = NULL; + if (ret == 0) { + bs->flushed_gen = current_gen; + } + bs->active_flush_req = false; /* Return value is ignored - it's ok if wait queue is empty */ qemu_co_queue_next(&bs->flush_queue); - tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2377,13 +2398,9 @@ int bdrv_flush(BlockDriverState *bs) /* Fast-path if already in coroutine context */ bdrv_flush_co_entry(&flush_co); } else { - AioContext *aio_context = bdrv_get_aio_context(bs); - co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co); qemu_coroutine_enter(co); - while (flush_co.ret == NOT_DONE) { - aio_poll(aio_context, true); - } + BDRV_POLL_WHILE(bs, flush_co.ret == NOT_DONE); } return flush_co.ret; @@ -2407,7 +2424,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, { BdrvTrackedRequest req; int max_pdiscard, ret; - int head, align; + int head, tail, align; if (!bs->drv) { return -ENOMEDIUM; @@ -2430,20 +2447,17 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, return 0; } - /* Discard is advisory, so ignore any unaligned head or tail */ + /* Discard is advisory, but some devices track and coalesce + * unaligned requests, so we must pass everything down rather than + * round here. Still, most devices will just silently ignore + * unaligned requests (by returning -ENOTSUP), so we must fragment + * the request accordingly. */ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); assert(align % bs->bl.request_alignment == 0); head = offset % align; - if (head) { - head = MIN(count, align - head); - count -= head; - offset += head; - } - count = QEMU_ALIGN_DOWN(count, align); - if (!count) { - return 0; - } + tail = (offset + count) % align; + bdrv_inc_in_flight(bs); tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); @@ -2453,11 +2467,34 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX), align); - assert(max_pdiscard); + assert(max_pdiscard >= bs->bl.request_alignment); while (count > 0) { int ret; - int num = MIN(count, max_pdiscard); + int num = count; + + if (head) { + /* Make small requests to get to alignment boundaries. */ + num = MIN(count, align - head); + if (!QEMU_IS_ALIGNED(num, bs->bl.request_alignment)) { + num %= bs->bl.request_alignment; + } + head = (head + num) % align; + assert(num < max_pdiscard); + } else if (tail) { + if (num > align) { + /* Shorten the request to the last aligned cluster. 
*/ + num -= tail; + } else if (!QEMU_IS_ALIGNED(tail, bs->bl.request_alignment) && + tail > bs->bl.request_alignment) { + tail %= bs->bl.request_alignment; + num -= tail; + } + } + /* limit request size */ + if (num > max_pdiscard) { + num = max_pdiscard; + } if (bs->drv->bdrv_co_pdiscard) { ret = bs->drv->bdrv_co_pdiscard(bs, offset, num); @@ -2490,6 +2527,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS, req.bytes >> BDRV_SECTOR_BITS); tracked_request_end(&req); + bdrv_dec_in_flight(bs); return ret; } @@ -2507,106 +2545,41 @@ int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count) /* Fast-path if already in coroutine context */ bdrv_pdiscard_co_entry(&rwco); } else { - AioContext *aio_context = bdrv_get_aio_context(bs); - co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco); qemu_coroutine_enter(co); - while (rwco.ret == NOT_DONE) { - aio_poll(aio_context, true); - } + BDRV_POLL_WHILE(bs, rwco.ret == NOT_DONE); } return rwco.ret; } -static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) +int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest tracked_req; CoroutineIOCompletion co = { .coroutine = qemu_coroutine_self(), }; BlockAIOCB *acb; - tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL); - if (!drv || !drv->bdrv_aio_ioctl) { + bdrv_inc_in_flight(bs); + if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) { co.ret = -ENOTSUP; goto out; } - acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co); - if (!acb) { - co.ret = -ENOTSUP; - goto out; - } - qemu_coroutine_yield(); -out: - tracked_request_end(&tracked_req); - return co.ret; -} - -typedef struct { - BlockDriverState *bs; - int req; - void *buf; - int ret; -} BdrvIoctlCoData; - -static void coroutine_fn bdrv_co_ioctl_entry(void *opaque) -{ - BdrvIoctlCoData *data = opaque; - data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf); -} - -/* needed for generic scsi interface */ -int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - BdrvIoctlCoData data = { - .bs = bs, - .req = req, - .buf = buf, - .ret = -EINPROGRESS, - }; - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_co_ioctl_entry(&data); + if (drv->bdrv_co_ioctl) { + co.ret = drv->bdrv_co_ioctl(bs, req, buf); } else { - Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry, &data); - - qemu_coroutine_enter(co); - while (data.ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(bs), true); + acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co); + if (!acb) { + co.ret = -ENOTSUP; + goto out; } + qemu_coroutine_yield(); } - return data.ret; -} - -static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - acb->req.error = bdrv_co_do_ioctl(acb->common.bs, - acb->req.req, acb->req.buf); - bdrv_co_complete(acb); -} - -BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque) -{ - BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info, - bs, cb, opaque); - Coroutine *co; - - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - acb->req.req = req; - acb->req.buf = buf; - co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry, acb); - qemu_coroutine_enter(co); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; +out: + bdrv_dec_in_flight(bs); + return co.ret; } void 
*qemu_blockalign(BlockDriverState *bs, size_t size) diff --git a/block/iscsi.c b/block/iscsi.c index 95ce9e139e..0960929d57 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -36,7 +36,7 @@ #include "block/block_int.h" #include "block/scsi.h" #include "qemu/iov.h" -#include "sysemu/sysemu.h" +#include "qemu/uuid.h" #include "qmp-commands.h" #include "qapi/qmp/qstring.h" #include "crypto/secret.h" @@ -95,7 +95,6 @@ typedef struct IscsiTask { int do_retry; struct scsi_task *task; Coroutine *co; - QEMUBH *bh; IscsiLun *iscsilun; QEMUTimer retry_timer; int err_code; @@ -167,7 +166,6 @@ static void iscsi_co_generic_bh_cb(void *opaque) { struct IscsiTask *iTask = opaque; iTask->complete = 1; - qemu_bh_delete(iTask->bh); qemu_coroutine_enter(iTask->co); } @@ -204,6 +202,10 @@ static inline unsigned exp_random(double mean) #define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00 #endif +#ifndef LIBISCSI_API_VERSION +#define LIBISCSI_API_VERSION 20130701 +#endif + static int iscsi_translate_sense(struct scsi_sense *sense) { int ret; @@ -299,9 +301,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, out: if (iTask->co) { - iTask->bh = aio_bh_new(iTask->iscsilun->aio_context, - iscsi_co_generic_bh_cb, iTask); - qemu_bh_schedule(iTask->bh); + aio_bh_schedule_oneshot(iTask->iscsilun->aio_context, + iscsi_co_generic_bh_cb, iTask); } else { iTask->complete = 1; } @@ -595,6 +596,20 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (iscsilun->use_16_for_rw) { +#if LIBISCSI_API_VERSION >= (20160603) + iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, fua, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } else { + iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + NULL, num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, fua, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } +#else iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba, NULL, num_sectors * iscsilun->block_size, iscsilun->block_size, 0, 0, fua, 0, 0, @@ -605,11 +620,14 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, iscsilun->block_size, 0, 0, fua, 0, 0, iscsi_co_generic_cb, &iTask); } +#endif if (iTask.task == NULL) { return -ENOMEM; } +#if LIBISCSI_API_VERSION < (20160603) scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); +#endif while (!iTask.complete) { iscsi_set_events(iscsilun); qemu_coroutine_yield(); @@ -792,6 +810,21 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (iscsilun->use_16_for_rw) { +#if LIBISCSI_API_VERSION >= (20160603) + iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + num_sectors * iscsilun->block_size, + iscsilun->block_size, 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } else { + iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba, + num_sectors * iscsilun->block_size, + iscsilun->block_size, + 0, 0, 0, 0, 0, + iscsi_co_generic_cb, &iTask, + (struct scsi_iovec *)iov->iov, iov->niov); + } +#else iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba, num_sectors * iscsilun->block_size, iscsilun->block_size, 0, 0, 0, 0, 0, @@ -803,11 +836,13 @@ static int coroutine_fn 
iscsi_co_readv(BlockDriverState *bs, 0, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask); } +#endif if (iTask.task == NULL) { return -ENOMEM; } +#if LIBISCSI_API_VERSION < (20160603) scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); - +#endif while (!iTask.complete) { iscsi_set_events(iscsilun); qemu_coroutine_yield(); @@ -1048,7 +1083,9 @@ coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) struct IscsiTask iTask; struct unmap_list list; - assert(is_byte_request_lun_aligned(offset, count, iscsilun)); + if (!is_byte_request_lun_aligned(offset, count, iscsilun)) { + return -ENOTSUP; + } if (!iscsilun->lbp.lbpu) { /* UNMAP is not supported by the target */ @@ -1609,7 +1646,13 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto out; } - +#if LIBISCSI_API_VERSION >= (20160603) + if (iscsi_init_transport(iscsi, iscsi_url->transport)) { + error_setg(errp, ("Error initializing transport.")); + ret = -EINVAL; + goto out; + } +#endif if (iscsi_set_targetname(iscsi, iscsi_url->target)) { error_setg(errp, "iSCSI: Failed to set target name."); ret = -EINVAL; @@ -1652,7 +1695,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, /* timeout handling is broken in libiscsi before 1.15.0 */ timeout = parse_timeout(iscsi_url->target); -#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621 +#if LIBISCSI_API_VERSION >= 20150621 iscsi_set_timeout(iscsi, timeout); #else if (timeout) { @@ -1813,19 +1856,22 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) IscsiLun *iscsilun = bs->opaque; uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff; + unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size); + + assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bs->sg); - bs->bl.request_alignment = iscsilun->block_size; + bs->bl.request_alignment = block_size; if (iscsilun->bl.max_xfer_len) { max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len); } - if (max_xfer_len * iscsilun->block_size < INT_MAX) { + if (max_xfer_len * block_size < INT_MAX) { bs->bl.max_transfer = max_xfer_len * iscsilun->block_size; } if (iscsilun->lbp.lbpu) { - if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) { + if (iscsilun->bl.max_unmap < 0xffffffff / block_size) { bs->bl.max_pdiscard = iscsilun->bl.max_unmap * iscsilun->block_size; } @@ -1835,7 +1881,7 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.pdiscard_alignment = iscsilun->block_size; } - if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) { + if (iscsilun->bl.max_ws_len < 0xffffffff / block_size) { bs->bl.max_pwrite_zeroes = iscsilun->bl.max_ws_len * iscsilun->block_size; } @@ -1846,7 +1892,7 @@ static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.pwrite_zeroes_alignment = iscsilun->block_size; } if (iscsilun->bl.opt_xfer_len && - iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) { + iscsilun->bl.opt_xfer_len < INT_MAX / block_size) { bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len * iscsilun->block_size); } @@ -2010,45 +2056,48 @@ static BlockDriver bdrv_iscsi = { .bdrv_attach_aio_context = iscsi_attach_aio_context, }; -static QemuOptsList qemu_iscsi_opts = { - .name = "iscsi", - .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head), - .desc = { - { - .name = "user", - .type = QEMU_OPT_STRING, - .help = "username for CHAP authentication to target", - },{ - .name = "password", - .type = QEMU_OPT_STRING, 
- .help = "password for CHAP authentication to target", - },{ - .name = "password-secret", - .type = QEMU_OPT_STRING, - .help = "ID of the secret providing password for CHAP " - "authentication to target", - },{ - .name = "header-digest", - .type = QEMU_OPT_STRING, - .help = "HeaderDigest setting. " - "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}", - },{ - .name = "initiator-name", - .type = QEMU_OPT_STRING, - .help = "Initiator iqn name to use when connecting", - },{ - .name = "timeout", - .type = QEMU_OPT_NUMBER, - .help = "Request timeout in seconds (default 0 = no timeout)", - }, - { /* end of list */ } - }, +#if LIBISCSI_API_VERSION >= (20160603) +static BlockDriver bdrv_iser = { + .format_name = "iser", + .protocol_name = "iser", + + .instance_size = sizeof(IscsiLun), + .bdrv_needs_filename = true, + .bdrv_file_open = iscsi_open, + .bdrv_close = iscsi_close, + .bdrv_create = iscsi_create, + .create_opts = &iscsi_create_opts, + .bdrv_reopen_prepare = iscsi_reopen_prepare, + .bdrv_reopen_commit = iscsi_reopen_commit, + .bdrv_invalidate_cache = iscsi_invalidate_cache, + + .bdrv_getlength = iscsi_getlength, + .bdrv_get_info = iscsi_get_info, + .bdrv_truncate = iscsi_truncate, + .bdrv_refresh_limits = iscsi_refresh_limits, + + .bdrv_co_get_block_status = iscsi_co_get_block_status, + .bdrv_co_pdiscard = iscsi_co_pdiscard, + .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes, + .bdrv_co_readv = iscsi_co_readv, + .bdrv_co_writev_flags = iscsi_co_writev_flags, + .bdrv_co_flush_to_disk = iscsi_co_flush, + +#ifdef __linux__ + .bdrv_aio_ioctl = iscsi_aio_ioctl, +#endif + + .bdrv_detach_aio_context = iscsi_detach_aio_context, + .bdrv_attach_aio_context = iscsi_attach_aio_context, }; +#endif static void iscsi_block_init(void) { bdrv_register(&bdrv_iscsi); - qemu_add_opts(&qemu_iscsi_opts); +#if LIBISCSI_API_VERSION >= (20160603) + bdrv_register(&bdrv_iser); +#endif } block_init(iscsi_block_init); diff --git a/block/linux-aio.c b/block/linux-aio.c index e906abebb3..1685ec29a3 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -59,7 +59,6 @@ struct LinuxAioState { /* I/O completion processing */ QEMUBH *completion_bh; - struct io_event events[MAX_EVENTS]; int event_idx; int event_max; }; @@ -95,64 +94,156 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb) laiocb->ret = ret; if (laiocb->co) { - qemu_coroutine_enter(laiocb->co); + /* If the coroutine is already entered it must be in ioq_submit() and + * will notice laio->ret has been filled in when it eventually runs + * later. Coroutines cannot be entered recursively so avoid doing + * that! + */ + if (!qemu_coroutine_entered(laiocb->co)) { + qemu_coroutine_enter(laiocb->co); + } } else { laiocb->common.cb(laiocb->common.opaque, ret); qemu_aio_unref(laiocb); } } -/* The completion BH fetches completed I/O requests and invokes their - * callbacks. +/** + * aio_ring buffer which is shared between userspace and kernel. * - * The function is somewhat tricky because it supports nested event loops, for - * example when a request callback invokes aio_poll(). In order to do this, - * the completion events array and index are kept in LinuxAioState. The BH - * reschedules itself as long as there are completions pending so it will - * either be called again in a nested event loop or will be called after all - * events have been completed. When there are no events left to complete, the - * BH returns without rescheduling. + * This copied from linux/fs/aio.c, common header does not exist + * but AIO exists for ages so we assume ABI is stable. 
*/ -static void qemu_laio_completion_bh(void *opaque) +struct aio_ring { + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + unsigned head; /* Written to by userland or by kernel. */ + unsigned tail; + + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ + + struct io_event io_events[0]; +}; + +/** + * io_getevents_peek: + * @ctx: AIO context + * @events: pointer on events array, output value + + * Returns the number of completed events and sets a pointer + * on events array. This function does not update the internal + * ring buffer, only reads head and tail. When @events has been + * processed io_getevents_commit() must be called. + */ +static inline unsigned int io_getevents_peek(io_context_t ctx, + struct io_event **events) { - LinuxAioState *s = opaque; + struct aio_ring *ring = (struct aio_ring *)ctx; + unsigned int head = ring->head, tail = ring->tail; + unsigned int nr; - /* Fetch more completion events when empty */ - if (s->event_idx == s->event_max) { - do { - struct timespec ts = { 0 }; - s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, - s->events, &ts); - } while (s->event_max == -EINTR); - - s->event_idx = 0; - if (s->event_max <= 0) { - s->event_max = 0; - return; /* no more events */ - } - s->io_q.in_flight -= s->event_max; + nr = tail >= head ? tail - head : ring->nr - head; + *events = ring->io_events + head; + /* To avoid speculative loads of s->events[i] before observing tail. + Paired with smp_wmb() inside linux/fs/aio.c: aio_complete(). */ + smp_rmb(); + + return nr; +} + +/** + * io_getevents_commit: + * @ctx: AIO context + * @nr: the number of events on which head should be advanced + * + * Advances head of a ring buffer. + */ +static inline void io_getevents_commit(io_context_t ctx, unsigned int nr) +{ + struct aio_ring *ring = (struct aio_ring *)ctx; + + if (nr) { + ring->head = (ring->head + nr) % ring->nr; } +} + +/** + * io_getevents_advance_and_peek: + * @ctx: AIO context + * @events: pointer on events array, output value + * @nr: the number of events on which head should be advanced + * + * Advances head of a ring buffer and returns number of elements left. + */ +static inline unsigned int +io_getevents_advance_and_peek(io_context_t ctx, + struct io_event **events, + unsigned int nr) +{ + io_getevents_commit(ctx, nr); + return io_getevents_peek(ctx, events); +} + +/** + * qemu_laio_process_completions: + * @s: AIO state + * + * Fetches completed I/O requests and invokes their callbacks. + * + * The function is somewhat tricky because it supports nested event loops, for + * example when a request callback invokes aio_poll(). In order to do this, + * indices are kept in LinuxAioState. Function schedules BH completion so it + * can be called again in a nested event loop. When there are no events left + * to complete the BH is being canceled. 
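+ *
+ * [Editorial note, not part of the original patch: nesting works through the
+ *  shared event_idx/event_max counters.  A nested invocation that consumes
+ *  everything finishes with event_max = 0, so when control returns to the
+ *  outer invocation its
+ *
+ *      for (s->event_idx = 0; s->event_idx < s->event_max; )
+ *
+ *  loop terminates immediately instead of re-processing stale events.]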
+ */ +static void qemu_laio_process_completions(LinuxAioState *s) +{ + struct io_event *events; /* Reschedule so nested event loops see currently pending completions */ qemu_bh_schedule(s->completion_bh); - /* Process completion events */ - while (s->event_idx < s->event_max) { - struct iocb *iocb = s->events[s->event_idx].obj; - struct qemu_laiocb *laiocb = + while ((s->event_max = io_getevents_advance_and_peek(s->ctx, &events, + s->event_idx))) { + for (s->event_idx = 0; s->event_idx < s->event_max; ) { + struct iocb *iocb = events[s->event_idx].obj; + struct qemu_laiocb *laiocb = container_of(iocb, struct qemu_laiocb, iocb); - laiocb->ret = io_event_ret(&s->events[s->event_idx]); - s->event_idx++; + laiocb->ret = io_event_ret(&events[s->event_idx]); - qemu_laio_process_completion(laiocb); + /* Change counters one-by-one because we can be nested. */ + s->io_q.in_flight--; + s->event_idx++; + qemu_laio_process_completion(laiocb); + } } + qemu_bh_cancel(s->completion_bh); + + /* If we are nested we have to notify the level above that we are done + * by setting event_max to zero, upper level will then jump out of it's + * own `for` loop. If we are the last all counters droped to zero. */ + s->event_max = 0; + s->event_idx = 0; +} + +static void qemu_laio_process_completions_and_submit(LinuxAioState *s) +{ + qemu_laio_process_completions(s); if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { ioq_submit(s); } +} - qemu_bh_cancel(s->completion_bh); +static void qemu_laio_completion_bh(void *opaque) +{ + LinuxAioState *s = opaque; + + qemu_laio_process_completions_and_submit(s); } static void qemu_laio_completion_cb(EventNotifier *e) @@ -160,7 +251,7 @@ static void qemu_laio_completion_cb(EventNotifier *e) LinuxAioState *s = container_of(e, LinuxAioState, e); if (event_notifier_test_and_clear(&s->e)) { - qemu_laio_completion_bh(s); + qemu_laio_process_completions_and_submit(s); } } @@ -236,6 +327,19 @@ static void ioq_submit(LinuxAioState *s) QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed); } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending)); s->io_q.blocked = (s->io_q.in_queue > 0); + + if (s->io_q.in_flight) { + /* We can try to complete something just right away if there are + * still requests in-flight. */ + qemu_laio_process_completions(s); + /* + * Even we have completed everything (in_flight == 0), the queue can + * have still pended requests (in_queue > 0). We do not attempt to + * repeat submission to avoid IO hang. The reason is simple: s->e is + * still set and completion callback will be called shortly and all + * pended requests will be submitted from there. 
+ */ + } } void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) @@ -293,6 +397,7 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, .co = qemu_coroutine_self(), .nbytes = qiov->size, .ctx = s, + .ret = -EINPROGRESS, .is_read = (type == QEMU_AIO_READ), .qiov = qiov, }; @@ -302,7 +407,9 @@ int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, return ret; } - qemu_coroutine_yield(); + if (laiocb.ret == -EINPROGRESS) { + qemu_coroutine_yield(); + } return laiocb.ret; } diff --git a/block/mirror.c b/block/mirror.c index e0b3f4180f..301ba9219a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" #include "trace.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" #include "block/block_int.h" #include "sysemu/block-backend.h" #include "qapi/error.h" @@ -55,7 +55,7 @@ typedef struct MirrorBlockJob { int64_t bdev_length; unsigned long *cow_bitmap; BdrvDirtyBitmap *dirty_bitmap; - HBitmapIter hbi; + BdrvDirtyBitmapIter *dbi; uint8_t *buf; QSIMPLEQ_HEAD(, MirrorBuffer) buf_free; int buf_free_count; @@ -330,10 +330,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT, MAX_IO_SECTORS); - sector_num = hbitmap_iter_next(&s->hbi); + sector_num = bdrv_dirty_iter_next(s->dbi); if (sector_num < 0) { - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); - sector_num = hbitmap_iter_next(&s->hbi); + bdrv_set_dirty_iter(s->dbi, 0); + sector_num = bdrv_dirty_iter_next(s->dbi); trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); assert(sector_num >= 0); } @@ -349,7 +349,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) /* Find the number of consective dirty chunks following the first dirty * one, and wait for in flight requests in them. */ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { - int64_t hbitmap_next; + int64_t next_dirty; int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk; int64_t next_chunk = next_sector / sectors_per_chunk; if (next_sector >= end || @@ -360,13 +360,13 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) break; } - hbitmap_next = hbitmap_iter_next(&s->hbi); - if (hbitmap_next > next_sector || hbitmap_next < 0) { + next_dirty = bdrv_dirty_iter_next(s->dbi); + if (next_dirty > next_sector || next_dirty < 0) { /* The bitmap iterator's cache is stale, refresh it */ - bdrv_set_dirty_iter(&s->hbi, next_sector); - hbitmap_next = hbitmap_iter_next(&s->hbi); + bdrv_set_dirty_iter(s->dbi, next_sector); + next_dirty = bdrv_dirty_iter_next(s->dbi); } - assert(hbitmap_next == next_sector); + assert(next_dirty == next_sector); nb_chunks++; } @@ -469,7 +469,11 @@ static void mirror_free_init(MirrorBlockJob *s) } } -static void mirror_drain(MirrorBlockJob *s) +/* This is also used for the .pause callback. There is no matching + * mirror_resume() because mirror_run() will begin iterating again + * when the job is resumed. 
+ */ +static void mirror_wait_for_all_io(MirrorBlockJob *s) { while (s->in_flight > 0) { mirror_wait_for_io(s); @@ -526,8 +530,8 @@ static void mirror_exit(BlockJob *job, void *opaque) aio_context_release(replace_aio_context); } g_free(s->replaces); - bdrv_op_unblock_all(target_bs, s->common.blocker); blk_unref(s->target); + s->target = NULL; block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); @@ -582,7 +586,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) sector_num += nb_sectors; } - mirror_drain(s); + mirror_wait_for_all_io(s); } /* First part, loop on the sectors and initialize the dirty bitmap. */ @@ -611,12 +615,27 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) return 0; } +/* Called when going out of the streaming phase to flush the bulk of the + * data to the medium, or just before completing. + */ +static int mirror_flush(MirrorBlockJob *s) +{ + int ret = blk_flush(s->target); + if (ret < 0) { + if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) { + s->ret = ret; + } + } + return ret; +} + static void coroutine_fn mirror_run(void *opaque) { MirrorBlockJob *s = opaque; MirrorExitData *data; BlockDriverState *bs = blk_bs(s->common.blk); BlockDriverState *target_bs = blk_bs(s->target); + bool need_drain = true; int64_t length; BlockDriverInfo bdi; char backing_filename[2]; /* we only need 2 characters because we are only @@ -679,7 +698,8 @@ static void coroutine_fn mirror_run(void *opaque) } } - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); + assert(!s->dbi); + s->dbi = bdrv_dirty_iter_new(s->dirty_bitmap, 0); for (;;) { uint64_t delay_ns = 0; int64_t cnt, delta; @@ -721,27 +741,23 @@ static void coroutine_fn mirror_run(void *opaque) should_complete = false; if (s->in_flight == 0 && cnt == 0) { trace_mirror_before_flush(s); - ret = blk_flush(s->target); - if (ret < 0) { - if (mirror_error_action(s, false, -ret) == - BLOCK_ERROR_ACTION_REPORT) { - goto immediate_exit; + if (!s->synced) { + if (mirror_flush(s) < 0) { + /* Go check s->ret. */ + continue; } - } else { /* We're out of the streaming phase. From now on, if the job * is cancelled we will actually complete all pending I/O and * report completion. This way, block-job-cancel will leave * the target in a consistent state. */ - if (!s->synced) { - block_job_event_ready(&s->common); - s->synced = true; - } - - should_complete = s->should_complete || - block_job_is_cancelled(&s->common); - cnt = bdrv_get_dirty_count(s->dirty_bitmap); + block_job_event_ready(&s->common); + s->synced = true; } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(s->dirty_bitmap); } if (cnt == 0 && should_complete) { @@ -751,11 +767,26 @@ static void coroutine_fn mirror_run(void *opaque) * source has dirty data to copy! * * Note that I/O can be submitted by the guest while - * mirror_populate runs. + * mirror_populate runs, so pause it now. Before deciding + * whether to switch to target check one last time if I/O has + * come in the meanwhile, and if not flush the data to disk. */ trace_mirror_before_drain(s, cnt); - bdrv_co_drain(bs); + + bdrv_drained_begin(bs); cnt = bdrv_get_dirty_count(s->dirty_bitmap); + if (cnt > 0 || mirror_flush(s) < 0) { + bdrv_drained_end(bs); + continue; + } + + /* The two disks are in sync. Exit and report successful + * completion. 
+ */ + assert(QLIST_EMPTY(&bs->tracked_requests)); + s->common.cancelled = false; + need_drain = false; + break; } ret = 0; @@ -768,13 +799,6 @@ static void coroutine_fn mirror_run(void *opaque) } else if (!should_complete) { delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0); block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns); - } else if (cnt == 0) { - /* The two disks are in sync. Exit and report successful - * completion. - */ - assert(QLIST_EMPTY(&bs->tracked_requests)); - s->common.cancelled = false; - break; } s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } @@ -786,20 +810,23 @@ static void coroutine_fn mirror_run(void *opaque) * the target is a copy of the source. */ assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common))); - mirror_drain(s); + assert(need_drain); + mirror_wait_for_all_io(s); } assert(s->in_flight == 0); qemu_vfree(s->buf); g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); + bdrv_dirty_iter_free(s->dbi); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); data = g_malloc(sizeof(*data)); data->ret = ret; - /* Before we switch to target in mirror_exit, make sure data doesn't - * change. */ - bdrv_drained_begin(bs); + + if (need_drain) { + bdrv_drained_begin(bs); + } block_job_defer_to_main_loop(&s->common, mirror_exit, data); } @@ -870,14 +897,11 @@ static void mirror_complete(BlockJob *job, Error **errp) block_job_enter(&s->common); } -/* There is no matching mirror_resume() because mirror_run() will begin - * iterating again when the job is resumed. - */ -static void coroutine_fn mirror_pause(BlockJob *job) +static void mirror_pause(BlockJob *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - mirror_drain(s); + mirror_wait_for_all_io(s); } static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context) @@ -887,28 +911,47 @@ static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context) blk_set_aio_context(s->target, new_context); } +static void mirror_drain(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + /* Need to keep a reference in case blk_drain triggers execution + * of mirror_complete... 
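+ *
+ * [Editorial note, not part of the original patch: the local reference is
+ *  what lets the drain outlive the job's own pointer, i.e.
+ *
+ *      blk_ref(target);
+ *      blk_drain(target);   // may complete the job and drop s->target
+ *      blk_unref(target);
+ *  ]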
+ */ + if (s->target) { + BlockBackend *target = s->target; + blk_ref(target); + blk_drain(target); + blk_unref(target); + } +} + static const BlockJobDriver mirror_job_driver = { .instance_size = sizeof(MirrorBlockJob), .job_type = BLOCK_JOB_TYPE_MIRROR, .set_speed = mirror_set_speed, + .start = mirror_run, .complete = mirror_complete, .pause = mirror_pause, .attached_aio_context = mirror_attached_aio_context, + .drain = mirror_drain, }; static const BlockJobDriver commit_active_job_driver = { .instance_size = sizeof(MirrorBlockJob), .job_type = BLOCK_JOB_TYPE_COMMIT, .set_speed = mirror_set_speed, + .start = mirror_run, .complete = mirror_complete, .pause = mirror_pause, .attached_aio_context = mirror_attached_aio_context, + .drain = mirror_drain, }; static void mirror_start_job(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, const char *replaces, - int64_t speed, uint32_t granularity, - int64_t buf_size, + int creation_flags, BlockDriverState *target, + const char *replaces, int64_t speed, + uint32_t granularity, int64_t buf_size, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, @@ -916,7 +959,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque, Error **errp, const BlockJobDriver *driver, - bool is_none_mode, BlockDriverState *base) + bool is_none_mode, BlockDriverState *base, + bool auto_complete) { MirrorBlockJob *s; @@ -935,7 +979,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, buf_size = DEFAULT_MIRROR_BUF_SIZE; } - s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp); + s = block_job_create(job_id, driver, bs, speed, creation_flags, + cb, opaque, errp); if (!s) { return; } @@ -952,6 +997,9 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, s->granularity = granularity; s->buf_size = ROUND_UP(buf_size, granularity); s->unmap = unmap; + if (auto_complete) { + s->should_complete = true; + } s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { @@ -961,11 +1009,18 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, return; } - bdrv_op_block_all(target, s->common.blocker); + block_job_add_bdrv(&s->common, target); + /* In commit_active_start() all intermediate nodes disappear, so + * any jobs in them must be blocked */ + if (bdrv_chain_contains(bs, target)) { + BlockDriverState *iter; + for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { + block_job_add_bdrv(&s->common, iter); + } + } - s->common.co = qemu_coroutine_create(mirror_run, s); - trace_mirror_start(bs, s, s->common.co, opaque); - qemu_coroutine_enter(s->common.co); + trace_mirror_start(bs, s, opaque); + block_job_start(&s->common); } void mirror_start(const char *job_id, BlockDriverState *bs, @@ -974,9 +1029,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, - BlockCompletionFunc *cb, - void *opaque, Error **errp) + bool unmap, Error **errp) { bool is_none_mode; BlockDriverState *base; @@ -987,17 +1040,17 @@ void mirror_start(const char *job_id, BlockDriverState *bs, } is_none_mode = mode == MIRROR_SYNC_MODE_NONE; base = mode == MIRROR_SYNC_MODE_TOP ? 
backing_bs(bs) : NULL; - mirror_start_job(job_id, bs, target, replaces, + mirror_start_job(job_id, bs, BLOCK_JOB_DEFAULT, target, replaces, speed, granularity, buf_size, backing_mode, - on_source_error, on_target_error, unmap, cb, opaque, errp, - &mirror_job_driver, is_none_mode, base); + on_source_error, on_target_error, unmap, NULL, NULL, errp, + &mirror_job_driver, is_none_mode, base, false); } void commit_active_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, int64_t speed, - BlockdevOnError on_error, - BlockCompletionFunc *cb, - void *opaque, Error **errp) + BlockDriverState *base, int creation_flags, + int64_t speed, BlockdevOnError on_error, + BlockCompletionFunc *cb, void *opaque, Error **errp, + bool auto_complete) { int64_t length, base_length; int orig_base_flags; @@ -1035,10 +1088,10 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, } } - mirror_start_job(job_id, bs, base, NULL, speed, 0, 0, + mirror_start_job(job_id, bs, creation_flags, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN, - on_error, on_error, false, cb, opaque, &local_err, - &commit_active_job_driver, false, base); + on_error, on_error, true, cb, opaque, &local_err, + &commit_active_job_driver, false, base, auto_complete); if (local_err) { error_propagate(errp, local_err); goto error_restore_flags; diff --git a/block/nbd-client.c b/block/nbd-client.c index 2cf3237ef3..3779c6c999 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -1,6 +1,7 @@ /* * QEMU Block driver for NBD * + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2008 Bull S.A.S. * Author: Laurent Vivier * @@ -32,7 +33,7 @@ #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) -static void nbd_recv_coroutines_enter_all(NbdClientSession *s) +static void nbd_recv_coroutines_enter_all(NBDClientSession *s) { int i; @@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s) static void nbd_teardown_connection(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); + NBDClientSession *client = nbd_get_client_session(bs); if (!client->ioc) { /* Already closed */ return; @@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs) static void nbd_reply_ready(void *opaque) { BlockDriverState *bs = opaque; - NbdClientSession *s = nbd_get_client_session(bs); + NBDClientSession *s = nbd_get_client_session(bs); uint64_t i; int ret; @@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque) } static int nbd_co_send_request(BlockDriverState *bs, - struct nbd_request *request, + NBDRequest *request, QEMUIOVector *qiov) { - NbdClientSession *s = nbd_get_client_session(bs); + NBDClientSession *s = nbd_get_client_session(bs); AioContext *aio_context; int rc, ret, i; @@ -166,9 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs, return rc; } -static void nbd_co_receive_reply(NbdClientSession *s, - struct nbd_request *request, - struct nbd_reply *reply, +static void nbd_co_receive_reply(NBDClientSession *s, + NBDRequest *request, + NBDReply *reply, QEMUIOVector *qiov) { int ret; @@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s, } } -static void nbd_coroutine_start(NbdClientSession *s, - struct nbd_request *request) +static void nbd_coroutine_start(NBDClientSession *s, + NBDRequest *request) { /* Poor man semaphore. The free_sema is locked when no other request * can be accepted, and unlocked after receiving one reply. 
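 *
 * [Editorial note, not part of the original patch: after this change the
 *  throttle is a CoQueue rather than a CoMutex.  A requester that finds
 *  in_flight == MAX_NBD_REQUESTS parks itself with
 *  qemu_co_queue_wait(&s->free_sema) and is woken by the
 *  qemu_co_queue_next() call in nbd_coroutine_end() once a slot frees up.]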
*/ - if (s->in_flight >= MAX_NBD_REQUESTS - 1) { - qemu_co_mutex_lock(&s->free_sema); + if (s->in_flight == MAX_NBD_REQUESTS) { + qemu_co_queue_wait(&s->free_sema); assert(s->in_flight < MAX_NBD_REQUESTS); } s->in_flight++; @@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s, /* s->recv_coroutine[i] is set as soon as we get the send_lock. */ } -static void nbd_coroutine_end(NbdClientSession *s, - struct nbd_request *request) +static void nbd_coroutine_end(NBDClientSession *s, + NBDRequest *request) { int i = HANDLE_TO_INDEX(s, request->handle); s->recv_coroutine[i] = NULL; if (s->in_flight-- == MAX_NBD_REQUESTS) { - qemu_co_mutex_unlock(&s->free_sema); + qemu_co_queue_next(&s->free_sema); } } int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_READ, .from = offset, .len = bytes, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_WRITE, .from = offset, .len = bytes, }; - struct nbd_reply reply; + NBDReply reply; ssize_t ret; if (flags & BDRV_REQ_FUA) { assert(client->nbdflags & NBD_FLAG_SEND_FUA); - request.type |= NBD_CMD_FLAG_FUA; + request.flags |= NBD_CMD_FLAG_FUA; } assert(bytes <= NBD_MAX_BUFFER_SIZE); @@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, return -reply.error; } +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags) +{ + ssize_t ret; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { + .type = NBD_CMD_WRITE_ZEROES, + .from = offset, + .len = count, + }; + NBDReply reply; + + if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_FUA) { + assert(client->nbdflags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + request.flags |= NBD_CMD_FLAG_NO_HOLE; + } + + nbd_coroutine_start(client, &request); + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + reply.error = -ret; + } else { + nbd_co_receive_reply(client, &request, &reply, NULL); + } + nbd_coroutine_end(client, &request); + return -reply.error; +} + int nbd_client_co_flush(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { .type = NBD_CMD_FLUSH }; - struct nbd_reply reply; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_FLUSH }; + NBDReply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) { @@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs) int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_TRIM, .from = offset, .len = count, }; - struct 
nbd_reply reply; + NBDReply reply; ssize_t ret; if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) { @@ -342,12 +378,8 @@ void nbd_client_attach_aio_context(BlockDriverState *bs, void nbd_client_close(BlockDriverState *bs) { - NbdClientSession *client = nbd_get_client_session(bs); - struct nbd_request request = { - .type = NBD_CMD_DISC, - .from = 0, - .len = 0 - }; + NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { .type = NBD_CMD_DISC }; if (client->ioc == NULL) { return; @@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs, const char *hostname, Error **errp) { - NbdClientSession *client = nbd_get_client_session(bs); + NBDClientSession *client = nbd_get_client_session(bs); int ret; /* NBD handshake */ @@ -383,10 +415,14 @@ int nbd_client_init(BlockDriverState *bs, } if (client->nbdflags & NBD_FLAG_SEND_FUA) { bs->supported_write_flags = BDRV_REQ_FUA; + bs->supported_zero_flags |= BDRV_REQ_FUA; + } + if (client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) { + bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP; } qemu_co_mutex_init(&client->send_mutex); - qemu_co_mutex_init(&client->free_sema); + qemu_co_queue_init(&client->free_sema); client->sioc = sioc; object_ref(OBJECT(client->sioc)); diff --git a/block/nbd-client.h b/block/nbd-client.h index 044aca4530..f8d6006849 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -17,24 +17,24 @@ #define MAX_NBD_REQUESTS 16 -typedef struct NbdClientSession { +typedef struct NBDClientSession { QIOChannelSocket *sioc; /* The master data channel */ QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ uint16_t nbdflags; off_t size; CoMutex send_mutex; - CoMutex free_sema; + CoQueue free_sema; Coroutine *send_coroutine; int in_flight; Coroutine *recv_coroutine[MAX_NBD_REQUESTS]; - struct nbd_reply reply; + NBDReply reply; bool is_unix; -} NbdClientSession; +} NBDClientSession; -NbdClientSession *nbd_get_client_session(BlockDriverState *bs); +NBDClientSession *nbd_get_client_session(BlockDriverState *bs); int nbd_client_init(BlockDriverState *bs, QIOChannelSocket *sock, @@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd.c b/block/nbd.c index 6bc06d6198..35f24be069 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -32,6 +32,9 @@ #include "qemu/uri.h" #include "block/block_int.h" #include "qemu/module.h" +#include "qapi-visit.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qint.h" @@ -41,10 +44,11 @@ #define EN_OPTSTR ":exportname=" typedef struct BDRVNBDState { - NbdClientSession client; + NBDClientSession client; /* For nbd_refresh_filename() */ - char *path, *host, *port, *export, *tlscredsid; + SocketAddress *saddr; + char *export, *tlscredsid; } BDRVNBDState; static int nbd_parse_uri(const char *filename, QDict *options) @@ -90,9 +94,13 @@ static int nbd_parse_uri(const char *filename, QDict *options) ret = -EINVAL; goto out; } - qdict_put(options, "path", qstring_from_str(qp->p[0].value)); + qdict_put(options, "server.type", qstring_from_str("unix")); 
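The dotted "server." keys being added here are consumed later in this patch by the new nbd_config(): the sub-QDict is extracted, crumpled into a nested structure, and run through the QObject input visitor to yield a SocketAddress. A minimal sketch of that round trip, assuming only the helpers this change itself pulls in (the function name is illustrative and not part of the patch):

    /* Needs the headers this patch adds to block/nbd.c:
     * "qapi-visit.h", "qapi/qobject-input-visitor.h", "qapi/qmp/qdict.h" */
    static SocketAddress *example_options_to_saddr(QDict *options, Error **errp)
    {
        QDict *addr = NULL;
        QObject *crumpled;
        Visitor *iv;
        SocketAddress *saddr = NULL;

        /* Pull every "server.*" key into its own dict, dropping the prefix */
        qdict_extract_subqdict(options, &addr, "server.");
        /* Turn the flat "type"/"data.host"/"data.port" keys into a nested QDict */
        crumpled = qdict_crumple(addr, errp);
        if (crumpled) {
            /* Let the generated QAPI visitor build the SocketAddress */
            iv = qobject_input_visitor_new(crumpled, true);
            visit_type_SocketAddress(iv, NULL, &saddr, errp);
            visit_free(iv);
            qobject_decref(crumpled);
        }
        QDECREF(addr);
        return saddr; /* NULL on error; free with qapi_free_SocketAddress() */
    }
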
+ qdict_put(options, "server.data.path", + qstring_from_str(qp->p[0].value)); } else { QString *host; + char *port_str; + /* nbd[+tcp]://host[:port]/export */ if (!uri->server) { ret = -EINVAL; @@ -107,12 +115,12 @@ static int nbd_parse_uri(const char *filename, QDict *options) host = qstring_from_str(uri->server); } - qdict_put(options, "host", host); - if (uri->port) { - char* port_str = g_strdup_printf("%d", uri->port); - qdict_put(options, "port", qstring_from_str(port_str)); - g_free(port_str); - } + qdict_put(options, "server.type", qstring_from_str("inet")); + qdict_put(options, "server.data.host", host); + + port_str = g_strdup_printf("%d", uri->port ?: NBD_DEFAULT_PORT); + qdict_put(options, "server.data.port", qstring_from_str(port_str)); + g_free(port_str); } out: @@ -123,6 +131,26 @@ static int nbd_parse_uri(const char *filename, QDict *options) return ret; } +static bool nbd_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *e; + + for (e = qdict_first(options); e; e = qdict_next(options, e)) { + if (!strcmp(e->key, "host") || + !strcmp(e->key, "port") || + !strcmp(e->key, "path") || + !strcmp(e->key, "export") || + strstart(e->key, "server.", NULL)) + { + error_setg(errp, "Option '%s' cannot be used with a file name", + e->key); + return true; + } + } + + return false; +} + static void nbd_parse_filename(const char *filename, QDict *options, Error **errp) { @@ -131,12 +159,7 @@ static void nbd_parse_filename(const char *filename, QDict *options, const char *host_spec; const char *unixpath; - if (qdict_haskey(options, "host") - || qdict_haskey(options, "port") - || qdict_haskey(options, "path")) - { - error_setg(errp, "host/port/path and a file name may not be specified " - "at the same time"); + if (nbd_has_filename_options_conflict(options, errp)) { return; } @@ -173,7 +196,8 @@ static void nbd_parse_filename(const char *filename, QDict *options, /* are we a UNIX or TCP socket? 
*/ if (strstart(host_spec, "unix:", &unixpath)) { - qdict_put(options, "path", qstring_from_str(unixpath)); + qdict_put(options, "server.type", qstring_from_str("unix")); + qdict_put(options, "server.data.path", qstring_from_str(unixpath)); } else { InetSocketAddress *addr = NULL; @@ -182,8 +206,9 @@ static void nbd_parse_filename(const char *filename, QDict *options, goto out; } - qdict_put(options, "host", qstring_from_str(addr->host)); - qdict_put(options, "port", qstring_from_str(addr->port)); + qdict_put(options, "server.type", qstring_from_str("inet")); + qdict_put(options, "server.data.host", qstring_from_str(addr->host)); + qdict_put(options, "server.data.port", qstring_from_str(addr->port)); qapi_free_InetSocketAddress(addr); } @@ -191,51 +216,85 @@ static void nbd_parse_filename(const char *filename, QDict *options, g_free(file); } -static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp) +static bool nbd_process_legacy_socket_options(QDict *output_options, + QemuOpts *legacy_opts, + Error **errp) { - SocketAddress *saddr; + const char *path = qemu_opt_get(legacy_opts, "path"); + const char *host = qemu_opt_get(legacy_opts, "host"); + const char *port = qemu_opt_get(legacy_opts, "port"); + const QDictEntry *e; - s->path = g_strdup(qemu_opt_get(opts, "path")); - s->host = g_strdup(qemu_opt_get(opts, "host")); + if (!path && !host && !port) { + return true; + } - if (!s->path == !s->host) { - if (s->path) { - error_setg(errp, "path and host may not be used at the same time."); - } else { - error_setg(errp, "one of path and host must be specified."); + for (e = qdict_first(output_options); e; e = qdict_next(output_options, e)) + { + if (strstart(e->key, "server.", NULL)) { + error_setg(errp, "Cannot use 'server' and path/host/port at the " + "same time"); + return false; } - return NULL; } - saddr = g_new0(SocketAddress, 1); + if (path && host) { + error_setg(errp, "path and host may not be used at the same time"); + return false; + } else if (path) { + if (port) { + error_setg(errp, "port may not be used without host"); + return false; + } - if (s->path) { - UnixSocketAddress *q_unix; - saddr->type = SOCKET_ADDRESS_KIND_UNIX; - q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1); - q_unix->path = g_strdup(s->path); - } else { - InetSocketAddress *inet; + qdict_put(output_options, "server.type", qstring_from_str("unix")); + qdict_put(output_options, "server.data.path", qstring_from_str(path)); + } else if (host) { + qdict_put(output_options, "server.type", qstring_from_str("inet")); + qdict_put(output_options, "server.data.host", qstring_from_str(host)); + qdict_put(output_options, "server.data.port", + qstring_from_str(port ?: stringify(NBD_DEFAULT_PORT))); + } - s->port = g_strdup(qemu_opt_get(opts, "port")); + return true; +} - saddr->type = SOCKET_ADDRESS_KIND_INET; - inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1); - inet->host = g_strdup(s->host); - inet->port = g_strdup(s->port); - if (!inet->port) { - inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); - } +static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, Error **errp) +{ + SocketAddress *saddr = NULL; + QDict *addr = NULL; + QObject *crumpled_addr = NULL; + Visitor *iv = NULL; + Error *local_err = NULL; + + qdict_extract_subqdict(options, &addr, "server."); + if (!qdict_size(addr)) { + error_setg(errp, "NBD server address missing"); + goto done; } - s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; + crumpled_addr = qdict_crumple(addr, errp); + if 
(!crumpled_addr) { + goto done; + } - s->export = g_strdup(qemu_opt_get(opts, "export")); + iv = qobject_input_visitor_new(crumpled_addr, true); + visit_type_SocketAddress(iv, NULL, &saddr, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto done; + } + + s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; +done: + QDECREF(addr); + qobject_decref(crumpled_addr); + visit_free(iv); return saddr; } -NbdClientSession *nbd_get_client_session(BlockDriverState *bs) +NBDClientSession *nbd_get_client_session(BlockDriverState *bs) { BDRVNBDState *s = bs->opaque; return &s->client; @@ -248,6 +307,7 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr, Error *local_err = NULL; sioc = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client"); qio_channel_socket_connect_sync(sioc, saddr, @@ -332,7 +392,6 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts = NULL; Error *local_err = NULL; QIOChannelSocket *sioc = NULL; - SocketAddress *saddr = NULL; QCryptoTLSCreds *tlscreds = NULL; const char *hostname = NULL; int ret = -EINVAL; @@ -344,12 +403,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, goto error; } + /* Translate @host, @port, and @path to a SocketAddress */ + if (!nbd_process_legacy_socket_options(options, opts, errp)) { + goto error; + } + /* Pop the config into our state object. Exit if invalid. */ - saddr = nbd_config(s, opts, errp); - if (!saddr) { + s->saddr = nbd_config(s, options, errp); + if (!s->saddr) { goto error; } + s->export = g_strdup(qemu_opt_get(opts, "export")); + s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds")); if (s->tlscredsid) { tlscreds = nbd_get_tls_creds(s->tlscredsid, errp); @@ -357,17 +423,17 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, goto error; } - if (saddr->type != SOCKET_ADDRESS_KIND_INET) { + if (s->saddr->type != SOCKET_ADDRESS_KIND_INET) { error_setg(errp, "TLS only supported over IP sockets"); goto error; } - hostname = saddr->u.inet.data->host; + hostname = s->saddr->u.inet.data->host; } /* establish TCP connection, return error if it fails * TODO: Configurable retry-until-timeout behaviour. 
*/ - sioc = nbd_establish_connection(saddr, errp); + sioc = nbd_establish_connection(s->saddr, errp); if (!sioc) { ret = -ECONNREFUSED; goto error; @@ -384,13 +450,10 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, object_unref(OBJECT(tlscreds)); } if (ret < 0) { - g_free(s->path); - g_free(s->host); - g_free(s->port); + qapi_free_SocketAddress(s->saddr); g_free(s->export); g_free(s->tlscredsid); } - qapi_free_SocketAddress(saddr); qemu_opts_del(opts); return ret; } @@ -403,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs) static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE; + bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } @@ -412,9 +476,7 @@ static void nbd_close(BlockDriverState *bs) nbd_client_close(bs); - g_free(s->path); - g_free(s->host); - g_free(s->port); + qapi_free_SocketAddress(s->saddr); g_free(s->export); g_free(s->tlscredsid); } @@ -441,45 +503,52 @@ static void nbd_refresh_filename(BlockDriverState *bs, QDict *options) { BDRVNBDState *s = bs->opaque; QDict *opts = qdict_new(); + QObject *saddr_qdict; + Visitor *ov; + const char *host = NULL, *port = NULL, *path = NULL; + + if (s->saddr->type == SOCKET_ADDRESS_KIND_INET) { + const InetSocketAddress *inet = s->saddr->u.inet.data; + if (!inet->has_ipv4 && !inet->has_ipv6 && !inet->has_to) { + host = inet->host; + port = inet->port; + } + } else if (s->saddr->type == SOCKET_ADDRESS_KIND_UNIX) { + path = s->saddr->u.q_unix.data->path; + } - qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd"))); + qdict_put(opts, "driver", qstring_from_str("nbd")); - if (s->path && s->export) { + if (path && s->export) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "nbd+unix:///%s?socket=%s", s->export, s->path); - } else if (s->path && !s->export) { + "nbd+unix:///%s?socket=%s", s->export, path); + } else if (path && !s->export) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "nbd+unix://?socket=%s", s->path); - } else if (!s->path && s->export && s->port) { + "nbd+unix://?socket=%s", path); + } else if (host && s->export) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "nbd://%s:%s/%s", s->host, s->port, s->export); - } else if (!s->path && s->export && !s->port) { + "nbd://%s:%s/%s", host, port, s->export); + } else if (host && !s->export) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "nbd://%s/%s", s->host, s->export); - } else if (!s->path && !s->export && s->port) { - snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "nbd://%s:%s", s->host, s->port); - } else if (!s->path && !s->export && !s->port) { - snprintf(bs->exact_filename, sizeof(bs->exact_filename), - "nbd://%s", s->host); + "nbd://%s:%s", host, port); } - if (s->path) { - qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path))); - } else if (s->port) { - qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host))); - qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port))); - } else { - qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host))); - } + ov = qobject_output_visitor_new(&saddr_qdict); + visit_type_SocketAddress(ov, NULL, &s->saddr, &error_abort); + visit_complete(ov, &saddr_qdict); + visit_free(ov); + assert(qobject_type(saddr_qdict) == QTYPE_QDICT); + + qdict_put_obj(opts, "server", saddr_qdict); + if (s->export) { - qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export))); + qdict_put(opts, "export", 
qstring_from_str(s->export)); } if (s->tlscredsid) { - qdict_put_obj(opts, "tls-creds", - QOBJECT(qstring_from_str(s->tlscredsid))); + qdict_put(opts, "tls-creds", qstring_from_str(s->tlscredsid)); } + qdict_flatten(opts); bs->full_open_options = opts; } @@ -491,6 +560,7 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -509,6 +579,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -527,6 +598,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, diff --git a/block/nfs.c b/block/nfs.c index 8602a44211..a490660027 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -35,8 +35,15 @@ #include "qemu/uri.h" #include "qemu/cutils.h" #include "sysemu/sysemu.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qstring.h" +#include "qapi-visit.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include + #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 #define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE) #define QEMU_NFS_MAX_DEBUG_LEVEL 2 @@ -49,18 +56,137 @@ typedef struct NFSClient { AioContext *aio_context; blkcnt_t st_blocks; bool cache_used; + NFSServer *server; + char *path; + int64_t uid, gid, tcp_syncnt, readahead, pagecache, debug; } NFSClient; typedef struct NFSRPC { + BlockDriverState *bs; int ret; int complete; QEMUIOVector *iov; struct stat *st; Coroutine *co; - QEMUBH *bh; NFSClient *client; } NFSRPC; +static int nfs_parse_uri(const char *filename, QDict *options, Error **errp) +{ + URI *uri = NULL; + QueryParams *qp = NULL; + int ret = -EINVAL, i; + + uri = uri_parse(filename); + if (!uri) { + error_setg(errp, "Invalid URI specified"); + goto out; + } + if (strcmp(uri->scheme, "nfs") != 0) { + error_setg(errp, "URI scheme must be 'nfs'"); + goto out; + } + + if (!uri->server) { + error_setg(errp, "missing hostname in URI"); + goto out; + } + + if (!uri->path) { + error_setg(errp, "missing file path in URI"); + goto out; + } + + qp = query_params_parse(uri->query); + if (!qp) { + error_setg(errp, "could not parse query parameters"); + goto out; + } + + qdict_put(options, "server.host", qstring_from_str(uri->server)); + qdict_put(options, "server.type", qstring_from_str("inet")); + qdict_put(options, "path", qstring_from_str(uri->path)); + + for (i = 0; i < qp->n; i++) { + if (!qp->p[i].value) { + error_setg(errp, "Value for NFS parameter expected: %s", + qp->p[i].name); + goto out; + } + if (parse_uint_full(qp->p[i].value, NULL, 0)) { + error_setg(errp, "Illegal value for NFS parameter: %s", + qp->p[i].name); + goto out; + } + if (!strcmp(qp->p[i].name, "uid")) { + qdict_put(options, "user", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "gid")) { + qdict_put(options, "group", + qstring_from_str(qp->p[i].value)); + } else if 
(!strcmp(qp->p[i].name, "tcp-syncnt")) { + qdict_put(options, "tcp-syn-count", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "readahead")) { + qdict_put(options, "readahead-size", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "pagecache")) { + qdict_put(options, "page-cache-size", + qstring_from_str(qp->p[i].value)); + } else if (!strcmp(qp->p[i].name, "debug")) { + qdict_put(options, "debug", + qstring_from_str(qp->p[i].value)); + } else { + error_setg(errp, "Unknown NFS parameter name: %s", + qp->p[i].name); + goto out; + } + } + ret = 0; +out: + if (qp) { + query_params_free(qp); + } + if (uri) { + uri_free(uri); + } + return ret; +} + +static bool nfs_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *qe; + + for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) { + if (!strcmp(qe->key, "host") || + !strcmp(qe->key, "path") || + !strcmp(qe->key, "user") || + !strcmp(qe->key, "group") || + !strcmp(qe->key, "tcp-syn-count") || + !strcmp(qe->key, "readahead-size") || + !strcmp(qe->key, "page-cache-size") || + !strcmp(qe->key, "debug") || + strstart(qe->key, "server.", NULL)) + { + error_setg(errp, "Option %s cannot be used with a filename", + qe->key); + return true; + } + } + + return false; +} + +static void nfs_parse_filename(const char *filename, QDict *options, + Error **errp) +{ + if (nfs_has_filename_options_conflict(options, errp)) { + return; + } + + nfs_parse_uri(filename, options, errp); +} + static void nfs_process_read(void *arg); static void nfs_process_write(void *arg); @@ -91,11 +217,12 @@ static void nfs_process_write(void *arg) nfs_set_events(client); } -static void nfs_co_init_task(NFSClient *client, NFSRPC *task) +static void nfs_co_init_task(BlockDriverState *bs, NFSRPC *task) { *task = (NFSRPC) { .co = qemu_coroutine_self(), - .client = client, + .bs = bs, + .client = bs->opaque, }; } @@ -103,7 +230,6 @@ static void nfs_co_generic_bh_cb(void *opaque) { NFSRPC *task = opaque; task->complete = 1; - qemu_bh_delete(task->bh); qemu_coroutine_enter(task->co); } @@ -113,6 +239,7 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, { NFSRPC *task = private_data; task->ret = ret; + assert(!task->st); if (task->ret > 0 && task->iov) { if (task->ret <= task->iov->size) { qemu_iovec_from_buf(task->iov, 0, data, task->ret); @@ -120,19 +247,11 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data, task->ret = -EIO; } } - if (task->ret == 0 && task->st) { - memcpy(task->st, data, sizeof(struct stat)); - } if (task->ret < 0) { error_report("NFS Error: %s", nfs_get_error(nfs)); } - if (task->co) { - task->bh = aio_bh_new(task->client->aio_context, - nfs_co_generic_bh_cb, task); - qemu_bh_schedule(task->bh); - } else { - task->complete = 1; - } + aio_bh_schedule_oneshot(task->client->aio_context, + nfs_co_generic_bh_cb, task); } static int coroutine_fn nfs_co_readv(BlockDriverState *bs, @@ -142,7 +261,7 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs, NFSClient *client = bs->opaque; NFSRPC task; - nfs_co_init_task(client, &task); + nfs_co_init_task(bs, &task); task.iov = iov; if (nfs_pread_async(client->context, client->fh, @@ -152,8 +271,8 @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs, return -ENOMEM; } + nfs_set_events(client); while (!task.complete) { - nfs_set_events(client); qemu_coroutine_yield(); } @@ -177,7 +296,7 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs, NFSRPC task; char *buf = NULL; - 
nfs_co_init_task(client, &task); + nfs_co_init_task(bs, &task); buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE); if (nb_sectors && buf == NULL) { @@ -194,8 +313,8 @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs, return -ENOMEM; } + nfs_set_events(client); while (!task.complete) { - nfs_set_events(client); qemu_coroutine_yield(); } @@ -213,30 +332,59 @@ static int coroutine_fn nfs_co_flush(BlockDriverState *bs) NFSClient *client = bs->opaque; NFSRPC task; - nfs_co_init_task(client, &task); + nfs_co_init_task(bs, &task); if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb, &task) != 0) { return -ENOMEM; } + nfs_set_events(client); while (!task.complete) { - nfs_set_events(client); qemu_coroutine_yield(); } return task.ret; } -/* TODO Convert to fine grained options */ static QemuOptsList runtime_opts = { .name = "nfs", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { - .name = "filename", + .name = "path", .type = QEMU_OPT_STRING, - .help = "URL to the NFS file", + .help = "Path of the image on the host", + }, + { + .name = "uid", + .type = QEMU_OPT_NUMBER, + .help = "UID value to use when talking to the server", + }, + { + .name = "gid", + .type = QEMU_OPT_NUMBER, + .help = "GID value to use when talking to the server", + }, + { + .name = "tcp-syncnt", + .type = QEMU_OPT_NUMBER, + .help = "Number of SYNs to send during the session establish", + }, + { + .name = "readahead", + .type = QEMU_OPT_NUMBER, + .help = "Set the readahead size in bytes", + }, + { + .name = "pagecache", + .type = QEMU_OPT_NUMBER, + .help = "Set the pagecache size in bytes", + }, + { + .name = "debug", + .type = QEMU_OPT_NUMBER, + .help = "Set the NFS debug level (max 2)", }, { /* end of list */ } }, @@ -279,25 +427,65 @@ static void nfs_file_close(BlockDriverState *bs) nfs_client_close(client); } -static int64_t nfs_client_open(NFSClient *client, const char *filename, +static NFSServer *nfs_config(QDict *options, Error **errp) +{ + NFSServer *server = NULL; + QDict *addr = NULL; + QObject *crumpled_addr = NULL; + Visitor *iv = NULL; + Error *local_error = NULL; + + qdict_extract_subqdict(options, &addr, "server."); + if (!qdict_size(addr)) { + error_setg(errp, "NFS server address missing"); + goto out; + } + + crumpled_addr = qdict_crumple(addr, errp); + if (!crumpled_addr) { + goto out; + } + + iv = qobject_input_visitor_new(crumpled_addr, true); + visit_type_NFSServer(iv, NULL, &server, &local_error); + if (local_error) { + error_propagate(errp, local_error); + goto out; + } + +out: + QDECREF(addr); + qobject_decref(crumpled_addr); + visit_free(iv); + return server; +} + + +static int64_t nfs_client_open(NFSClient *client, QDict *options, int flags, Error **errp, int open_flags) { - int ret = -EINVAL, i; + int ret = -EINVAL; + QemuOpts *opts = NULL; + Error *local_err = NULL; struct stat st; - URI *uri; - QueryParams *qp = NULL; char *file = NULL, *strp = NULL; - uri = uri_parse(filename); - if (!uri) { - error_setg(errp, "Invalid URL specified"); + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; goto fail; } - if (!uri->server) { - error_setg(errp, "Invalid URL specified"); + + client->path = g_strdup(qemu_opt_get(opts, "path")); + if (!client->path) { + ret = -EINVAL; + error_setg(errp, "No path was specified"); goto fail; } - strp = strrchr(uri->path, '/'); + + strp = strrchr(client->path, '/'); if (strp == NULL) { 
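    /*
     * strp == NULL means the configured path contains no '/', i.e. no file
     * component. In the normal case the code below splits the path in place;
     * with an illustrative value of "/export/images/disk.qcow2" (not taken
     * from the patch itself):
     *
     *   strp = strrchr(client->path, '/');  // points at "/disk.qcow2"
     *   file = g_strdup(strp);              // file = "/disk.qcow2"
     *   *strp = 0;                          // client->path = "/export/images",
     *                                       //   which is what gets nfs_mount()ed
     *   ...
     *   *strp = '/';                        // full path restored once the open
     *                                       //   succeeded (used for refresh_filename)
     */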
error_setg(errp, "Invalid URL specified"); goto fail; @@ -305,85 +493,89 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, file = g_strdup(strp); *strp = 0; + /* Pop the config into our state object, Exit if invalid */ + client->server = nfs_config(options, errp); + if (!client->server) { + ret = -EINVAL; + goto fail; + } + client->context = nfs_init_context(); if (client->context == NULL) { error_setg(errp, "Failed to init NFS context"); goto fail; } - qp = query_params_parse(uri->query); - for (i = 0; i < qp->n; i++) { - unsigned long long val; - if (!qp->p[i].value) { - error_setg(errp, "Value for NFS parameter expected: %s", - qp->p[i].name); + if (qemu_opt_get(opts, "uid")) { + client->uid = qemu_opt_get_number(opts, "uid", 0); + nfs_set_uid(client->context, client->uid); + } + + if (qemu_opt_get(opts, "gid")) { + client->gid = qemu_opt_get_number(opts, "gid", 0); + nfs_set_gid(client->context, client->gid); + } + + if (qemu_opt_get(opts, "tcp-syncnt")) { + client->tcp_syncnt = qemu_opt_get_number(opts, "tcp-syncnt", 0); + nfs_set_tcp_syncnt(client->context, client->tcp_syncnt); + } + +#ifdef LIBNFS_FEATURE_READAHEAD + if (qemu_opt_get(opts, "readahead")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS readahead " + "if cache.direct = on"); goto fail; } - if (parse_uint_full(qp->p[i].value, &val, 0)) { - error_setg(errp, "Illegal value for NFS parameter: %s", - qp->p[i].name); - goto fail; + client->readahead = qemu_opt_get_number(opts, "readahead", 0); + if (client->readahead > QEMU_NFS_MAX_READAHEAD_SIZE) { + error_report("NFS Warning: Truncating NFS readahead " + "size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); + client->readahead = QEMU_NFS_MAX_READAHEAD_SIZE; } - if (!strcmp(qp->p[i].name, "uid")) { - nfs_set_uid(client->context, val); - } else if (!strcmp(qp->p[i].name, "gid")) { - nfs_set_gid(client->context, val); - } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) { - nfs_set_tcp_syncnt(client->context, val); -#ifdef LIBNFS_FEATURE_READAHEAD - } else if (!strcmp(qp->p[i].name, "readahead")) { - if (open_flags & BDRV_O_NOCACHE) { - error_setg(errp, "Cannot enable NFS readahead " - "if cache.direct = on"); - goto fail; - } - if (val > QEMU_NFS_MAX_READAHEAD_SIZE) { - error_report("NFS Warning: Truncating NFS readahead" - " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE); - val = QEMU_NFS_MAX_READAHEAD_SIZE; - } - nfs_set_readahead(client->context, val); + nfs_set_readahead(client->context, client->readahead); #ifdef LIBNFS_FEATURE_PAGECACHE - nfs_set_pagecache_ttl(client->context, 0); + nfs_set_pagecache_ttl(client->context, 0); #endif - client->cache_used = true; + client->cache_used = true; + } #endif + #ifdef LIBNFS_FEATURE_PAGECACHE - nfs_set_pagecache_ttl(client->context, 0); - } else if (!strcmp(qp->p[i].name, "pagecache")) { - if (open_flags & BDRV_O_NOCACHE) { - error_setg(errp, "Cannot enable NFS pagecache " - "if cache.direct = on"); - goto fail; - } - if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) { - error_report("NFS Warning: Truncating NFS pagecache" - " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); - val = QEMU_NFS_MAX_PAGECACHE_SIZE; - } - nfs_set_pagecache(client->context, val); - nfs_set_pagecache_ttl(client->context, 0); - client->cache_used = true; + if (qemu_opt_get(opts, "pagecache")) { + if (open_flags & BDRV_O_NOCACHE) { + error_setg(errp, "Cannot enable NFS pagecache " + "if cache.direct = on"); + goto fail; + } + client->pagecache = qemu_opt_get_number(opts, "pagecache", 0); + if (client->pagecache > 
QEMU_NFS_MAX_PAGECACHE_SIZE) { + error_report("NFS Warning: Truncating NFS pagecache " + "size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE); + client->pagecache = QEMU_NFS_MAX_PAGECACHE_SIZE; + } + nfs_set_pagecache(client->context, client->pagecache); + nfs_set_pagecache_ttl(client->context, 0); + client->cache_used = true; + } #endif + #ifdef LIBNFS_FEATURE_DEBUG - } else if (!strcmp(qp->p[i].name, "debug")) { - /* limit the maximum debug level to avoid potential flooding - * of our log files. */ - if (val > QEMU_NFS_MAX_DEBUG_LEVEL) { - error_report("NFS Warning: Limiting NFS debug level" - " to %d", QEMU_NFS_MAX_DEBUG_LEVEL); - val = QEMU_NFS_MAX_DEBUG_LEVEL; - } - nfs_set_debug(client->context, val); -#endif - } else { - error_setg(errp, "Unknown NFS parameter name: %s", - qp->p[i].name); - goto fail; + if (qemu_opt_get(opts, "debug")) { + client->debug = qemu_opt_get_number(opts, "debug", 0); + /* limit the maximum debug level to avoid potential flooding + * of our log files. */ + if (client->debug > QEMU_NFS_MAX_DEBUG_LEVEL) { + error_report("NFS Warning: Limiting NFS debug level " + "to %d", QEMU_NFS_MAX_DEBUG_LEVEL); + client->debug = QEMU_NFS_MAX_DEBUG_LEVEL; } + nfs_set_debug(client->context, client->debug); } +#endif - ret = nfs_mount(client->context, uri->server, uri->path); + ret = nfs_mount(client->context, client->server->host, client->path); if (ret < 0) { error_setg(errp, "Failed to mount nfs share: %s", nfs_get_error(client->context)); @@ -416,14 +608,13 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE); client->st_blocks = st.st_blocks; client->has_zero_init = S_ISREG(st.st_mode); + *strp = '/'; goto out; + fail: nfs_client_close(client); out: - if (qp) { - query_params_free(qp); - } - uri_free(uri); + qemu_opts_del(opts); g_free(file); return ret; } @@ -432,28 +623,17 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { NFSClient *client = bs->opaque; int64_t ret; - QemuOpts *opts; - Error *local_err = NULL; client->aio_context = bdrv_get_aio_context(bs); - opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto out; - } - ret = nfs_client_open(client, qemu_opt_get(opts, "filename"), + ret = nfs_client_open(client, options, (flags & BDRV_O_RDWR) ? 
O_RDWR : O_RDONLY, errp, bs->open_flags); if (ret < 0) { - goto out; + return ret; } bs->total_sectors = ret; ret = 0; -out: - qemu_opts_del(opts); return ret; } @@ -475,6 +655,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) int ret = 0; int64_t total_size = 0; NFSClient *client = g_new0(NFSClient, 1); + QDict *options = NULL; client->aio_context = qemu_get_aio_context(); @@ -482,13 +663,20 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp) total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); - ret = nfs_client_open(client, url, O_CREAT, errp, 0); + options = qdict_new(); + ret = nfs_parse_uri(url, options, errp); + if (ret < 0) { + goto out; + } + + ret = nfs_client_open(client, options, O_CREAT, errp, 0); if (ret < 0) { goto out; } ret = nfs_ftruncate(client->context, client->fh, total_size); nfs_client_close(client); out: + QDECREF(options); g_free(client); return ret; } @@ -499,6 +687,22 @@ static int nfs_has_zero_init(BlockDriverState *bs) return client->has_zero_init; } +static void +nfs_get_allocated_file_size_cb(int ret, struct nfs_context *nfs, void *data, + void *private_data) +{ + NFSRPC *task = private_data; + task->ret = ret; + if (task->ret == 0) { + memcpy(task->st, data, sizeof(struct stat)); + } + if (task->ret < 0) { + error_report("NFS Error: %s", nfs_get_error(nfs)); + } + task->complete = 1; + bdrv_wakeup(task->bs); +} + static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) { NFSClient *client = bs->opaque; @@ -510,16 +714,15 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) return client->st_blocks * 512; } + task.bs = bs; task.st = &st; - if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb, + if (nfs_fstat_async(client->context, client->fh, nfs_get_allocated_file_size_cb, &task) != 0) { return -ENOMEM; } - while (!task.complete) { - nfs_set_events(client); - aio_poll(client->aio_context, true); - } + nfs_set_events(client); + BDRV_POLL_WHILE(bs, !task.complete); return (task.ret < 0 ? 
task.ret : st.st_blocks * 512); } @@ -564,6 +767,67 @@ static int nfs_reopen_prepare(BDRVReopenState *state, return 0; } +static void nfs_refresh_filename(BlockDriverState *bs, QDict *options) +{ + NFSClient *client = bs->opaque; + QDict *opts = qdict_new(); + QObject *server_qdict; + Visitor *ov; + + qdict_put(opts, "driver", qstring_from_str("nfs")); + + if (client->uid && !client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?uid=%" PRId64, client->server->host, client->path, + client->uid); + } else if (!client->uid && client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?gid=%" PRId64, client->server->host, client->path, + client->gid); + } else if (client->uid && client->gid) { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s?uid=%" PRId64 "&gid=%" PRId64, + client->server->host, client->path, client->uid, client->gid); + } else { + snprintf(bs->exact_filename, sizeof(bs->exact_filename), + "nfs://%s%s", client->server->host, client->path); + } + + ov = qobject_output_visitor_new(&server_qdict); + visit_type_NFSServer(ov, NULL, &client->server, &error_abort); + visit_complete(ov, &server_qdict); + assert(qobject_type(server_qdict) == QTYPE_QDICT); + + qdict_put_obj(opts, "server", server_qdict); + qdict_put(opts, "path", qstring_from_str(client->path)); + + if (client->uid) { + qdict_put(opts, "uid", qint_from_int(client->uid)); + } + if (client->gid) { + qdict_put(opts, "gid", qint_from_int(client->gid)); + } + if (client->tcp_syncnt) { + qdict_put(opts, "tcp-syncnt", + qint_from_int(client->tcp_syncnt)); + } + if (client->readahead) { + qdict_put(opts, "readahead", + qint_from_int(client->readahead)); + } + if (client->pagecache) { + qdict_put(opts, "pagecache", + qint_from_int(client->pagecache)); + } + if (client->debug) { + qdict_put(opts, "debug", qint_from_int(client->debug)); + } + + visit_free(ov); + qdict_flatten(opts); + bs->full_open_options = opts; +} + #ifdef LIBNFS_FEATURE_PAGECACHE static void nfs_invalidate_cache(BlockDriverState *bs, Error **errp) @@ -578,7 +842,7 @@ static BlockDriver bdrv_nfs = { .protocol_name = "nfs", .instance_size = sizeof(NFSClient), - .bdrv_needs_filename = true, + .bdrv_parse_filename = nfs_parse_filename, .create_opts = &nfs_create_opts, .bdrv_has_zero_init = nfs_has_zero_init, @@ -596,6 +860,7 @@ static BlockDriver bdrv_nfs = { .bdrv_detach_aio_context = nfs_detach_aio_context, .bdrv_attach_aio_context = nfs_attach_aio_context, + .bdrv_refresh_filename = nfs_refresh_filename, #ifdef LIBNFS_FEATURE_PAGECACHE .bdrv_invalidate_cache = nfs_invalidate_cache, diff --git a/block/null.c b/block/null.c index b511010ba5..b300390944 100644 --- a/block/null.c +++ b/block/null.c @@ -124,7 +124,6 @@ static coroutine_fn int null_co_flush(BlockDriverState *bs) typedef struct { BlockAIOCB common; - QEMUBH *bh; QEMUTimer timer; } NullAIOCB; @@ -136,7 +135,6 @@ static void null_bh_cb(void *opaque) { NullAIOCB *acb = opaque; acb->common.cb(acb->common.opaque, 0); - qemu_bh_delete(acb->bh); qemu_aio_unref(acb); } @@ -164,8 +162,7 @@ static inline BlockAIOCB *null_aio_common(BlockDriverState *bs, timer_mod_ns(&acb->timer, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns); } else { - acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(bs), null_bh_cb, acb); } return &acb->common; } diff --git a/block/qapi.c b/block/qapi.c index 6f947e3e66..a62e862f3c 100644 --- a/block/qapi.c 
+++ b/block/qapi.c @@ -29,7 +29,7 @@ #include "block/write-threshold.h" #include "qmp-commands.h" #include "qapi-visit.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/qmp/types.h" #include "sysemu/block-backend.h" #include "qemu/cutils.h" @@ -691,13 +691,14 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f, ImageInfoSpecific *info_spec) { QObject *obj, *data; - Visitor *v = qmp_output_visitor_new(&obj); + Visitor *v = qobject_output_visitor_new(&obj); visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort); visit_complete(v, &obj); assert(qobject_type(obj) == QTYPE_QDICT); data = qdict_get(qobject_to_qdict(obj), "data"); dump_qobject(func_fprintf, f, 1, data); + qobject_decref(obj); visit_free(v); } diff --git a/block/qcow.c b/block/qcow.c index 6f9b2e2d26..7540f43f46 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -153,7 +153,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) { + if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC)) { error_setg(errp, "AES cipher not available"); ret = -EINVAL; goto fail; @@ -913,75 +914,32 @@ static int qcow_make_empty(BlockDriverState *bs) return 0; } -typedef struct QcowWriteCo { - BlockDriverState *bs; - int64_t sector_num; - const uint8_t *buf; - int nb_sectors; - int ret; -} QcowWriteCo; - -static void qcow_write_co_entry(void *opaque) -{ - QcowWriteCo *co = opaque; - QEMUIOVector qiov; - - struct iovec iov = (struct iovec) { - .iov_base = (uint8_t*) co->buf, - .iov_len = co->nb_sectors * BDRV_SECTOR_SIZE, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - - co->ret = qcow_co_writev(co->bs, co->sector_num, co->nb_sectors, &qiov); -} - -/* Wrapper for non-coroutine contexts */ -static int qcow_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - Coroutine *co; - AioContext *aio_context = bdrv_get_aio_context(bs); - QcowWriteCo data = { - .bs = bs, - .sector_num = sector_num, - .buf = buf, - .nb_sectors = nb_sectors, - .ret = -EINPROGRESS, - }; - co = qemu_coroutine_create(qcow_write_co_entry, &data); - qemu_coroutine_enter(co); - while (data.ret == -EINPROGRESS) { - aio_poll(aio_context, true); - } - return data.ret; -} - /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ -static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +static coroutine_fn int +qcow_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov) { BDRVQcowState *s = bs->opaque; + QEMUIOVector hd_qiov; + struct iovec iov; z_stream strm; int ret, out_len; - uint8_t *out_buf; + uint8_t *buf, *out_buf; uint64_t cluster_offset; - if (nb_sectors != s->cluster_sectors) { - ret = -EINVAL; - - /* Zero-pad last write if image size is not cluster aligned */ - if (sector_num + nb_sectors == bs->total_sectors && - nb_sectors < s->cluster_sectors) { - uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); - memset(pad_buf, 0, s->cluster_size); - memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); - ret = qcow_write_compressed(bs, sector_num, - pad_buf, s->cluster_sectors); - qemu_vfree(pad_buf); + buf = qemu_blockalign(bs, s->cluster_size); + if (bytes != s->cluster_size) { + if (bytes > s->cluster_size || + offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) + { + 
qemu_vfree(buf); + return -EINVAL; } - return ret; + /* Zero-pad last write if image size is not cluster aligned */ + memset(buf + bytes, 0, s->cluster_size - bytes); } + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); out_buf = g_malloc(s->cluster_size); @@ -1012,27 +970,35 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, if (ret != Z_STREAM_END || out_len >= s->cluster_size) { /* could not compress: write normal cluster */ - ret = qcow_write(bs, sector_num, buf, s->cluster_sectors); - if (ret < 0) { - goto fail; - } - } else { - cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, - out_len, 0, 0); - if (cluster_offset == 0) { - ret = -EIO; - goto fail; - } - - cluster_offset &= s->cluster_offset_mask; - ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); + ret = qcow_co_writev(bs, offset >> BDRV_SECTOR_BITS, + bytes >> BDRV_SECTOR_BITS, qiov); if (ret < 0) { goto fail; } + goto success; } + qemu_co_mutex_lock(&s->lock); + cluster_offset = get_cluster_offset(bs, offset, 2, out_len, 0, 0); + qemu_co_mutex_unlock(&s->lock); + if (cluster_offset == 0) { + ret = -EIO; + goto fail; + } + cluster_offset &= s->cluster_offset_mask; + iov = (struct iovec) { + .iov_base = out_buf, + .iov_len = out_len, + }; + qemu_iovec_init_external(&hd_qiov, &iov, 1); + ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); + if (ret < 0) { + goto fail; + } +success: ret = 0; fail: + qemu_vfree(buf); g_free(out_buf); return ret; } @@ -1085,7 +1051,7 @@ static BlockDriver bdrv_qcow = { .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, - .bdrv_write_compressed = qcow_write_compressed, + .bdrv_co_pwritev_compressed = qcow_co_pwritev_compressed, .bdrv_get_info = qcow_get_info, .create_opts = &qcow_create_opts, diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 6eaefeddc4..1d25147392 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -22,7 +22,6 @@ * THE SOFTWARE. 
*/ -/* Needed for CONFIG_MADVISE */ #include "qemu/osdep.h" #include "block/block_int.h" #include "qemu-common.h" @@ -66,7 +65,8 @@ static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, int i, int num_tables) { -#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID +/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */ +#ifdef CONFIG_LINUX BDRVQcow2State *s = bs->opaque; void *t = qcow2_cache_get_table_addr(bs, c, i); int align = getpagesize(); @@ -74,7 +74,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); if (length > 0) { - qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED); + madvise((uint8_t *) t + offset, length, MADV_DONTNEED); } #endif } diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index f94183529c..928c1e298d 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -83,7 +83,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, } memset(new_l1_table, 0, align_offset(new_l1_size2, 512)); - memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); + if (s->l1_size) { + memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t)); + } /* write new table (align to cluster) */ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); @@ -427,7 +429,7 @@ static int coroutine_fn do_perform_cow(BlockDriverState *bs, if (bs->encrypted) { Error *err = NULL; - int64_t sector = (cluster_offset + offset_in_cluster) + int64_t sector = (src_cluster_offset + offset_in_cluster) >> BDRV_SECTOR_BITS; assert(s->cipher); assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0); @@ -1556,7 +1558,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, * clusters. 
*/ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, - uint64_t nb_clusters) + uint64_t nb_clusters, int flags) { BDRVQcow2State *s = bs->opaque; uint64_t *l2_table; @@ -1580,7 +1582,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, /* Update L2 entries */ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table); - if (old_offset & QCOW_OFLAG_COMPRESSED) { + if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { @@ -1593,7 +1595,8 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, return nb_clusters; } -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, + int flags) { BDRVQcow2State *s = bs->opaque; uint64_t nb_clusters; @@ -1610,7 +1613,7 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) s->cache_discards = true; while (nb_clusters > 0) { - ret = zero_single_l2(bs, offset, nb_clusters); + ret = zero_single_l2(bs, offset, nb_clusters, flags); if (ret < 0) { goto fail; } diff --git a/block/qcow2.c b/block/qcow2.c index 91ef4dfefc..96fb8a8f16 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -668,6 +668,14 @@ static int qcow2_update_options_prepare(BlockDriverState *bs, r->cache_clean_interval = qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, s->cache_clean_interval); +#ifndef CONFIG_LINUX + if (r->cache_clean_interval != 0) { + error_setg(errp, QCOW2_OPT_CACHE_CLEAN_INTERVAL + " not supported on this host"); + ret = -EINVAL; + goto fail; + } +#endif if (r->cache_clean_interval > UINT_MAX) { error_setg(errp, "Cache clean interval too big"); ret = -EINVAL; @@ -959,7 +967,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) { + if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128, + QCRYPTO_CIPHER_MODE_CBC)) { error_setg(errp, "AES cipher not available"); ret = -EINVAL; goto fail; @@ -1154,6 +1163,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, /* Initialise locks */ qemu_co_mutex_init(&s->lock); + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; /* Repair image if dirty */ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && @@ -1204,6 +1214,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.request_alignment = BDRV_SECTOR_SIZE; } bs->bl.pwrite_zeroes_alignment = s->cluster_size; + bs->bl.pdiscard_alignment = s->cluster_size; } static int qcow2_set_key(BlockDriverState *bs, const char *key) @@ -1804,7 +1815,10 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s, .magic = cpu_to_be32(magic), .len = cpu_to_be32(len), }; - memcpy(buf + sizeof(QCowExtension), s, len); + + if (len) { + memcpy(buf + sizeof(QCowExtension), s, len); + } return ext_len; } @@ -2473,7 +2487,7 @@ static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs, trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count); /* Whatever is left can use real zero clusters */ - ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS); + ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags); qemu_co_mutex_unlock(&s->lock); return ret; @@ -2485,6 +2499,11 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs, int ret; BDRVQcow2State 
*s = bs->opaque; + if (!QEMU_IS_ALIGNED(offset | count, s->cluster_size)) { + assert(count < s->cluster_size); + return -ENOTSUP; + } + qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS, QCOW2_DISCARD_REQUEST, false); @@ -2533,84 +2552,39 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) return 0; } -typedef struct Qcow2WriteCo { - BlockDriverState *bs; - int64_t sector_num; - const uint8_t *buf; - int nb_sectors; - int ret; -} Qcow2WriteCo; - -static void qcow2_write_co_entry(void *opaque) -{ - Qcow2WriteCo *co = opaque; - QEMUIOVector qiov; - uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE; - uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE; - - struct iovec iov = (struct iovec) { - .iov_base = (uint8_t*) co->buf, - .iov_len = bytes, - }; - qemu_iovec_init_external(&qiov, &iov, 1); - - co->ret = qcow2_co_pwritev(co->bs, offset, bytes, &qiov, 0); -} - -/* Wrapper for non-coroutine contexts */ -static int qcow2_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - Coroutine *co; - AioContext *aio_context = bdrv_get_aio_context(bs); - Qcow2WriteCo data = { - .bs = bs, - .sector_num = sector_num, - .buf = buf, - .nb_sectors = nb_sectors, - .ret = -EINPROGRESS, - }; - co = qemu_coroutine_create(qcow2_write_co_entry, &data); - qemu_coroutine_enter(co); - while (data.ret == -EINPROGRESS) { - aio_poll(aio_context, true); - } - return data.ret; -} - /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ -static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +static coroutine_fn int +qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov) { BDRVQcow2State *s = bs->opaque; + QEMUIOVector hd_qiov; + struct iovec iov; z_stream strm; int ret, out_len; - uint8_t *out_buf; + uint8_t *buf, *out_buf; uint64_t cluster_offset; - if (nb_sectors == 0) { + if (bytes == 0) { /* align end of file to a sector boundary to ease reading with sector based I/Os */ cluster_offset = bdrv_getlength(bs->file->bs); return bdrv_truncate(bs->file->bs, cluster_offset); } - if (nb_sectors != s->cluster_sectors) { - ret = -EINVAL; - - /* Zero-pad last write if image size is not cluster aligned */ - if (sector_num + nb_sectors == bs->total_sectors && - nb_sectors < s->cluster_sectors) { - uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size); - memset(pad_buf, 0, s->cluster_size); - memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE); - ret = qcow2_write_compressed(bs, sector_num, - pad_buf, s->cluster_sectors); - qemu_vfree(pad_buf); + buf = qemu_blockalign(bs, s->cluster_size); + if (bytes != s->cluster_size) { + if (bytes > s->cluster_size || + offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS) + { + qemu_vfree(buf); + return -EINVAL; } - return ret; + /* Zero-pad last write if image size is not cluster aligned */ + memset(buf + bytes, 0, s->cluster_size - bytes); } + qemu_iovec_to_buf(qiov, 0, buf, bytes); out_buf = g_malloc(s->cluster_size); @@ -2641,33 +2615,44 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, if (ret != Z_STREAM_END || out_len >= s->cluster_size) { /* could not compress: write normal cluster */ - ret = qcow2_write(bs, sector_num, buf, s->cluster_sectors); + ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); if (ret < 0) { goto fail; } - } else { - cluster_offset = 
qcow2_alloc_compressed_cluster_offset(bs, - sector_num << 9, out_len); - if (!cluster_offset) { - ret = -EIO; - goto fail; - } - cluster_offset &= s->cluster_offset_mask; + goto success; + } - ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); - if (ret < 0) { - goto fail; - } + qemu_co_mutex_lock(&s->lock); + cluster_offset = + qcow2_alloc_compressed_cluster_offset(bs, offset, out_len); + if (!cluster_offset) { + qemu_co_mutex_unlock(&s->lock); + ret = -EIO; + goto fail; + } + cluster_offset &= s->cluster_offset_mask; - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); - ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); - if (ret < 0) { - goto fail; - } + ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len); + qemu_co_mutex_unlock(&s->lock); + if (ret < 0) { + goto fail; } + iov = (struct iovec) { + .iov_base = out_buf, + .iov_len = out_len, + }; + qemu_iovec_init_external(&hd_qiov, &iov, 1); + + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); + ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0); + if (ret < 0) { + goto fail; + } +success: ret = 0; fail: + qemu_vfree(buf); g_free(out_buf); return ret; } @@ -2823,7 +2808,8 @@ static int qcow2_make_empty(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; uint64_t start_sector; - int sector_step = INT_MAX / BDRV_SECTOR_SIZE; + int sector_step = (QEMU_ALIGN_DOWN(INT_MAX, s->cluster_size) / + BDRV_SECTOR_SIZE); int l1_clusters, ret = 0; l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t)); @@ -3412,7 +3398,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, .bdrv_co_pdiscard = qcow2_co_pdiscard, .bdrv_truncate = qcow2_truncate, - .bdrv_write_compressed = qcow2_write_compressed, + .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, .bdrv_make_empty = qcow2_make_empty, .bdrv_snapshot_create = qcow2_snapshot_create, diff --git a/block/qcow2.h b/block/qcow2.h index b36a7bf8ad..182341483a 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -473,8 +473,6 @@ static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2) return r1 > r2 ? 
r1 - r2 : r2 - r1; } -// FIXME Need qcow2_ prefix to global functions - /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); @@ -530,7 +528,6 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); -void qcow2_l2_cache_reset(BlockDriverState *bs); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *out_buf, const uint8_t *in_buf, @@ -548,7 +545,8 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, enum qcow2_discard_type type, bool full_discard); -int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); +int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, + int flags); int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb, diff --git a/block/qed-table.c b/block/qed-table.c index 1a731dff51..ed443e2b70 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -174,9 +174,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s) qed_read_table(s, s->header.l1_table_offset, s->l1_table, qed_sync_cb, &ret); - while (ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(s->bs), true); - } + BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); return ret; } @@ -195,9 +193,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index, int ret = -EINPROGRESS; qed_write_l1_table(s, index, n, qed_sync_cb, &ret); - while (ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(s->bs), true); - } + BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); return ret; } @@ -268,9 +264,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset int ret = -EINPROGRESS; qed_read_l2_table(s, request, offset, qed_sync_cb, &ret); - while (ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(s->bs), true); - } + BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); return ret; } @@ -290,9 +284,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request, int ret = -EINPROGRESS; qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret); - while (ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(s->bs), true); - } + BDRV_POLL_WHILE(s->bs, ret == -EINPROGRESS); return ret; } diff --git a/block/qed.c b/block/qed.c index 426f3cb447..1a7ef0a9ce 100644 --- a/block/qed.c +++ b/block/qed.c @@ -336,7 +336,7 @@ static void qed_need_check_timer_cb(void *opaque) qed_plug_allocating_write_reqs(s); /* Ensure writes are on disk before clearing flag */ - bdrv_aio_flush(s->bs, qed_clear_need_check, s); + bdrv_aio_flush(s->bs->file->bs, qed_clear_need_check, s); } static void qed_start_need_check_timer(BDRVQEDState *s) @@ -378,6 +378,19 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } +static void bdrv_qed_drain(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + /* Fire the timer immediately in order to start doing I/O as soon as the + * header is flushed. 
+ */ + if (s->need_check_timer && timer_pending(s->need_check_timer)) { + qed_cancel_need_check_timer(s); + qed_need_check_timer_cb(s); + } +} + static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -909,7 +922,6 @@ static void qed_aio_complete_bh(void *opaque) void *user_opaque = acb->common.opaque; int ret = acb->bh_ret; - qemu_bh_delete(acb->bh); qemu_aio_unref(acb); /* Invoke callback */ @@ -934,9 +946,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) /* Arrange for a bh to invoke the completion function */ acb->bh_ret = ret; - acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), - qed_aio_complete_bh, acb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), + qed_aio_complete_bh, acb); /* Start next allocating write request waiting behind this one. Note that * requests enqueue themselves when they first hit an unallocated cluster @@ -1670,6 +1681,7 @@ static BlockDriver bdrv_qed = { .bdrv_check = bdrv_qed_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, + .bdrv_drain = bdrv_qed_drain, }; static void bdrv_qed_init(void) diff --git a/block/qed.h b/block/qed.h index 22b3198751..9676ab9479 100644 --- a/block/qed.h +++ b/block/qed.h @@ -130,7 +130,6 @@ enum { typedef struct QEDAIOCB { BlockAIOCB common; - QEMUBH *bh; int bh_ret; /* final return status for completion bh */ QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */ int flags; /* QED_AIOCB_* bits ORed together */ diff --git a/block/quorum.c b/block/quorum.c index 9cf876fb34..d122299352 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -130,7 +130,7 @@ struct QuorumAIOCB { bool is_read; int vote_ret; - int child_iter; /* which child to read in fifo pattern */ + int children_read; /* how many children have been read from */ }; static bool quorum_vote(QuorumAIOCB *acb); @@ -156,22 +156,7 @@ static AIOCBInfo quorum_aiocb_info = { static void quorum_aio_finalize(QuorumAIOCB *acb) { - int i, ret = 0; - - if (acb->vote_ret) { - ret = acb->vote_ret; - } - - acb->common.cb(acb->common.opaque, ret); - - if (acb->is_read) { - /* on the quorum case acb->child_iter == s->num_children - 1 */ - for (i = 0; i <= acb->child_iter; i++) { - qemu_vfree(acb->qcrs[i].buf); - qemu_iovec_destroy(&acb->qcrs[i].qiov); - } - } - + acb->common.cb(acb->common.opaque, acb->vote_ret); g_free(acb->qcrs); qemu_aio_unref(acb); } @@ -283,39 +268,52 @@ static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source) } } -static void quorum_aio_cb(void *opaque, int ret) +static void quorum_report_bad_acb(QuorumChildRequest *sacb, int ret) +{ + QuorumAIOCB *acb = sacb->parent; + QuorumOpType type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; + quorum_report_bad(type, acb->sector_num, acb->nb_sectors, + sacb->aiocb->bs->node_name, ret); +} + +static void quorum_fifo_aio_cb(void *opaque, int ret) { QuorumChildRequest *sacb = opaque; QuorumAIOCB *acb = sacb->parent; BDRVQuorumState *s = acb->common.bs->opaque; - bool rewrite = false; - if (ret == 0) { - acb->success_count++; - } else { - QuorumOpType type; - type = acb->is_read ? 
QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; - quorum_report_bad(type, acb->sector_num, acb->nb_sectors, - sacb->aiocb->bs->node_name, ret); - } + assert(acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO); + + if (ret < 0) { + quorum_report_bad_acb(sacb, ret); - if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { /* We try to read next child in FIFO order if we fail to read */ - if (ret < 0 && (acb->child_iter + 1) < s->num_children) { - acb->child_iter++; + if (acb->children_read < s->num_children) { read_fifo_child(acb); return; } - - if (ret == 0) { - quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov); - } - acb->vote_ret = ret; - quorum_aio_finalize(acb); - return; } + acb->vote_ret = ret; + + /* FIXME: rewrite failed children if acb->children_read > 1? */ + quorum_aio_finalize(acb); +} + +static void quorum_aio_cb(void *opaque, int ret) +{ + QuorumChildRequest *sacb = opaque; + QuorumAIOCB *acb = sacb->parent; + BDRVQuorumState *s = acb->common.bs->opaque; + bool rewrite = false; + int i; + sacb->ret = ret; + if (ret == 0) { + acb->success_count++; + } else { + quorum_report_bad_acb(sacb, ret); + } acb->count++; assert(acb->count <= s->num_children); assert(acb->success_count <= s->num_children); @@ -326,6 +324,10 @@ static void quorum_aio_cb(void *opaque, int ret) /* Do the vote on read */ if (acb->is_read) { rewrite = quorum_vote(acb); + for (i = 0; i < s->num_children; i++) { + qemu_vfree(acb->qcrs[i].buf); + qemu_iovec_destroy(&acb->qcrs[i].qiov); + } } else { quorum_has_too_much_io_failed(acb); } @@ -653,6 +655,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) BDRVQuorumState *s = acb->common.bs->opaque; int i; + acb->children_read = s->num_children; for (i = 0; i < s->num_children; i++) { acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size); qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov); @@ -671,16 +674,11 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->common.bs->opaque; + int n = acb->children_read++; - acb->qcrs[acb->child_iter].buf = - qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size); - qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); - qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, - acb->qcrs[acb->child_iter].buf); - acb->qcrs[acb->child_iter].aiocb = - bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[n].aiocb = bdrv_aio_readv(s->children[n], acb->sector_num, + acb->qiov, acb->nb_sectors, + quorum_fifo_aio_cb, &acb->qcrs[n]); return &acb->common; } @@ -696,13 +694,12 @@ static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs, QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors, cb, opaque); acb->is_read = true; + acb->children_read = 0; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - acb->child_iter = s->num_children - 1; return read_quorum_children(acb); } - acb->child_iter = 0; return read_fifo_child(acb); } diff --git a/block/raw-posix.c b/block/raw-posix.c index 6ed7547392..28b47d977b 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -143,6 +143,7 @@ typedef struct BDRVRawState { bool has_discard:1; bool has_write_zeroes:1; bool discard_zeroes:1; + bool use_linux_aio:1; bool has_fallocate; bool needs_alignment; } BDRVRawState; @@ -367,18 +368,6 @@ static void raw_parse_flags(int bdrv_flags, int 
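To summarise the quorum FIFO change above: child_iter is replaced by a children_read counter, and a failed read simply tries the next child until one succeeds or all children have been read from. A toy model of that retry policy (read_from_child() is invented; -5 stands in for -EIO):

#include <stdio.h>

#define NUM_CHILDREN 3

static int read_from_child(int idx)
{
    /* pretend the first two children fail; -5 stands in for -EIO */
    return idx < 2 ? -5 : 0;
}

int main(void)
{
    int children_read = 0;
    int ret;

    do {
        int n = children_read++;     /* same bookkeeping as read_fifo_child() */
        ret = read_from_child(n);
        printf("child %d -> %d\n", n, ret);
    } while (ret < 0 && children_read < NUM_CHILDREN);

    printf("final result: %d\n", ret);
    return 0;
}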
*open_flags) } } -#ifdef CONFIG_LINUX_AIO -static bool raw_use_aio(int bdrv_flags) -{ - /* - * Currently Linux do AIO only for files opened with O_DIRECT - * specified so check NOCACHE flag too - */ - return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO); -} -#endif - static void raw_parse_filename(const char *filename, QDict *options, Error **errp) { @@ -399,6 +388,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "File name of the image", }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, { /* end of list */ } }, }; @@ -410,6 +404,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, QemuOpts *opts; Error *local_err = NULL; const char *filename = NULL; + BlockdevAioOptions aio, aio_default; int fd, ret; struct stat st; @@ -429,6 +424,18 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } + aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) + ? BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); + s->open_flags = open_flags; raw_parse_flags(bdrv_flags, &s->open_flags); @@ -436,6 +443,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, fd = qemu_open(filename, s->open_flags, 0644); if (fd < 0) { ret = -errno; + error_setg_errno(errp, errno, "Could not open '%s'", filename); if (ret == -EROFS) { ret = -EACCES; } @@ -444,14 +452,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->fd = fd; #ifdef CONFIG_LINUX_AIO - if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) { + /* Currently Linux does AIO only for files opened with O_DIRECT */ + if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { error_setg(errp, "aio=native was specified, but it requires " "cache.direct=on, which was not specified."); ret = -EINVAL; goto fail; } #else - if (bdrv_flags & BDRV_O_NATIVE_AIO) { + if (s->use_linux_aio) { error_setg(errp, "aio=native was specified, but is not supported " "in this build."); ret = -EINVAL; @@ -533,7 +542,7 @@ static int raw_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { BDRVRawState *s; - BDRVRawReopenState *raw_s; + BDRVRawReopenState *rs; int ret = 0; Error *local_err = NULL; @@ -543,15 +552,15 @@ static int raw_reopen_prepare(BDRVReopenState *state, s = state->bs->opaque; state->opaque = g_new0(BDRVRawReopenState, 1); - raw_s = state->opaque; + rs = state->opaque; if (s->type == FTYPE_CD) { - raw_s->open_flags |= O_NONBLOCK; + rs->open_flags |= O_NONBLOCK; } - raw_parse_flags(state->flags, &raw_s->open_flags); + raw_parse_flags(state->flags, &rs->open_flags); - raw_s->fd = -1; + rs->fd = -1; int fcntl_flags = O_APPEND | O_NONBLOCK; #ifdef O_NOATIME @@ -560,35 +569,35 @@ static int raw_reopen_prepare(BDRVReopenState *state, #ifdef O_ASYNC /* Not all operating systems have O_ASYNC, and those that don't - * will not let us track the state into raw_s->open_flags (typically + * will not let us track the state into rs->open_flags (typically * you achieve the same effect with an ioctl, for example I_SETSIG * on Solaris). But we do not use O_ASYNC, so that's fine. 
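The raw-posix hunk above adds an explicit "aio" runtime option whose default is derived from the old BDRV_O_NATIVE_AIO flag. A self-contained sketch of that resolution logic (parse_aio() and the local enum are illustrative only; the real code goes through qapi_enum_parse() and BlockdevAioOptions as shown in the diff):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

enum aio_mode { AIO_THREADS, AIO_NATIVE };

static int parse_aio(const char *str, bool native_flag_set, enum aio_mode *out)
{
    if (!str) {
        *out = native_flag_set ? AIO_NATIVE : AIO_THREADS;  /* legacy default */
        return 0;
    }
    if (!strcmp(str, "threads")) {
        *out = AIO_THREADS;
        return 0;
    }
    if (!strcmp(str, "native")) {
        *out = AIO_NATIVE;
        return 0;
    }
    return -1;   /* unknown value: caller reports -EINVAL */
}

int main(void)
{
    enum aio_mode m;

    parse_aio(NULL, true, &m);
    printf("default with O_NATIVE_AIO set: %s\n",
           m == AIO_NATIVE ? "native" : "threads");
    parse_aio("threads", true, &m);
    printf("explicit aio=threads:          %s\n",
           m == AIO_NATIVE ? "native" : "threads");
    return 0;
}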
*/ assert((s->open_flags & O_ASYNC) == 0); #endif - if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) { + if ((rs->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) { /* dup the original fd */ - raw_s->fd = qemu_dup(s->fd); - if (raw_s->fd >= 0) { - ret = fcntl_setfl(raw_s->fd, raw_s->open_flags); + rs->fd = qemu_dup(s->fd); + if (rs->fd >= 0) { + ret = fcntl_setfl(rs->fd, rs->open_flags); if (ret) { - qemu_close(raw_s->fd); - raw_s->fd = -1; + qemu_close(rs->fd); + rs->fd = -1; } } } /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ - if (raw_s->fd == -1) { + if (rs->fd == -1) { const char *normalized_filename = state->bs->filename; ret = raw_normalize_devicepath(&normalized_filename); if (ret < 0) { error_setg_errno(errp, -ret, "Could not normalize device path"); } else { - assert(!(raw_s->open_flags & O_CREAT)); - raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags); - if (raw_s->fd == -1) { + assert(!(rs->open_flags & O_CREAT)); + rs->fd = qemu_open(normalized_filename, rs->open_flags); + if (rs->fd == -1) { error_setg_errno(errp, errno, "Could not reopen file"); ret = -1; } @@ -597,11 +606,11 @@ static int raw_reopen_prepare(BDRVReopenState *state, /* Fail already reopen_prepare() if we can't get a working O_DIRECT * alignment with the new fd. */ - if (raw_s->fd != -1) { - raw_probe_alignment(state->bs, raw_s->fd, &local_err); + if (rs->fd != -1) { + raw_probe_alignment(state->bs, rs->fd, &local_err); if (local_err) { - qemu_close(raw_s->fd); - raw_s->fd = -1; + qemu_close(rs->fd); + rs->fd = -1; error_propagate(errp, local_err); ret = -EINVAL; } @@ -612,13 +621,13 @@ static int raw_reopen_prepare(BDRVReopenState *state, static void raw_reopen_commit(BDRVReopenState *state) { - BDRVRawReopenState *raw_s = state->opaque; + BDRVRawReopenState *rs = state->opaque; BDRVRawState *s = state->bs->opaque; - s->open_flags = raw_s->open_flags; + s->open_flags = rs->open_flags; qemu_close(s->fd); - s->fd = raw_s->fd; + s->fd = rs->fd; g_free(state->opaque); state->opaque = NULL; @@ -627,16 +636,16 @@ static void raw_reopen_commit(BDRVReopenState *state) static void raw_reopen_abort(BDRVReopenState *state) { - BDRVRawReopenState *raw_s = state->opaque; + BDRVRawReopenState *rs = state->opaque; /* nothing to do if NULL, we didn't get far enough */ - if (raw_s == NULL) { + if (rs == NULL) { return; } - if (raw_s->fd >= 0) { - qemu_close(raw_s->fd); - raw_s->fd = -1; + if (rs->fd >= 0) { + qemu_close(rs->fd); + rs->fd = -1; } g_free(state->opaque); state->opaque = NULL; @@ -1256,7 +1265,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, if (!bdrv_qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO - } else if (bs->open_flags & BDRV_O_NATIVE_AIO) { + } else if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); assert(qiov->size == bytes); return laio_co_submit(bs, aio, s->fd, offset, qiov, type); @@ -1285,7 +1294,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, static void raw_aio_plug(BlockDriverState *bs) { #ifdef CONFIG_LINUX_AIO - if (bs->open_flags & BDRV_O_NATIVE_AIO) { + BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_plug(bs, aio); } @@ -1295,7 +1305,8 @@ static void raw_aio_plug(BlockDriverState *bs) static void raw_aio_unplug(BlockDriverState *bs) { #ifdef CONFIG_LINUX_AIO - if (bs->open_flags & BDRV_O_NATIVE_AIO) { + 
BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_unplug(bs, aio); } @@ -2058,13 +2069,23 @@ static bool hdev_is_sg(BlockDriverState *bs) #if defined(__linux__) + BDRVRawState *s = bs->opaque; struct stat st; struct sg_scsi_id scsiid; int sg_version; + int ret; - if (stat(bs->filename, &st) >= 0 && S_ISCHR(st.st_mode) && - !bdrv_ioctl(bs, SG_GET_VERSION_NUM, &sg_version) && - !bdrv_ioctl(bs, SG_GET_SCSI_ID, &scsiid)) { + if (stat(bs->filename, &st) < 0 || !S_ISCHR(st.st_mode)) { + return false; + } + + ret = ioctl(s->fd, SG_GET_VERSION_NUM, &sg_version); + if (ret < 0) { + return false; + } + + ret = ioctl(s->fd, SG_GET_SCSI_ID, &scsiid); + if (ret >= 0) { DPRINTF("SG device found: type=%d, version=%d\n", scsiid.scsi_type, sg_version); return true; diff --git a/block/raw-win32.c b/block/raw-win32.c index 56f45fea9e..800fabdd72 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -32,6 +32,7 @@ #include "block/thread-pool.h" #include "qemu/iov.h" #include "qapi/qmp/qstring.h" +#include "qapi/util.h" #include #include @@ -252,7 +253,8 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp) } } -static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) +static void raw_parse_flags(int flags, bool use_aio, int *access_flags, + DWORD *overlapped) { assert(access_flags != NULL); assert(overlapped != NULL); @@ -264,7 +266,7 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) } *overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NATIVE_AIO) { + if (use_aio) { *overlapped |= FILE_FLAG_OVERLAPPED; } if (flags & BDRV_O_NOCACHE) { @@ -292,10 +294,35 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "File name of the image", }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, { /* end of list */ } }, }; +static bool get_aio_option(QemuOpts *opts, int flags, Error **errp) +{ + BlockdevAioOptions aio, aio_default; + + aio_default = (flags & BDRV_O_NATIVE_AIO) ? 
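hdev_is_sg() above is reworked to stat() the node, require a character device, and then issue the SG ioctls directly on the already-open fd instead of going through bdrv_ioctl(). The snippet below shows only the S_ISCHR gate, stand-alone and POSIX-only; the SG_GET_VERSION_NUM/SG_GET_SCSI_ID ioctls are deliberately omitted here.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

static bool is_char_device(const char *path)
{
    struct stat st;

    if (stat(path, &st) < 0 || !S_ISCHR(st.st_mode)) {
        return false;   /* not an SG candidate, same early-out as the hunk */
    }
    return true;
}

int main(void)
{
    printf("/dev/null is a char device: %d\n", is_char_device("/dev/null"));
    return 0;
}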
BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + BLOCKDEV_AIO_OPTIONS__MAX, aio_default, errp); + + switch (aio) { + case BLOCKDEV_AIO_OPTIONS_NATIVE: + return true; + case BLOCKDEV_AIO_OPTIONS_THREADS: + return false; + default: + error_setg(errp, "Invalid AIO option"); + } + return false; +} + static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -305,6 +332,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; const char *filename; + bool use_aio; int ret; s->type = FTYPE_FILE; @@ -319,7 +347,14 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); - raw_parse_flags(flags, &access_flags, &overlapped); + use_aio = get_aio_option(opts, flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); if (filename[0] && filename[1] == ':') { snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); @@ -338,6 +373,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, if (s->hfile == INVALID_HANDLE_VALUE) { int err = GetLastError(); + error_setg_win32(errp, err, "Could not open '%s'", filename); if (err == ERROR_ACCESS_DENIED) { ret = -EACCES; } else { @@ -346,7 +382,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - if (flags & BDRV_O_NATIVE_AIO) { + if (use_aio) { s->aio = win32_aio_init(); if (s->aio == NULL) { CloseHandle(s->hfile); @@ -647,6 +683,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; const char *filename; + bool use_aio; QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); @@ -659,6 +696,16 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); + use_aio = get_aio_option(opts, flags, &local_err); + if (!local_err && use_aio) { + error_setg(&local_err, "AIO is not supported on Windows host devices"); + } + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto done; + } + if (strstart(filename, "/dev/cdrom", NULL)) { if (find_cdrom(device_name, sizeof(device_name)) < 0) { error_setg(errp, "Could not open CD-ROM drive"); @@ -677,7 +724,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, } s->type = find_device_type(bs, filename); - raw_parse_flags(flags, &access_flags, &overlapped); + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); create_flags = OPEN_EXISTING; diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 588d4080f9..8a5b9b0424 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -31,6 +31,30 @@ #include "qapi/error.h" #include "qemu/option.h" +typedef struct BDRVRawState { + uint64_t offset; + uint64_t size; + bool has_size; +} BDRVRawState; + +static QemuOptsList raw_runtime_opts = { + .name = "raw", + .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head), + .desc = { + { + .name = "offset", + .type = QEMU_OPT_SIZE, + .help = "offset in the disk where the image starts", + }, + { + .name = "size", + .type = QEMU_OPT_SIZE, + .help = "virtual disk size", + }, + { /* end of list */ } + }, +}; + static QemuOptsList raw_create_opts = { .name = "raw-create-opts", .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head), @@ -44,16 +68,116 @@ static QemuOptsList 
raw_create_opts = { } }; +static int raw_read_options(QDict *options, BlockDriverState *bs, + BDRVRawState *s, Error **errp) +{ + Error *local_err = NULL; + QemuOpts *opts = NULL; + int64_t real_size = 0; + int ret; + + real_size = bdrv_getlength(bs->file->bs); + if (real_size < 0) { + error_setg_errno(errp, -real_size, "Could not get image size"); + return real_size; + } + + opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto end; + } + + s->offset = qemu_opt_get_size(opts, "offset", 0); + if (s->offset > real_size) { + error_setg(errp, "Offset (%" PRIu64 ") cannot be greater than " + "size of the containing file (%" PRId64 ")", + s->offset, real_size); + ret = -EINVAL; + goto end; + } + + if (qemu_opt_find(opts, "size") != NULL) { + s->size = qemu_opt_get_size(opts, "size", 0); + s->has_size = true; + } else { + s->has_size = false; + s->size = real_size - s->offset; + } + + /* Check size and offset */ + if ((real_size - s->offset) < s->size) { + error_setg(errp, "The sum of offset (%" PRIu64 ") and size " + "(%" PRIu64 ") has to be smaller or equal to the " + " actual size of the containing file (%" PRId64 ")", + s->offset, s->size, real_size); + ret = -EINVAL; + goto end; + } + + /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding + * up and leaking out of the specified area. */ + if (s->has_size && !QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) { + error_setg(errp, "Specified size is not multiple of %llu", + BDRV_SECTOR_SIZE); + ret = -EINVAL; + goto end; + } + + ret = 0; + +end: + + qemu_opts_del(opts); + + return ret; +} + static int raw_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp) { - return 0; + assert(reopen_state != NULL); + assert(reopen_state->bs != NULL); + + reopen_state->opaque = g_new0(BDRVRawState, 1); + + return raw_read_options( + reopen_state->options, + reopen_state->bs, + reopen_state->opaque, + errp); +} + +static void raw_reopen_commit(BDRVReopenState *state) +{ + BDRVRawState *new_s = state->opaque; + BDRVRawState *s = state->bs->opaque; + + memcpy(s, new_s, sizeof(BDRVRawState)); + + g_free(state->opaque); + state->opaque = NULL; +} + +static void raw_reopen_abort(BDRVReopenState *state) +{ + g_free(state->opaque); + state->opaque = NULL; } static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + BDRVRawState *s = bs->opaque; + + if (offset > UINT64_MAX - s->offset) { + return -EINVAL; + } + offset += s->offset; + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); } @@ -62,11 +186,23 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags) { + BDRVRawState *s = bs->opaque; void *buf = NULL; BlockDriver *drv; QEMUIOVector local_qiov; int ret; + if (s->has_size && (offset > s->size || bytes > (s->size - offset))) { + /* There's not enough space for the data. Don't write anything and just + * fail to prevent leaking out of the size specified in options. 
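raw_read_options() above validates the new raw "offset"/"size" options against the size of the containing file. A stand-alone restatement of those checks (check_raw_window() and SECTOR_SIZE are illustrative; QEMU uses BDRV_SECTOR_SIZE and returns -EINVAL together with an Error message):

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE 512u

static int check_raw_window(uint64_t real_size, uint64_t offset,
                            uint64_t size, int has_size)
{
    if (offset > real_size) {
        return -1;                       /* offset beyond containing file */
    }
    if (real_size - offset < size) {
        return -1;                       /* window sticks out of the file */
    }
    if (has_size && (size % SECTOR_SIZE) != 0) {
        return -1;                       /* avoid rounding past the window */
    }
    return 0;
}

int main(void)
{
    printf("%d\n", check_raw_window(1 << 20, 4096, 4096, 1));   /* accepted */
    printf("%d\n", check_raw_window(1 << 20, 4096, 1000, 1));   /* unaligned size */
    return 0;
}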
*/ + return -ENOSPC; + } + + if (offset > UINT64_MAX - s->offset) { + ret = -EINVAL; + goto fail; + } + if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { /* Handling partial writes would be a pain - so we just * require that guests have 512-byte request alignment if @@ -101,6 +237,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, qiov = &local_qiov; } + offset += s->offset; + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); @@ -117,8 +255,10 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, int nb_sectors, int *pnum, BlockDriverState **file) { + BDRVRawState *s = bs->opaque; *pnum = nb_sectors; *file = bs->file->bs; + sector_num += s->offset / BDRV_SECTOR_SIZE; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | (sector_num << BDRV_SECTOR_BITS); } @@ -127,18 +267,49 @@ static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int count, BdrvRequestFlags flags) { + BDRVRawState *s = bs->opaque; + if (offset > UINT64_MAX - s->offset) { + return -EINVAL; + } + offset += s->offset; return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); } static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) { + BDRVRawState *s = bs->opaque; + if (offset > UINT64_MAX - s->offset) { + return -EINVAL; + } + offset += s->offset; return bdrv_co_pdiscard(bs->file->bs, offset, count); } static int64_t raw_getlength(BlockDriverState *bs) { - return bdrv_getlength(bs->file->bs); + int64_t len; + BDRVRawState *s = bs->opaque; + + /* Update size. It should not change unless the file was externally + * modified. */ + len = bdrv_getlength(bs->file->bs); + if (len < 0) { + return len; + } + + if (len < s->offset) { + s->size = 0; + } else { + if (s->has_size) { + /* Try to honour the size */ + s->size = MIN(s->size, len - s->offset); + } else { + s->size = len - s->offset; + } + } + + return s->size; } static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -158,6 +329,18 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) static int raw_truncate(BlockDriverState *bs, int64_t offset) { + BDRVRawState *s = bs->opaque; + + if (s->has_size) { + return -ENOTSUP; + } + + if (INT64_MAX - offset < s->offset) { + return -EINVAL; + } + + s->size = offset; + offset += s->offset; return bdrv_truncate(bs->file->bs, offset); } @@ -176,12 +359,13 @@ static void raw_lock_medium(BlockDriverState *bs, bool locked) bdrv_lock_medium(bs->file->bs, locked); } -static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockCompletionFunc *cb, - void *opaque) +static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { - return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque); + BDRVRawState *s = bs->opaque; + if (s->offset || s->has_size) { + return -ENOTSUP; + } + return bdrv_co_ioctl(bs->file->bs, req, buf); } static int raw_has_zero_init(BlockDriverState *bs) @@ -197,6 +381,9 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { + BDRVRawState *s = bs->opaque; + int ret; + bs->sg = bs->file->bs->sg; bs->supported_write_flags = BDRV_REQ_FUA & bs->file->bs->supported_write_flags; @@ -214,6 +401,16 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, bs->file->bs->filename); } + ret = raw_read_options(options, bs, s, errp); + 
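Several of the raw driver callbacks above guard the offset shift with offset > UINT64_MAX - s->offset before adding the configured base offset. Stand-alone, the same overflow test looks like this (shift_offset() is invented; -22 stands in for -EINVAL):

#include <stdint.h>
#include <stdio.h>

static int shift_offset(uint64_t offset, uint64_t base, uint64_t *result)
{
    if (offset > UINT64_MAX - base) {
        return -22;          /* addition would wrap around: reject */
    }
    *result = offset + base;
    return 0;
}

int main(void)
{
    uint64_t out;

    printf("%d\n", shift_offset(100, 4096, &out));           /* 0 */
    printf("%d\n", shift_offset(UINT64_MAX, 4096, &out));    /* -22 */
    return 0;
}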
if (ret < 0) { + return ret; + } + + if (bs->sg && (s->offset || s->has_size)) { + error_setg(errp, "Cannot use offset/size with SCSI generic devices"); + return -EINVAL; + } + return 0; } @@ -231,18 +428,37 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { - return bdrv_probe_blocksizes(bs->file->bs, bsz); + BDRVRawState *s = bs->opaque; + int ret; + + ret = bdrv_probe_blocksizes(bs->file->bs, bsz); + if (ret < 0) { + return ret; + } + + if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { + return -ENOTSUP; + } + + return 0; } static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { + BDRVRawState *s = bs->opaque; + if (s->offset || s->has_size) { + return -ENOTSUP; + } return bdrv_probe_geometry(bs->file->bs, geo); } BlockDriver bdrv_raw = { .format_name = "raw", + .instance_size = sizeof(BDRVRawState), .bdrv_probe = &raw_probe, .bdrv_reopen_prepare = &raw_reopen_prepare, + .bdrv_reopen_commit = &raw_reopen_commit, + .bdrv_reopen_abort = &raw_reopen_abort, .bdrv_open = &raw_open, .bdrv_close = &raw_close, .bdrv_create = &raw_create, @@ -261,7 +477,7 @@ BlockDriver bdrv_raw = { .bdrv_media_changed = &raw_media_changed, .bdrv_eject = &raw_eject, .bdrv_lock_medium = &raw_lock_medium, - .bdrv_aio_ioctl = &raw_aio_ioctl, + .bdrv_co_ioctl = &raw_co_ioctl, .create_opts = &raw_create_opts, .bdrv_has_zero_init = &raw_has_zero_init }; diff --git a/block/rbd.c b/block/rbd.c index 0106fea45f..a57b3e3c5d 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -71,7 +71,6 @@ typedef enum { typedef struct RBDAIOCB { BlockAIOCB common; - QEMUBH *bh; int64_t ret; QEMUIOVector *qiov; char *bounce; @@ -366,45 +365,44 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) rados_conf_read_file(cluster, NULL); } else if (conf[0] != '\0' && qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) { - rados_shutdown(cluster); error_propagate(errp, local_err); - return -EIO; + ret = -EIO; + goto shutdown; } if (conf[0] != '\0' && qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) { - rados_shutdown(cluster); error_propagate(errp, local_err); - return -EIO; + ret = -EIO; + goto shutdown; } if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) { - rados_shutdown(cluster); - return -EIO; + ret = -EIO; + goto shutdown; } ret = rados_connect(cluster); if (ret < 0) { error_setg_errno(errp, -ret, "error connecting"); - rados_shutdown(cluster); - return ret; + goto shutdown; } ret = rados_ioctx_create(cluster, pool, &io_ctx); if (ret < 0) { error_setg_errno(errp, -ret, "error opening pool %s", pool); - rados_shutdown(cluster); - return ret; + goto shutdown; } ret = rbd_create(io_ctx, name, bytes, &obj_order); - rados_ioctx_destroy(io_ctx); - rados_shutdown(cluster); if (ret < 0) { error_setg_errno(errp, -ret, "error rbd create"); - return ret; } + rados_ioctx_destroy(io_ctx); + +shutdown: + rados_shutdown(cluster); return ret; } @@ -602,7 +600,6 @@ static const AIOCBInfo rbd_aiocb_info = { static void rbd_finish_bh(void *opaque) { RADOSCB *rcb = opaque; - qemu_bh_delete(rcb->acb->bh); qemu_rbd_complete_aio(rcb); } @@ -621,9 +618,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) rcb->ret = rbd_aio_get_return_value(c); rbd_aio_release(c); - acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs), - rbd_finish_bh, rcb); - qemu_bh_schedule(acb->bh); + aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs), + rbd_finish_bh, rcb); } static int 
rbd_aio_discard_wrapper(rbd_image_t image, @@ -679,7 +675,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs, acb->ret = 0; acb->error = 0; acb->s = s; - acb->bh = NULL; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); @@ -737,7 +732,7 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs, void *opaque) { return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov, - nb_sectors << BDRV_SECTOR_BITS, cb, opaque, + (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque, RBD_AIO_READ); } @@ -749,7 +744,7 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs, void *opaque) { return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov, - nb_sectors << BDRV_SECTOR_BITS, cb, opaque, + (int64_t) nb_sectors << BDRV_SECTOR_BITS, cb, opaque, RBD_AIO_WRITE); } diff --git a/block/replication.c b/block/replication.c new file mode 100644 index 0000000000..729dd12499 --- /dev/null +++ b/block/replication.c @@ -0,0 +1,673 @@ +/* + * Replication Block filter + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 Intel Corporation + * Copyright (c) 2016 FUJITSU LIMITED + * + * Author: + * Wen Congyang + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "block/nbd.h" +#include "block/blockjob.h" +#include "block/block_int.h" +#include "block/block_backup.h" +#include "sysemu/block-backend.h" +#include "qapi/error.h" +#include "replication.h" + +typedef struct BDRVReplicationState { + ReplicationMode mode; + int replication_state; + BdrvChild *active_disk; + BdrvChild *hidden_disk; + BdrvChild *secondary_disk; + char *top_id; + ReplicationState *rs; + Error *blocker; + int orig_hidden_flags; + int orig_secondary_flags; + int error; +} BDRVReplicationState; + +enum { + BLOCK_REPLICATION_NONE, /* block replication is not started */ + BLOCK_REPLICATION_RUNNING, /* block replication is running */ + BLOCK_REPLICATION_FAILOVER, /* failover is running in background */ + BLOCK_REPLICATION_FAILOVER_FAILED, /* failover failed */ + BLOCK_REPLICATION_DONE, /* block replication is done */ +}; + +static void replication_start(ReplicationState *rs, ReplicationMode mode, + Error **errp); +static void replication_do_checkpoint(ReplicationState *rs, Error **errp); +static void replication_get_error(ReplicationState *rs, Error **errp); +static void replication_stop(ReplicationState *rs, bool failover, + Error **errp); + +#define REPLICATION_MODE "mode" +#define REPLICATION_TOP_ID "top-id" +static QemuOptsList replication_runtime_opts = { + .name = "replication", + .head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head), + .desc = { + { + .name = REPLICATION_MODE, + .type = QEMU_OPT_STRING, + }, + { + .name = REPLICATION_TOP_ID, + .type = QEMU_OPT_STRING, + }, + { /* end of list */ } + }, +}; + +static ReplicationOps replication_ops = { + .start = replication_start, + .checkpoint = replication_do_checkpoint, + .get_error = replication_get_error, + .stop = replication_stop, +}; + +static int replication_open(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + int ret; + BDRVReplicationState *s = bs->opaque; + Error *local_err = NULL; + QemuOpts *opts = NULL; + const char *mode; + const char *top_id; + + ret = -EINVAL; + opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + goto 
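The new replication driver's replication_open() (continued below) enforces a small set of option rules: "mode" is required and must be "primary" or "secondary", and "top-id" is mandatory for the secondary side but rejected for the primary side. A toy restatement of just those rules (check_replication_opts() is invented; the real code reports errors via error_setg()):

#include <stdio.h>
#include <string.h>

static int check_replication_opts(const char *mode, const char *top_id)
{
    if (!mode) {
        return -1;                /* "Missing the option mode" */
    }
    if (!strcmp(mode, "primary")) {
        return top_id ? -1 : 0;   /* primary does not support top-id */
    }
    if (!strcmp(mode, "secondary")) {
        return top_id ? 0 : -1;   /* secondary requires top-id */
    }
    return -1;                    /* value must be primary or secondary */
}

int main(void)
{
    printf("primary, no top-id:    %d\n", check_replication_opts("primary", NULL));
    printf("secondary with top-id: %d\n", check_replication_opts("secondary", "top0"));
    printf("secondary, no top-id:  %d\n", check_replication_opts("secondary", NULL));
    return 0;
}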
fail; + } + + mode = qemu_opt_get(opts, REPLICATION_MODE); + if (!mode) { + error_setg(&local_err, "Missing the option mode"); + goto fail; + } + + if (!strcmp(mode, "primary")) { + s->mode = REPLICATION_MODE_PRIMARY; + top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); + if (top_id) { + error_setg(&local_err, "The primary side does not support option top-id"); + goto fail; + } + } else if (!strcmp(mode, "secondary")) { + s->mode = REPLICATION_MODE_SECONDARY; + top_id = qemu_opt_get(opts, REPLICATION_TOP_ID); + s->top_id = g_strdup(top_id); + if (!s->top_id) { + error_setg(&local_err, "Missing the option top-id"); + goto fail; + } + } else { + error_setg(&local_err, + "The option mode's value should be primary or secondary"); + goto fail; + } + + s->rs = replication_new(bs, &replication_ops); + + ret = 0; + +fail: + qemu_opts_del(opts); + error_propagate(errp, local_err); + + return ret; +} + +static void replication_close(BlockDriverState *bs) +{ + BDRVReplicationState *s = bs->opaque; + + if (s->replication_state == BLOCK_REPLICATION_RUNNING) { + replication_stop(s->rs, false, NULL); + } + if (s->replication_state == BLOCK_REPLICATION_FAILOVER) { + block_job_cancel_sync(s->active_disk->bs->job); + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { + g_free(s->top_id); + } + + replication_remove(s->rs); +} + +static int64_t replication_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static int replication_get_io_status(BDRVReplicationState *s) +{ + switch (s->replication_state) { + case BLOCK_REPLICATION_NONE: + return -EIO; + case BLOCK_REPLICATION_RUNNING: + return 0; + case BLOCK_REPLICATION_FAILOVER: + return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 0; + case BLOCK_REPLICATION_FAILOVER_FAILED: + return s->mode == REPLICATION_MODE_PRIMARY ? -EIO : 1; + case BLOCK_REPLICATION_DONE: + /* + * active commit job completes, and active disk and secondary_disk + * is swapped, so we can operate bs->file directly + */ + return s->mode == REPLICATION_MODE_PRIMARY ? 
-EIO : 0; + default: + abort(); + } +} + +static int replication_return_value(BDRVReplicationState *s, int ret) +{ + if (s->mode == REPLICATION_MODE_SECONDARY) { + return ret; + } + + if (ret < 0) { + s->error = ret; + ret = 0; + } + + return ret; +} + +static coroutine_fn int replication_co_readv(BlockDriverState *bs, + int64_t sector_num, + int remaining_sectors, + QEMUIOVector *qiov) +{ + BDRVReplicationState *s = bs->opaque; + BdrvChild *child = s->secondary_disk; + BlockJob *job = NULL; + CowRequest req; + int ret; + + if (s->mode == REPLICATION_MODE_PRIMARY) { + /* We only use it to forward primary write requests */ + return -EIO; + } + + ret = replication_get_io_status(s); + if (ret < 0) { + return ret; + } + + if (child && child->bs) { + job = child->bs->job; + } + + if (job) { + backup_wait_for_overlapping_requests(child->bs->job, sector_num, + remaining_sectors); + backup_cow_request_begin(&req, child->bs->job, sector_num, + remaining_sectors); + ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, + qiov); + backup_cow_request_end(&req); + goto out; + } + + ret = bdrv_co_readv(bs->file, sector_num, remaining_sectors, qiov); +out: + return replication_return_value(s, ret); +} + +static coroutine_fn int replication_co_writev(BlockDriverState *bs, + int64_t sector_num, + int remaining_sectors, + QEMUIOVector *qiov) +{ + BDRVReplicationState *s = bs->opaque; + QEMUIOVector hd_qiov; + uint64_t bytes_done = 0; + BdrvChild *top = bs->file; + BdrvChild *base = s->secondary_disk; + BdrvChild *target; + int ret, n; + + ret = replication_get_io_status(s); + if (ret < 0) { + goto out; + } + + if (ret == 0) { + ret = bdrv_co_writev(top, sector_num, + remaining_sectors, qiov); + return replication_return_value(s, ret); + } + + /* + * Failover failed, only write to active disk if the sectors + * have already been allocated in active disk/hidden disk. + */ + qemu_iovec_init(&hd_qiov, qiov->niov); + while (remaining_sectors > 0) { + ret = bdrv_is_allocated_above(top->bs, base->bs, sector_num, + remaining_sectors, &n); + if (ret < 0) { + goto out1; + } + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, n * BDRV_SECTOR_SIZE); + + target = ret ? 
top : base; + ret = bdrv_co_writev(target, sector_num, n, &hd_qiov); + if (ret < 0) { + goto out1; + } + + remaining_sectors -= n; + sector_num += n; + bytes_done += n * BDRV_SECTOR_SIZE; + } + +out1: + qemu_iovec_destroy(&hd_qiov); +out: + return ret; +} + +static bool replication_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) +{ + return bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); +} + +static void secondary_do_checkpoint(BDRVReplicationState *s, Error **errp) +{ + Error *local_err = NULL; + int ret; + + if (!s->secondary_disk->bs->job) { + error_setg(errp, "Backup job was cancelled unexpectedly"); + return; + } + + backup_do_checkpoint(s->secondary_disk->bs->job, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + ret = s->active_disk->bs->drv->bdrv_make_empty(s->active_disk->bs); + if (ret < 0) { + error_setg(errp, "Cannot make active disk empty"); + return; + } + + ret = s->hidden_disk->bs->drv->bdrv_make_empty(s->hidden_disk->bs); + if (ret < 0) { + error_setg(errp, "Cannot make hidden disk empty"); + return; + } +} + +static void reopen_backing_file(BlockDriverState *bs, bool writable, + Error **errp) +{ + BDRVReplicationState *s = bs->opaque; + BlockReopenQueue *reopen_queue = NULL; + int orig_hidden_flags, orig_secondary_flags; + int new_hidden_flags, new_secondary_flags; + Error *local_err = NULL; + + if (writable) { + orig_hidden_flags = s->orig_hidden_flags = + bdrv_get_flags(s->hidden_disk->bs); + new_hidden_flags = (orig_hidden_flags | BDRV_O_RDWR) & + ~BDRV_O_INACTIVE; + orig_secondary_flags = s->orig_secondary_flags = + bdrv_get_flags(s->secondary_disk->bs); + new_secondary_flags = (orig_secondary_flags | BDRV_O_RDWR) & + ~BDRV_O_INACTIVE; + } else { + orig_hidden_flags = (s->orig_hidden_flags | BDRV_O_RDWR) & + ~BDRV_O_INACTIVE; + new_hidden_flags = s->orig_hidden_flags; + orig_secondary_flags = (s->orig_secondary_flags | BDRV_O_RDWR) & + ~BDRV_O_INACTIVE; + new_secondary_flags = s->orig_secondary_flags; + } + + if (orig_hidden_flags != new_hidden_flags) { + reopen_queue = bdrv_reopen_queue(reopen_queue, s->hidden_disk->bs, NULL, + new_hidden_flags); + } + + if (!(orig_secondary_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, s->secondary_disk->bs, + NULL, new_secondary_flags); + } + + if (reopen_queue) { + bdrv_reopen_multiple(bdrv_get_aio_context(bs), + reopen_queue, &local_err); + error_propagate(errp, local_err); + } +} + +static void backup_job_cleanup(BlockDriverState *bs) +{ + BDRVReplicationState *s = bs->opaque; + BlockDriverState *top_bs; + + top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); + if (!top_bs) { + return; + } + bdrv_op_unblock_all(top_bs, s->blocker); + error_free(s->blocker); + reopen_backing_file(bs, false, NULL); +} + +static void backup_job_completed(void *opaque, int ret) +{ + BlockDriverState *bs = opaque; + BDRVReplicationState *s = bs->opaque; + + if (s->replication_state != BLOCK_REPLICATION_FAILOVER) { + /* The backup job is cancelled unexpectedly */ + s->error = -EIO; + } + + backup_job_cleanup(bs); +} + +static bool check_top_bs(BlockDriverState *top_bs, BlockDriverState *bs) +{ + BdrvChild *child; + + /* The bs itself is the top_bs */ + if (top_bs == bs) { + return true; + } + + /* Iterate over top_bs's children */ + QLIST_FOREACH(child, &top_bs->children, next) { + if (child->bs == bs || check_top_bs(child->bs, bs)) { + return true; + } + } + + return false; +} + +static void replication_start(ReplicationState *rs, 
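replication_co_writev() above handles the failover-failed case by walking the request in chunks and sending each chunk to the active/hidden ("top") image only where it is already allocated there, and to the secondary ("base") image otherwise. A stand-alone model of that fan-out (is_allocated_above() here is a fake predicate, not the bdrv_is_allocated_above() call used in the hunk):

#include <stdio.h>

/* pretend sectors 0..9 are allocated in the top image */
static int is_allocated_above(long sector, int nb, int *pnum)
{
    if (sector < 10) {
        *pnum = (int)(10 - sector < nb ? 10 - sector : nb);
        return 1;
    }
    *pnum = nb;
    return 0;
}

int main(void)
{
    long sector = 6;
    int remaining = 12;

    while (remaining > 0) {
        int n;
        int alloc = is_allocated_above(sector, remaining, &n);

        printf("write %d sectors at %ld -> %s\n",
               n, sector, alloc ? "top (active/hidden)" : "base (secondary)");
        sector += n;
        remaining -= n;
    }
    return 0;
}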
ReplicationMode mode, + Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + BlockDriverState *top_bs; + int64_t active_length, hidden_length, disk_length; + AioContext *aio_context; + Error *local_err = NULL; + BlockJob *job; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->replication_state != BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is running or done"); + aio_context_release(aio_context); + return; + } + + if (s->mode != mode) { + error_setg(errp, "The parameter mode's value is invalid, needs %d," + " but got %d", s->mode, mode); + aio_context_release(aio_context); + return; + } + + switch (s->mode) { + case REPLICATION_MODE_PRIMARY: + break; + case REPLICATION_MODE_SECONDARY: + s->active_disk = bs->file; + if (!s->active_disk || !s->active_disk->bs || + !s->active_disk->bs->backing) { + error_setg(errp, "Active disk doesn't have backing file"); + aio_context_release(aio_context); + return; + } + + s->hidden_disk = s->active_disk->bs->backing; + if (!s->hidden_disk->bs || !s->hidden_disk->bs->backing) { + error_setg(errp, "Hidden disk doesn't have backing file"); + aio_context_release(aio_context); + return; + } + + s->secondary_disk = s->hidden_disk->bs->backing; + if (!s->secondary_disk->bs || !bdrv_has_blk(s->secondary_disk->bs)) { + error_setg(errp, "The secondary disk doesn't have block backend"); + aio_context_release(aio_context); + return; + } + + /* verify the length */ + active_length = bdrv_getlength(s->active_disk->bs); + hidden_length = bdrv_getlength(s->hidden_disk->bs); + disk_length = bdrv_getlength(s->secondary_disk->bs); + if (active_length < 0 || hidden_length < 0 || disk_length < 0 || + active_length != hidden_length || hidden_length != disk_length) { + error_setg(errp, "Active disk, hidden disk, secondary disk's length" + " are not the same"); + aio_context_release(aio_context); + return; + } + + if (!s->active_disk->bs->drv->bdrv_make_empty || + !s->hidden_disk->bs->drv->bdrv_make_empty) { + error_setg(errp, + "Active disk or hidden disk doesn't support make_empty"); + aio_context_release(aio_context); + return; + } + + /* reopen the backing file in r/w mode */ + reopen_backing_file(bs, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); + aio_context_release(aio_context); + return; + } + + /* start backup job now */ + error_setg(&s->blocker, + "Block device is in use by internal backup job"); + + top_bs = bdrv_lookup_bs(s->top_id, s->top_id, NULL); + if (!top_bs || !bdrv_is_root_node(top_bs) || + !check_top_bs(top_bs, bs)) { + error_setg(errp, "No top_bs or it is invalid"); + reopen_backing_file(bs, false, NULL); + aio_context_release(aio_context); + return; + } + bdrv_op_block_all(top_bs, s->blocker); + bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); + + job = backup_job_create(NULL, s->secondary_disk->bs, s->hidden_disk->bs, + 0, MIRROR_SYNC_MODE_NONE, NULL, false, + BLOCKDEV_ON_ERROR_REPORT, + BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL, + backup_job_completed, bs, NULL, &local_err); + if (local_err) { + error_propagate(errp, local_err); + backup_job_cleanup(bs); + aio_context_release(aio_context); + return; + } + block_job_start(job); + break; + default: + aio_context_release(aio_context); + abort(); + } + + s->replication_state = BLOCK_REPLICATION_RUNNING; + + if (s->mode == REPLICATION_MODE_SECONDARY) { + secondary_do_checkpoint(s, errp); + } + + s->error = 0; + aio_context_release(aio_context); +} + +static 
void replication_do_checkpoint(ReplicationState *rs, Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->mode == REPLICATION_MODE_SECONDARY) { + secondary_do_checkpoint(s, errp); + } + aio_context_release(aio_context); +} + +static void replication_get_error(ReplicationState *rs, Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->replication_state != BLOCK_REPLICATION_RUNNING) { + error_setg(errp, "Block replication is not running"); + aio_context_release(aio_context); + return; + } + + if (s->error) { + error_setg(errp, "I/O error occurred"); + aio_context_release(aio_context); + return; + } + aio_context_release(aio_context); +} + +static void replication_done(void *opaque, int ret) +{ + BlockDriverState *bs = opaque; + BDRVReplicationState *s = bs->opaque; + + if (ret == 0) { + s->replication_state = BLOCK_REPLICATION_DONE; + + /* refresh top bs's filename */ + bdrv_refresh_filename(bs); + s->active_disk = NULL; + s->secondary_disk = NULL; + s->hidden_disk = NULL; + s->error = 0; + } else { + s->replication_state = BLOCK_REPLICATION_FAILOVER_FAILED; + s->error = -EIO; + } +} + +static void replication_stop(ReplicationState *rs, bool failover, Error **errp) +{ + BlockDriverState *bs = rs->opaque; + BDRVReplicationState *s; + AioContext *aio_context; + + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->replication_state != BLOCK_REPLICATION_RUNNING) { + error_setg(errp, "Block replication is not running"); + aio_context_release(aio_context); + return; + } + + switch (s->mode) { + case REPLICATION_MODE_PRIMARY: + s->replication_state = BLOCK_REPLICATION_DONE; + s->error = 0; + break; + case REPLICATION_MODE_SECONDARY: + /* + * This BDS will be closed, and the job should be completed + * before the BDS is closed, because we will access hidden + * disk, secondary disk in backup_job_completed(). 
+ */ + if (s->secondary_disk->bs->job) { + block_job_cancel_sync(s->secondary_disk->bs->job); + } + + if (!failover) { + secondary_do_checkpoint(s, errp); + s->replication_state = BLOCK_REPLICATION_DONE; + aio_context_release(aio_context); + return; + } + + s->replication_state = BLOCK_REPLICATION_FAILOVER; + commit_active_start(NULL, s->active_disk->bs, s->secondary_disk->bs, + BLOCK_JOB_INTERNAL, 0, BLOCKDEV_ON_ERROR_REPORT, + replication_done, bs, errp, true); + break; + default: + aio_context_release(aio_context); + abort(); + } + aio_context_release(aio_context); +} + +BlockDriver bdrv_replication = { + .format_name = "replication", + .protocol_name = "replication", + .instance_size = sizeof(BDRVReplicationState), + + .bdrv_open = replication_open, + .bdrv_close = replication_close, + + .bdrv_getlength = replication_getlength, + .bdrv_co_readv = replication_co_readv, + .bdrv_co_writev = replication_co_writev, + + .is_filter = true, + .bdrv_recurse_is_first_non_filter = replication_recurse_is_first_non_filter, + + .has_variable_length = true, +}; + +static void bdrv_replication_init(void) +{ + bdrv_register(&bdrv_replication); +} + +block_init(bdrv_replication_init); diff --git a/block/sheepdog.c b/block/sheepdog.c index 66e1cb2b2d..4c9af89180 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -641,6 +641,7 @@ static void restart_co_req(void *opaque) typedef struct SheepdogReqCo { int sockfd; + BlockDriverState *bs; AioContext *aio_context; SheepdogReq *hdr; void *data; @@ -701,6 +702,9 @@ static coroutine_fn void do_co_req(void *opaque) srco->ret = ret; srco->finished = true; + if (srco->bs) { + bdrv_wakeup(srco->bs); + } } /* @@ -708,13 +712,14 @@ static coroutine_fn void do_co_req(void *opaque) * * Return 0 on success, -errno in case of error. */ -static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr, +static int do_req(int sockfd, BlockDriverState *bs, SheepdogReq *hdr, void *data, unsigned int *wlen, unsigned int *rlen) { Coroutine *co; SheepdogReqCo srco = { .sockfd = sockfd, - .aio_context = aio_context, + .aio_context = bs ? 
bdrv_get_aio_context(bs) : qemu_get_aio_context(), + .bs = bs, .hdr = hdr, .data = data, .wlen = wlen, @@ -727,9 +732,14 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr, do_co_req(&srco); } else { co = qemu_coroutine_create(do_co_req, &srco); - qemu_coroutine_enter(co); - while (!srco.finished) { - aio_poll(aio_context, true); + if (bs) { + qemu_coroutine_enter(co); + BDRV_POLL_WHILE(bs, !srco.finished); + } else { + qemu_coroutine_enter(co); + while (!srco.finished) { + aio_poll(qemu_get_aio_context(), true); + } } } @@ -1049,7 +1059,7 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename, const char *host_spec, *vdi_spec; int nr_sep, ret; - strstart(filename, "sheepdog:", (const char **)&filename); + strstart(filename, "sheepdog:", &filename); p = q = g_strdup(filename); /* count the number of separators */ @@ -1125,7 +1135,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename, hdr.snapid = snapid; hdr.flags = SD_FLAG_CMD_WRITE; - ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); if (ret) { error_setg_errno(errp, -ret, "cannot get vdi info"); goto out; @@ -1240,7 +1250,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, qemu_co_mutex_unlock(&s->lock); } -static int read_write_object(int fd, AioContext *aio_context, char *buf, +static int read_write_object(int fd, BlockDriverState *bs, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool write, bool create, uint32_t cache_flags) @@ -1274,7 +1284,7 @@ static int read_write_object(int fd, AioContext *aio_context, char *buf, hdr.offset = offset; hdr.copies = copies; - ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); if (ret) { error_report("failed to send a request to the sheep"); return ret; @@ -1289,22 +1299,22 @@ static int read_write_object(int fd, AioContext *aio_context, char *buf, } } -static int read_object(int fd, AioContext *aio_context, char *buf, +static int read_object(int fd, BlockDriverState *bs, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, uint32_t cache_flags) { - return read_write_object(fd, aio_context, buf, oid, copies, + return read_write_object(fd, bs, buf, oid, copies, datalen, offset, false, false, cache_flags); } -static int write_object(int fd, AioContext *aio_context, char *buf, +static int write_object(int fd, BlockDriverState *bs, char *buf, uint64_t oid, uint8_t copies, unsigned int datalen, uint64_t offset, bool create, uint32_t cache_flags) { - return read_write_object(fd, aio_context, buf, oid, copies, + return read_write_object(fd, bs, buf, oid, copies, datalen, offset, true, create, cache_flags); } @@ -1331,7 +1341,7 @@ static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag) goto out; } - ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid), + ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(vid), s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0, s->cache_flags); if (ret < 0) { @@ -1489,7 +1499,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, } buf = g_malloc(SD_INODE_SIZE); - ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid), + ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -1618,7 +1628,7 @@ static int 
do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, hdr.copies = s->inode.nr_copies; hdr.block_size_shift = s->inode.block_size_shift; - ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen); + ret = do_req(fd, NULL, (SheepdogReq *)&hdr, buf, &wlen, &rlen); closesocket(fd); @@ -1886,7 +1896,7 @@ static int sd_create(const char *filename, QemuOpts *opts, hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT; hdr.proto_ver = SD_PROTO_VER; - ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + ret = do_req(fd, NULL, (SheepdogReq *)&hdr, NULL, &wlen, &rlen); closesocket(fd); if (ret) { @@ -1951,7 +1961,7 @@ static void sd_close(BlockDriverState *bs) hdr.data_length = wlen; hdr.flags = SD_FLAG_CMD_WRITE; - ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); closesocket(fd); @@ -2000,7 +2010,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset) /* we don't need to update entire object */ datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); s->inode.vdi_size = offset; - ret = write_object(fd, s->aio_context, (char *)&s->inode, + ret = write_object(fd, s->bs, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, datalen, 0, false, s->cache_flags); close(fd); @@ -2070,7 +2080,7 @@ static bool sd_delete(BDRVSheepdogState *s) return false; } - ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); closesocket(fd); if (ret) { @@ -2126,7 +2136,7 @@ static int sd_create_branch(BDRVSheepdogState *s) goto out; } - ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid), + ret = read_object(fd, s->bs, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags); closesocket(fd); @@ -2411,7 +2421,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = write_object(fd, s->aio_context, (char *)&s->inode, + ret = write_object(fd, s->bs, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies, datalen, 0, false, s->cache_flags); if (ret < 0) { @@ -2426,7 +2436,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto cleanup; } - ret = read_object(fd, s->aio_context, (char *)inode, + ret = read_object(fd, s->bs, (char *)inode, vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0, s->cache_flags); @@ -2528,7 +2538,7 @@ static bool remove_objects(BDRVSheepdogState *s) i++; } - ret = write_object(fd, s->aio_context, + ret = write_object(fd, s->bs, (char *)&inode->data_vdi_id[start_idx], vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies, (i - start_idx) * sizeof(uint32_t), @@ -2600,7 +2610,7 @@ static int sd_snapshot_delete(BlockDriverState *bs, return -1; } - ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + ret = do_req(fd, s->bs, (SheepdogReq *)&hdr, buf, &wlen, &rlen); closesocket(fd); if (ret) { @@ -2652,8 +2662,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) req.opcode = SD_OP_READ_VDIS; req.data_length = max; - ret = do_req(fd, s->aio_context, (SheepdogReq *)&req, - vdi_inuse, &wlen, &rlen); + ret = do_req(fd, s->bs, &req, vdi_inuse, &wlen, &rlen); closesocket(fd); if (ret) { @@ -2679,7 +2688,7 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) } /* we don't need to read entire object */ - ret = read_object(fd, s->aio_context, (char *)&inode, + ret = read_object(fd, s->bs, (char *)&inode, vid_to_vdi_oid(vid), 0, 
SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0, s->cache_flags); @@ -2745,11 +2754,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, create = (offset == 0); if (load) { - ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid, + ret = read_object(fd, s->bs, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, s->cache_flags); } else { - ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid, + ret = write_object(fd, s->bs, (char *)data, vmstate_oid, s->inode.nr_copies, data_len, offset, create, s->cache_flags); } @@ -2820,8 +2829,9 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset, iov.iov_len = sizeof(zero); discard_iov.iov = &iov; discard_iov.niov = 1; - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((count & (BDRV_SECTOR_SIZE - 1)) == 0); + if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) { + return -ENOTSUP; + } acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS, count >> BDRV_SECTOR_BITS); acb->aiocb_type = AIOCB_DISCARD_OBJ; diff --git a/block/ssh.c b/block/ssh.c index 5ce12b633a..15ed2818c5 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -30,10 +30,14 @@ #include "block/block_int.h" #include "qapi/error.h" #include "qemu/error-report.h" +#include "qemu/cutils.h" #include "qemu/sockets.h" #include "qemu/uri.h" +#include "qapi-visit.h" #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" /* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in * this block driver code. @@ -74,8 +78,9 @@ typedef struct BDRVSSHState { */ LIBSSH2_SFTP_ATTRIBUTES attrs; + InetSocketAddress *inet; + /* Used to warn if 'flush' is not supported. */ - char *hostport; bool unsafe_flush_warning; } BDRVSSHState; @@ -89,7 +94,6 @@ static void ssh_state_init(BDRVSSHState *s) static void ssh_state_free(BDRVSSHState *s) { - g_free(s->hostport); if (s->sftp_handle) { libssh2_sftp_close(s->sftp_handle); } @@ -193,6 +197,7 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) { URI *uri = NULL; QueryParams *qp; + char *port_str; int i; uri = uri_parse(filename); @@ -225,11 +230,11 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) qdict_put(options, "user", qstring_from_str(uri->user)); } - qdict_put(options, "host", qstring_from_str(uri->server)); + qdict_put(options, "server.host", qstring_from_str(uri->server)); - if (uri->port) { - qdict_put(options, "port", qint_from_int(uri->port)); - } + port_str = g_strdup_printf("%d", uri->port ?: 22); + qdict_put(options, "server.port", qstring_from_str(port_str)); + g_free(port_str); qdict_put(options, "path", qstring_from_str(uri->path)); @@ -254,15 +259,31 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) return -EINVAL; } +static bool ssh_has_filename_options_conflict(QDict *options, Error **errp) +{ + const QDictEntry *qe; + + for (qe = qdict_first(options); qe; qe = qdict_next(options, qe)) { + if (!strcmp(qe->key, "host") || + !strcmp(qe->key, "port") || + !strcmp(qe->key, "path") || + !strcmp(qe->key, "user") || + !strcmp(qe->key, "host_key_check") || + strstart(qe->key, "server.", NULL)) + { + error_setg(errp, "Option '%s' cannot be used with a file name", + qe->key); + return true; + } + } + + return false; +} + static void ssh_parse_filename(const char *filename, QDict *options, Error **errp) { - if (qdict_haskey(options, "user") || - qdict_haskey(options, "host") || - 
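sd_co_pdiscard() above now rejects unaligned discards with -ENOTSUP instead of asserting, using QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE). The OR trick works because, for a power-of-two size, (offset | count) is aligned exactly when both values are. A stand-alone check (IS_ALIGNED and SECTOR_SIZE are local stand-ins for the QEMU macros):

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE 512u
#define IS_ALIGNED(v, sz) (((v) & ((sz) - 1)) == 0)   /* sz must be a power of two */

int main(void)
{
    uint64_t offset = 4096, count = 512;

    printf("%d\n", IS_ALIGNED(offset | count, SECTOR_SIZE));   /* 1: both aligned */
    count = 513;
    printf("%d\n", IS_ALIGNED(offset | count, SECTOR_SIZE));   /* 0: count unaligned */
    return 0;
}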
qdict_haskey(options, "port") || - qdict_haskey(options, "path") || - qdict_haskey(options, "host_key_check")) { - error_setg(errp, "user, host, port, path, host_key_check cannot be used at the same time as a file option"); + if (ssh_has_filename_options_conflict(options, errp)) { return; } @@ -540,14 +561,68 @@ static QemuOptsList ssh_runtime_opts = { }, }; +static bool ssh_process_legacy_socket_options(QDict *output_opts, + QemuOpts *legacy_opts, + Error **errp) +{ + const char *host = qemu_opt_get(legacy_opts, "host"); + const char *port = qemu_opt_get(legacy_opts, "port"); + + if (!host && port) { + error_setg(errp, "port may not be used without host"); + return false; + } + + if (host) { + qdict_put(output_opts, "server.host", qstring_from_str(host)); + qdict_put(output_opts, "server.port", + qstring_from_str(port ?: stringify(22))); + } + + return true; +} + +static InetSocketAddress *ssh_config(QDict *options, Error **errp) +{ + InetSocketAddress *inet = NULL; + QDict *addr = NULL; + QObject *crumpled_addr = NULL; + Visitor *iv = NULL; + Error *local_error = NULL; + + qdict_extract_subqdict(options, &addr, "server."); + if (!qdict_size(addr)) { + error_setg(errp, "SSH server address missing"); + goto out; + } + + crumpled_addr = qdict_crumple(addr, errp); + if (!crumpled_addr) { + goto out; + } + + iv = qobject_input_visitor_new(crumpled_addr, true); + visit_type_InetSocketAddress(iv, NULL, &inet, &local_error); + if (local_error) { + error_propagate(errp, local_error); + goto out; + } + +out: + QDECREF(addr); + qobject_decref(crumpled_addr); + visit_free(iv); + return inet; +} + static int connect_to_ssh(BDRVSSHState *s, QDict *options, int ssh_flags, int creat_mode, Error **errp) { int r, ret; QemuOpts *opts = NULL; Error *local_err = NULL; - const char *host, *user, *path, *host_key_check; - int port; + const char *user, *path, *host_key_check; + long port = 0; opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -557,15 +632,11 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, goto err; } - host = qemu_opt_get(opts, "host"); - if (!host) { + if (!ssh_process_legacy_socket_options(options, opts, errp)) { ret = -EINVAL; - error_setg(errp, "No hostname was specified"); goto err; } - port = qemu_opt_get_number(opts, "port", 22); - path = qemu_opt_get(opts, "path"); if (!path) { ret = -EINVAL; @@ -588,12 +659,21 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, host_key_check = "yes"; } - /* Construct the host:port name for inet_connect. */ - g_free(s->hostport); - s->hostport = g_strdup_printf("%s:%d", host, port); + /* Pop the config into our state object, Exit if invalid */ + s->inet = ssh_config(options, errp); + if (!s->inet) { + ret = -EINVAL; + goto err; + } + + if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) { + error_setg(errp, "Use only numeric port value"); + ret = -EINVAL; + goto err; + } /* Open the socket and connect. */ - s->sock = inet_connect(s->hostport, errp); + s->sock = inet_connect_saddr(s->inet, errp, NULL, NULL); if (s->sock < 0) { ret = -EIO; goto err; @@ -619,7 +699,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options, } /* Check the remote host's key against known_hosts. 
*/ - ret = check_host_key(s, host, port, host_key_check, errp); + ret = check_host_key(s, s->inet->host, port, host_key_check, + errp); if (ret < 0) { goto err; } @@ -1040,7 +1121,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what) { if (!s->unsafe_flush_warning) { error_report("warning: ssh server %s does not support fsync", - s->hostport); + s->inet->host); if (what) { error_report("to support fsync, you need %s", what); } diff --git a/block/stream.c b/block/stream.c index 31874817c2..1523ba7dfb 100644 --- a/block/stream.c +++ b/block/stream.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" #include "trace.h" #include "block/block_int.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" #include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" @@ -37,6 +37,7 @@ typedef struct StreamBlockJob { BlockDriverState *base; BlockdevOnError on_error; char *backing_file_str; + int bs_flags; } StreamBlockJob; static int coroutine_fn stream_populate(BlockBackend *blk, @@ -81,6 +82,11 @@ static void stream_complete(BlockJob *job, void *opaque) bdrv_set_backing_hd(bs, base); } + /* Reopen the image back in read-only mode if necessary */ + if (s->bs_flags != bdrv_get_flags(bs)) { + bdrv_reopen(bs, s->bs_flags, NULL); + } + g_free(s->backing_file_str); block_job_completed(&s->common, data->ret); g_free(data); @@ -212,26 +218,43 @@ static const BlockJobDriver stream_job_driver = { .instance_size = sizeof(StreamBlockJob), .job_type = BLOCK_JOB_TYPE_STREAM, .set_speed = stream_set_speed, + .start = stream_run, }; void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, const char *backing_file_str, - int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, void *opaque, Error **errp) + int64_t speed, BlockdevOnError on_error, Error **errp) { StreamBlockJob *s; + BlockDriverState *iter; + int orig_bs_flags; s = block_job_create(job_id, &stream_job_driver, bs, speed, - cb, opaque, errp); + BLOCK_JOB_DEFAULT, NULL, NULL, errp); if (!s) { return; } + /* Make sure that the image is opened in read-write mode */ + orig_bs_flags = bdrv_get_flags(bs); + if (!(orig_bs_flags & BDRV_O_RDWR)) { + if (bdrv_reopen(bs, orig_bs_flags | BDRV_O_RDWR, errp) != 0) { + block_job_unref(&s->common); + return; + } + } + + /* Block all intermediate nodes between bs and base, because they + * will disappear from the chain after this operation */ + for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { + block_job_add_bdrv(&s->common, iter); + } + s->base = base; s->backing_file_str = g_strdup(backing_file_str); + s->bs_flags = orig_bs_flags; s->on_error = on_error; - s->common.co = qemu_coroutine_create(stream_run, s); - trace_stream_start(bs, base, s, s->common.co, opaque); - qemu_coroutine_enter(s->common.co); + trace_stream_start(bs, base, s); + block_job_start(&s->common); } diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 59545e287e..17b2efb7c7 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -168,6 +168,22 @@ static BlockBackend *throttle_group_next_blk(BlockBackend *blk) return blk_by_public(next); } +/* + * Return whether a BlockBackend has pending requests. + * + * This assumes that tg->lock is held. + * + * @blk: the BlockBackend + * @is_write: the type of operation (read/write) + * @ret: whether the BlockBackend has pending requests. 
+ */ +static inline bool blk_has_pending_reqs(BlockBackend *blk, + bool is_write) +{ + const BlockBackendPublic *blkp = blk_get_public(blk); + return blkp->pending_reqs[is_write]; +} + /* Return the next BlockBackend in the round-robin sequence with pending I/O * requests. * @@ -188,7 +204,7 @@ static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write) /* get next bs round in round robin style */ token = throttle_group_next_blk(token); - while (token != start && !blkp->pending_reqs[is_write]) { + while (token != start && !blk_has_pending_reqs(token, is_write)) { token = throttle_group_next_blk(token); } @@ -196,10 +212,13 @@ static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write) * then decide the token is the current bs because chances are * the current bs get the current request queued. */ - if (token == start && !blkp->pending_reqs[is_write]) { + if (token == start && !blk_has_pending_reqs(token, is_write)) { token = blk; } + /* Either we return the original BB, or one with pending requests */ + assert(token == blk || blk_has_pending_reqs(token, is_write)); + return token; } @@ -257,7 +276,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write) /* Check if there's any pending request to schedule next */ token = next_throttle_token(blk, is_write); - if (!blkp->pending_reqs[is_write]) { + if (!blk_has_pending_reqs(token, is_write)) { return; } @@ -271,7 +290,7 @@ static void schedule_next_request(BlockBackend *blk, bool is_write) qemu_co_queue_next(&blkp->throttled_reqs[is_write])) { token = blk; } else { - ThrottleTimers *tt = &blkp->throttle_timers; + ThrottleTimers *tt = &blk_get_public(token)->throttle_timers; int64_t now = qemu_clock_get_ns(tt->clock_type); timer_mod(tt->timers[is_write], now + 1); tg->any_timer_armed[is_write] = true; diff --git a/block/trace-events b/block/trace-events index 05fa13c891..cfc05f2478 100644 --- a/block/trace-events +++ b/block/trace-events @@ -9,7 +9,6 @@ blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x" # block/io.c -bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p" bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p" bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p" @@ -20,14 +19,14 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t c # block/stream.c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" -stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" +stream_start(void *bs, void *base, void *s) "bs %p base %p s %p" # block/commit.c commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" -commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" +commit_start(void *bs, void *base, void *top, void *s) "bs %p base %p top %p s %p" # block/mirror.c -mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p" +mirror_start(void 
*bs, void *s, void *opaque) "bs %p s %p opaque %p" mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64 mirror_before_flush(void *s) "s %p" mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 @@ -52,7 +51,6 @@ qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" qmp_block_job_resume(void *job) "job %p" qmp_block_job_complete(void *job) "job %p" -block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" # block/raw-win32.c diff --git a/block/vdi.c b/block/vdi.c index 8a1cf97928..96b78d5a43 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -58,14 +58,7 @@ #include "migration/migration.h" #include "qemu/coroutine.h" #include "qemu/cutils.h" - -#if defined(CONFIG_UUID) -#include -#else -/* TODO: move uuid emulation to some central place in QEMU. */ -#include "sysemu/sysemu.h" /* UUID_FMT */ -typedef unsigned char uuid_t[16]; -#endif +#include "qemu/uuid.h" /* Code configuration options. */ @@ -140,28 +133,6 @@ typedef unsigned char uuid_t[16]; #define VDI_DISK_SIZE_MAX ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \ (uint64_t)DEFAULT_CLUSTER_SIZE) -#if !defined(CONFIG_UUID) -static inline void uuid_generate(uuid_t out) -{ - memset(out, 0, sizeof(uuid_t)); -} - -static inline int uuid_is_null(const uuid_t uu) -{ - uuid_t null_uuid = { 0 }; - return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0; -} - -# if defined(CONFIG_VDI_DEBUG) -static inline void uuid_unparse(const uuid_t uu, char *out) -{ - snprintf(out, 37, UUID_FMT, - uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7], - uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]); -} -# endif -#endif - typedef struct { char text[0x40]; uint32_t signature; @@ -182,10 +153,10 @@ typedef struct { uint32_t block_extra; /* unused here */ uint32_t blocks_in_image; uint32_t blocks_allocated; - uuid_t uuid_image; - uuid_t uuid_last_snap; - uuid_t uuid_link; - uuid_t uuid_parent; + QemuUUID uuid_image; + QemuUUID uuid_last_snap; + QemuUUID uuid_link; + QemuUUID uuid_parent; uint64_t unused2[7]; } QEMU_PACKED VdiHeader; @@ -206,16 +177,6 @@ typedef struct { Error *migration_blocker; } BDRVVdiState; -/* Change UUID from little endian (IPRT = VirtualBox format) to big endian - * format (network byte order, standard, see RFC 4122) and vice versa. 
- */ -static void uuid_convert(uuid_t uuid) -{ - bswap32s((uint32_t *)&uuid[0]); - bswap16s((uint16_t *)&uuid[4]); - bswap16s((uint16_t *)&uuid[6]); -} - static void vdi_header_to_cpu(VdiHeader *header) { le32_to_cpus(&header->signature); @@ -234,10 +195,10 @@ static void vdi_header_to_cpu(VdiHeader *header) le32_to_cpus(&header->block_extra); le32_to_cpus(&header->blocks_in_image); le32_to_cpus(&header->blocks_allocated); - uuid_convert(header->uuid_image); - uuid_convert(header->uuid_last_snap); - uuid_convert(header->uuid_link); - uuid_convert(header->uuid_parent); + qemu_uuid_bswap(&header->uuid_image); + qemu_uuid_bswap(&header->uuid_last_snap); + qemu_uuid_bswap(&header->uuid_link); + qemu_uuid_bswap(&header->uuid_parent); } static void vdi_header_to_le(VdiHeader *header) @@ -258,10 +219,10 @@ static void vdi_header_to_le(VdiHeader *header) cpu_to_le32s(&header->block_extra); cpu_to_le32s(&header->blocks_in_image); cpu_to_le32s(&header->blocks_allocated); - uuid_convert(header->uuid_image); - uuid_convert(header->uuid_last_snap); - uuid_convert(header->uuid_link); - uuid_convert(header->uuid_parent); + qemu_uuid_bswap(&header->uuid_image); + qemu_uuid_bswap(&header->uuid_last_snap); + qemu_uuid_bswap(&header->uuid_link); + qemu_uuid_bswap(&header->uuid_parent); } #if defined(CONFIG_VDI_DEBUG) @@ -469,11 +430,11 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, (uint64_t)header.blocks_in_image * header.block_size); ret = -ENOTSUP; goto fail; - } else if (!uuid_is_null(header.uuid_link)) { + } else if (!qemu_uuid_is_null(&header.uuid_link)) { error_setg(errp, "unsupported VDI image (non-NULL link UUID)"); ret = -ENOTSUP; goto fail; - } else if (!uuid_is_null(header.uuid_parent)) { + } else if (!qemu_uuid_is_null(&header.uuid_parent)) { error_setg(errp, "unsupported VDI image (non-NULL parent UUID)"); ret = -ENOTSUP; goto fail; @@ -821,8 +782,8 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) if (image_type == VDI_TYPE_STATIC) { header.blocks_allocated = blocks; } - uuid_generate(header.uuid_image); - uuid_generate(header.uuid_last_snap); + qemu_uuid_generate(&header.uuid_image); + qemu_uuid_generate(&header.uuid_last_snap); /* There is no need to set header.uuid_link or header.uuid_parent here. 
*/ #if defined(CONFIG_VDI_DEBUG) vdi_header_print(&header); diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c index c306b90d54..429d7556bd 100644 --- a/block/vhdx-endian.c +++ b/block/vhdx-endian.c @@ -21,9 +21,6 @@ #include "qemu/bswap.h" #include "block/vhdx.h" -#include - - /* * All the VHDX formats on disk are little endian - the following * are helper import/export functions to correctly convert diff --git a/block/vhdx.c b/block/vhdx.c index 75ef2b1c2d..0ba2f0a2f9 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -25,8 +25,7 @@ #include "qemu/bswap.h" #include "block/vhdx.h" #include "migration/migration.h" - -#include +#include "qemu/uuid.h" /* Options for VHDX creation */ @@ -213,11 +212,11 @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset) */ void vhdx_guid_generate(MSGUID *guid) { - uuid_t uuid; + QemuUUID uuid; assert(guid != NULL); - uuid_generate(uuid); - memcpy(guid, uuid, sizeof(MSGUID)); + qemu_uuid_generate(&uuid); + memcpy(guid, &uuid, sizeof(MSGUID)); } /* Check for region overlaps inside the VHDX image */ diff --git a/block/vmdk.c b/block/vmdk.c index 46d474e442..a11c27a1c4 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1645,56 +1645,11 @@ vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, return ret; } -typedef struct VmdkWriteCompressedCo { - BlockDriverState *bs; - int64_t sector_num; - const uint8_t *buf; - int nb_sectors; - int ret; -} VmdkWriteCompressedCo; - -static void vmdk_co_write_compressed(void *opaque) -{ - VmdkWriteCompressedCo *co = opaque; - QEMUIOVector local_qiov; - uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE; - uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE; - - struct iovec iov = (struct iovec) { - .iov_base = (uint8_t*) co->buf, - .iov_len = bytes, - }; - qemu_iovec_init_external(&local_qiov, &iov, 1); - - co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false); -} - -static int vmdk_write_compressed(BlockDriverState *bs, - int64_t sector_num, - const uint8_t *buf, - int nb_sectors) +static int coroutine_fn +vmdk_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, + uint64_t bytes, QEMUIOVector *qiov) { - BDRVVmdkState *s = bs->opaque; - - if (s->num_extents == 1 && s->extents[0].compressed) { - Coroutine *co; - AioContext *aio_context = bdrv_get_aio_context(bs); - VmdkWriteCompressedCo data = { - .bs = bs, - .sector_num = sector_num, - .buf = buf, - .nb_sectors = nb_sectors, - .ret = -EINPROGRESS, - }; - co = qemu_coroutine_create(vmdk_co_write_compressed, &data); - qemu_coroutine_enter(co); - while (data.ret == -EINPROGRESS) { - aio_poll(aio_context, true); - } - return data.ret; - } else { - return -ENOTSUP; - } + return vmdk_co_pwritev(bs, offset, bytes, qiov, 0); } static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, @@ -2393,7 +2348,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_reopen_prepare = vmdk_reopen_prepare, .bdrv_co_preadv = vmdk_co_preadv, .bdrv_co_pwritev = vmdk_co_pwritev, - .bdrv_write_compressed = vmdk_write_compressed, + .bdrv_co_pwritev_compressed = vmdk_co_pwritev_compressed, .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes, .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, diff --git a/block/vpc.c b/block/vpc.c index 43707ed22c..8d5886f003 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -30,9 +30,7 @@ #include "qemu/module.h" #include "migration/migration.h" #include "qemu/bswap.h" -#if defined(CONFIG_UUID) -#include -#endif +#include "qemu/uuid.h" /**************************************************************/ @@ -89,7 +87,7 @@ 
typedef struct vhd_footer { uint32_t checksum; /* UUID used to identify a parent hard disk (backing file) */ - uint8_t uuid[16]; + QemuUUID uuid; uint8_t in_saved_state; } QEMU_PACKED VHDFooter; @@ -980,9 +978,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) footer->type = cpu_to_be32(disk_type); -#if defined(CONFIG_UUID) - uuid_generate(footer->uuid); -#endif + qemu_uuid_generate(&footer->uuid); footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); diff --git a/block/vvfat.c b/block/vvfat.c index ba2620f3c2..ded21092ee 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2971,7 +2971,8 @@ static BlockDriver vvfat_write_target = { static void vvfat_qcow_options(int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { - *child_flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH; + qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "off"); + *child_flags = BDRV_O_NO_FLUSH; } static const BdrvChildRole child_vvfat_qcow = { diff --git a/block/write-threshold.c b/block/write-threshold.c index cc2ca71835..0bd1a01c86 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -76,8 +76,7 @@ static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, static void write_threshold_register_notifier(BlockDriverState *bs) { bs->write_threshold_notifier.notify = before_write_notify; - notifier_with_return_list_add(&bs->before_write_notifiers, - &bs->write_threshold_notifier); + bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier); } static void write_threshold_update(BlockDriverState *bs, diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 12cae0ea72..81bca1760f 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -44,6 +44,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition, return TRUE; } + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); nbd_client_new(NULL, cioc, nbd_server->tlscreds, NULL, nbd_client_put); @@ -111,6 +112,8 @@ void qmp_nbd_server_start(SocketAddress *addr, nbd_server = g_new0(NBDServerData, 1); nbd_server->watch = -1; nbd_server->listen_ioc = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(nbd_server->listen_ioc), + "nbd-listener"); if (qio_channel_socket_listen_sync( nbd_server->listen_ioc, addr, errp) < 0) { goto error; @@ -145,7 +148,8 @@ void qmp_nbd_server_start(SocketAddress *addr, void qmp_nbd_server_add(const char *device, bool has_writable, bool writable, Error **errp) { - BlockBackend *blk; + BlockDriverState *bs = NULL; + BlockBackend *on_eject_blk; NBDExport *exp; if (!nbd_server) { @@ -158,26 +162,22 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable, return; } - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); - return; - } - if (!blk_is_inserted(blk)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + on_eject_blk = blk_by_name(device); + + bs = bdrv_lookup_bs(device, device, errp); + if (!bs) { return; } if (!has_writable) { writable = false; } - if (blk_is_read_only(blk)) { + if (bdrv_is_read_only(bs)) { writable = false; } - exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL, - errp); + exp = nbd_export_new(bs, 0, -1, writable ? 
0 : NBD_FLAG_READ_ONLY, + NULL, false, on_eject_blk, errp); if (!exp) { return; } diff --git a/blockdev.c b/blockdev.c index 21614004d1..245e1e1d17 100644 --- a/blockdev.c +++ b/blockdev.c @@ -43,7 +43,7 @@ #include "qapi/qmp/types.h" #include "qapi-visit.h" #include "qapi/qmp/qerror.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/util.h" #include "sysemu/sysemu.h" #include "block/block_int.h" @@ -56,7 +56,8 @@ static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states = QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states); -static int do_open_tray(const char *device, bool force, Error **errp); +static int do_open_tray(const char *blk_name, const char *qdev_id, + bool force, Error **errp); static const char *const if_name[IF_COUNT] = { [IF_NONE] = "none", @@ -355,25 +356,14 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, const char **throttling_group, ThrottleConfig *throttle_cfg, BlockdevDetectZeroesOptions *detect_zeroes, Error **errp) { - const char *discard; Error *local_error = NULL; const char *aio; if (bdrv_flags) { - if (!qemu_opt_get_bool(opts, "read-only", false)) { - *bdrv_flags |= BDRV_O_RDWR; - } if (qemu_opt_get_bool(opts, "copy-on-read", false)) { *bdrv_flags |= BDRV_O_COPY_ON_READ; } - if ((discard = qemu_opt_get(opts, "discard")) != NULL) { - if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) { - error_setg(errp, "Invalid discard option"); - return; - } - } - if ((aio = qemu_opt_get(opts, "aio")) != NULL) { if (!strcmp(aio, "native")) { *bdrv_flags |= BDRV_O_NATIVE_AIO; @@ -451,15 +441,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, error_propagate(errp, local_error); return; } - - if (bdrv_flags && - *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - !(*bdrv_flags & BDRV_O_UNMAP)) - { - error_setg(errp, "setting detect-zeroes to unmap is not allowed " - "without setting discard operation to unmap"); - return; - } } } @@ -471,7 +452,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, int bdrv_flags = 0; int on_read_error, on_write_error; bool account_invalid, account_failed; - bool writethrough; + bool writethrough, read_only; BlockBackend *blk; BlockDriverState *bs; ThrottleConfig cfg; @@ -567,6 +548,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, bdrv_flags |= BDRV_O_SNAPSHOT; } + read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false); + /* init */ if ((!file || !*file) && !qdict_size(bs_opts)) { BlockBackendRootState *blk_rs; @@ -574,7 +557,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, blk = blk_new(); blk_rs = blk_get_root_state(blk); blk_rs->open_flags = bdrv_flags; - blk_rs->read_only = !(bdrv_flags & BDRV_O_RDWR); + blk_rs->read_only = read_only; blk_rs->detect_zeroes = detect_zeroes; QDECREF(bs_opts); @@ -588,6 +571,8 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, * Apply the defaults here instead. */ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY, + read_only ? 
"on" : "off"); assert((bdrv_flags & BDRV_O_CACHE_MASK) == 0); if (runstate_check(RUN_STATE_INMIGRATE)) { @@ -648,60 +633,23 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, return NULL; } -static QemuOptsList qemu_root_bds_opts; - /* Takes the ownership of bs_opts */ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) { - BlockDriverState *bs; - QemuOpts *opts; - Error *local_error = NULL; - BlockdevDetectZeroesOptions detect_zeroes; int bdrv_flags = 0; - opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp); - if (!opts) { - goto fail; - } - - qemu_opts_absorb_qdict(opts, bs_opts, &local_error); - if (local_error) { - error_propagate(errp, local_error); - goto fail; - } - - extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL, - &detect_zeroes, &local_error); - if (local_error) { - error_propagate(errp, local_error); - goto fail; - } - /* bdrv_open() defaults to the values in bdrv_flags (for compatibility * with other callers) rather than what we want as the real defaults. * Apply the defaults here instead. */ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off"); qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off"); + qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY, "off"); if (runstate_check(RUN_STATE_INMIGRATE)) { bdrv_flags |= BDRV_O_INACTIVE; } - bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); - if (!bs) { - goto fail_no_bs_opts; - } - - bs->detect_zeroes = detect_zeroes; - -fail_no_bs_opts: - qemu_opts_del(opts); - return bs; - -fail: - qemu_opts_del(opts); - QDECREF(bs_opts); - return NULL; + return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); } void blockdev_close_all_bdrv_states(void) @@ -805,7 +753,7 @@ QemuOptsList qemu_legacy_drive_opts = { /* Options that are passed on, but have special semantics with -drive */ { - .name = "read-only", + .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, .help = "open drive file as read-only", },{ @@ -871,7 +819,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) { "group", "throttling.group" }, - { "readonly", "read-only" }, + { "readonly", BDRV_OPT_READ_ONLY }, }; for (i = 0; i < ARRAY_SIZE(opt_renames); i++) { @@ -943,7 +891,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) } /* copy-on-read is disabled with a warning for read-only devices */ - read_only |= qemu_opt_get_bool(legacy_opts, "read-only", false); + read_only |= qemu_opt_get_bool(legacy_opts, BDRV_OPT_READ_ONLY, false); copy_on_read = qemu_opt_get_bool(legacy_opts, "copy-on-read", false); if (read_only && copy_on_read) { @@ -951,7 +899,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) copy_on_read = false; } - qdict_put(bs_opts, "read-only", + qdict_put(bs_opts, BDRV_OPT_READ_ONLY, qstring_from_str(read_only ? "on" : "off")); qdict_put(bs_opts, "copy-on-read", qstring_from_str(copy_on_read ? 
"on" :"off")); @@ -1174,6 +1122,51 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type) return dinfo; } +static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) +{ + BlockDriverState *bs; + + bs = bdrv_lookup_bs(name, name, errp); + if (bs == NULL) { + return NULL; + } + + if (!bdrv_is_root_node(bs)) { + error_setg(errp, "Need a root block node"); + return NULL; + } + + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device has no medium"); + return NULL; + } + + return bs; +} + +static BlockBackend *qmp_get_blk(const char *blk_name, const char *qdev_id, + Error **errp) +{ + BlockBackend *blk; + + if (!blk_name == !qdev_id) { + error_setg(errp, "Need exactly one of 'device' and 'id'"); + return NULL; + } + + if (qdev_id) { + blk = blk_by_qdev_id(qdev_id, errp); + } else { + blk = blk_by_name(blk_name); + if (blk == NULL) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", blk_name); + } + } + + return blk; +} + void hmp_commit(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); @@ -1284,21 +1277,17 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, Error **errp) { BlockDriverState *bs; - BlockBackend *blk; AioContext *aio_context; QEMUSnapshotInfo sn; Error *local_err = NULL; SnapshotInfo *info = NULL; int ret; - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); + bs = qmp_get_root_bs(device, errp); + if (!bs) { return NULL; } - - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); if (!has_id) { @@ -1314,12 +1303,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, goto out_aio_context; } - if (!blk_is_available(blk)) { - error_setg(errp, "Device '%s' has no medium", device); - goto out_aio_context; - } - bs = blk_bs(blk); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { goto out_aio_context; } @@ -1499,7 +1482,6 @@ static void internal_snapshot_prepare(BlkActionState *common, Error *local_err = NULL; const char *device; const char *name; - BlockBackend *blk; BlockDriverState *bs; QEMUSnapshotInfo old_sn, *sn; bool ret; @@ -1522,23 +1504,15 @@ static void internal_snapshot_prepare(BlkActionState *common, return; } - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); + bs = qmp_get_root_bs(device, errp); + if (!bs) { return; } /* AioContext is released in .clean() */ - state->aio_context = blk_get_aio_context(blk); + state->aio_context = bdrv_get_aio_context(bs); aio_context_acquire(state->aio_context); - if (!blk_is_available(blk)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); - return; - } - bs = blk_bs(blk); - state->bs = bs; bdrv_drained_begin(bs); @@ -1774,8 +1748,7 @@ static void external_snapshot_prepare(BlkActionState *common, } if (bdrv_has_blk(state->new_bs)) { - error_setg(errp, "The snapshot is already in use by %s", - bdrv_get_parent_name(state->new_bs)); + error_setg(errp, "The snapshot is already in use"); return; } @@ -1838,72 +1811,50 @@ typedef struct DriveBackupState { BlockJob *job; } DriveBackupState; -static void do_drive_backup(const char *job_id, const char *device, - const char *target, bool has_format, - const char *format, enum MirrorSyncMode sync, - bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, - bool has_bitmap, const char 
*bitmap, - bool has_on_source_error, - BlockdevOnError on_source_error, - bool has_on_target_error, - BlockdevOnError on_target_error, - BlockJobTxn *txn, Error **errp); +static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, + Error **errp); static void drive_backup_prepare(BlkActionState *common, Error **errp) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); - BlockBackend *blk; + BlockDriverState *bs; DriveBackup *backup; Error *local_err = NULL; assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); backup = common->action->u.drive_backup.data; - blk = blk_by_name(backup->device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", backup->device); - return; - } - - if (!blk_is_available(blk)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device); + bs = qmp_get_root_bs(backup->device, errp); + if (!bs) { return; } /* AioContext is released in .clean() */ - state->aio_context = blk_get_aio_context(blk); + state->aio_context = bdrv_get_aio_context(bs); aio_context_acquire(state->aio_context); - bdrv_drained_begin(blk_bs(blk)); - state->bs = blk_bs(blk); - - do_drive_backup(backup->has_job_id ? backup->job_id : NULL, - backup->device, backup->target, - backup->has_format, backup->format, - backup->sync, - backup->has_mode, backup->mode, - backup->has_speed, backup->speed, - backup->has_bitmap, backup->bitmap, - backup->has_on_source_error, backup->on_source_error, - backup->has_on_target_error, backup->on_target_error, - common->block_job_txn, &local_err); + bdrv_drained_begin(bs); + state->bs = bs; + + state->job = do_drive_backup(backup, common->block_job_txn, &local_err); if (local_err) { error_propagate(errp, local_err); return; } +} - state->job = state->bs->job; +static void drive_backup_commit(BlkActionState *common) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + assert(state->job); + block_job_start(state->job); } static void drive_backup_abort(BlkActionState *common) { DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); - BlockDriverState *bs = state->bs; - /* Only cancel if it's the job we started */ - if (bs && bs->job && bs->job == state->job) { - block_job_cancel_sync(bs->job); + if (state->job) { + block_job_cancel_sync(state->job); } } @@ -1924,34 +1875,21 @@ typedef struct BlockdevBackupState { AioContext *aio_context; } BlockdevBackupState; -static void do_blockdev_backup(const char *job_id, const char *device, - const char *target, enum MirrorSyncMode sync, - bool has_speed, int64_t speed, - bool has_on_source_error, - BlockdevOnError on_source_error, - bool has_on_target_error, - BlockdevOnError on_target_error, - BlockJobTxn *txn, Error **errp); +static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, + Error **errp); static void blockdev_backup_prepare(BlkActionState *common, Error **errp) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); BlockdevBackup *backup; - BlockBackend *blk; - BlockDriverState *target; + BlockDriverState *bs, *target; Error *local_err = NULL; assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP); backup = common->action->u.blockdev_backup.data; - blk = blk_by_name(backup->device); - if (!blk) { - error_setg(errp, "Device '%s' not found", backup->device); - return; - } - - if (!blk_is_available(blk)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device); + bs = qmp_get_root_bs(backup->device, errp); + if (!bs) { 
return; } @@ -1961,38 +1899,36 @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp) } /* AioContext is released in .clean() */ - state->aio_context = blk_get_aio_context(blk); + state->aio_context = bdrv_get_aio_context(bs); if (state->aio_context != bdrv_get_aio_context(target)) { state->aio_context = NULL; error_setg(errp, "Backup between two IO threads is not implemented"); return; } aio_context_acquire(state->aio_context); - state->bs = blk_bs(blk); + state->bs = bs; bdrv_drained_begin(state->bs); - do_blockdev_backup(backup->has_job_id ? backup->job_id : NULL, - backup->device, backup->target, backup->sync, - backup->has_speed, backup->speed, - backup->has_on_source_error, backup->on_source_error, - backup->has_on_target_error, backup->on_target_error, - common->block_job_txn, &local_err); + state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err); if (local_err) { error_propagate(errp, local_err); return; } +} - state->job = state->bs->job; +static void blockdev_backup_commit(BlkActionState *common) +{ + BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); + assert(state->job); + block_job_start(state->job); } static void blockdev_backup_abort(BlkActionState *common) { BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common); - BlockDriverState *bs = state->bs; - /* Only cancel if it's the job we started */ - if (bs && bs->job && bs->job == state->job) { - block_job_cancel_sync(bs->job); + if (state->job) { + block_job_cancel_sync(state->job); } } @@ -2142,12 +2078,14 @@ static const BlkActionOps actions[] = { [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { .instance_size = sizeof(DriveBackupState), .prepare = drive_backup_prepare, + .commit = drive_backup_commit, .abort = drive_backup_abort, .clean = drive_backup_clean, }, [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = { .instance_size = sizeof(BlockdevBackupState), .prepare = blockdev_backup_prepare, + .commit = blockdev_backup_commit, .abort = blockdev_backup_abort, .clean = blockdev_backup_clean, }, @@ -2279,7 +2217,9 @@ void qmp_transaction(TransactionActionList *dev_list, block_job_txn_unref(block_job_txn); } -void qmp_eject(const char *device, bool has_force, bool force, Error **errp) +void qmp_eject(bool has_device, const char *device, + bool has_id, const char *id, + bool has_force, bool force, Error **errp) { Error *local_err = NULL; int rc; @@ -2288,14 +2228,16 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp) force = false; } - rc = do_open_tray(device, force, &local_err); + rc = do_open_tray(has_device ? device : NULL, + has_id ? id : NULL, + force, &local_err); if (rc && rc != -ENOSYS) { error_propagate(errp, local_err); return; } error_free(local_err); - qmp_x_blockdev_remove_medium(device, errp); + qmp_x_blockdev_remove_medium(has_device, device, has_id, id, errp); } void qmp_block_passwd(bool has_device, const char *device, @@ -2333,15 +2275,15 @@ void qmp_block_passwd(bool has_device, const char *device, * If the guest was asked to open the tray, return -EINPROGRESS. * Else, return 0. 
*/ -static int do_open_tray(const char *device, bool force, Error **errp) +static int do_open_tray(const char *blk_name, const char *qdev_id, + bool force, Error **errp) { BlockBackend *blk; + const char *device = qdev_id ?: blk_name; bool locked; - blk = blk_by_name(device); + blk = qmp_get_blk(blk_name, qdev_id, errp); if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); return -ENODEV; } @@ -2377,7 +2319,9 @@ static int do_open_tray(const char *device, bool force, Error **errp) return 0; } -void qmp_blockdev_open_tray(const char *device, bool has_force, bool force, +void qmp_blockdev_open_tray(bool has_device, const char *device, + bool has_id, const char *id, + bool has_force, bool force, Error **errp) { Error *local_err = NULL; @@ -2386,7 +2330,9 @@ void qmp_blockdev_open_tray(const char *device, bool has_force, bool force, if (!has_force) { force = false; } - rc = do_open_tray(device, force, &local_err); + rc = do_open_tray(has_device ? device : NULL, + has_id ? id : NULL, + force, &local_err); if (rc && rc != -ENOSYS && rc != -EINPROGRESS) { error_propagate(errp, local_err); return; @@ -2394,19 +2340,22 @@ void qmp_blockdev_open_tray(const char *device, bool has_force, bool force, error_free(local_err); } -void qmp_blockdev_close_tray(const char *device, Error **errp) +void qmp_blockdev_close_tray(bool has_device, const char *device, + bool has_id, const char *id, + Error **errp) { BlockBackend *blk; - blk = blk_by_name(device); + device = has_device ? device : NULL; + id = has_id ? id : NULL; + + blk = qmp_get_blk(device, id, errp); if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); return; } if (!blk_dev_has_removable_media(blk)) { - error_setg(errp, "Device '%s' is not removable", device); + error_setg(errp, "Device '%s' is not removable", device ?: id); return; } @@ -2422,30 +2371,34 @@ void qmp_blockdev_close_tray(const char *device, Error **errp) blk_dev_change_media_cb(blk, true); } -void qmp_x_blockdev_remove_medium(const char *device, Error **errp) +void qmp_x_blockdev_remove_medium(bool has_device, const char *device, + bool has_id, const char *id, Error **errp) { BlockBackend *blk; BlockDriverState *bs; AioContext *aio_context; - bool has_device; + bool has_attached_device; - blk = blk_by_name(device); + device = has_device ? device : NULL; + id = has_id ? 
id : NULL; + + blk = qmp_get_blk(device, id, errp); if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); return; } /* For BBs without a device, we can exchange the BDS tree at will */ - has_device = blk_get_attached_dev(blk); + has_attached_device = blk_get_attached_dev(blk); - if (has_device && !blk_dev_has_removable_media(blk)) { - error_setg(errp, "Device '%s' is not removable", device); + if (has_attached_device && !blk_dev_has_removable_media(blk)) { + error_setg(errp, "Device '%s' is not removable", device ?: id); return; } - if (has_device && blk_dev_has_tray(blk) && !blk_dev_is_tray_open(blk)) { - error_setg(errp, "Tray of device '%s' is not open", device); + if (has_attached_device && blk_dev_has_tray(blk) && + !blk_dev_is_tray_open(blk)) + { + error_setg(errp, "Tray of device '%s' is not open", device ?: id); return; } @@ -2475,34 +2428,26 @@ void qmp_x_blockdev_remove_medium(const char *device, Error **errp) aio_context_release(aio_context); } -static void qmp_blockdev_insert_anon_medium(const char *device, +static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, BlockDriverState *bs, Error **errp) { - BlockBackend *blk; bool has_device; - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); - return; - } - /* For BBs without a device, we can exchange the BDS tree at will */ has_device = blk_get_attached_dev(blk); if (has_device && !blk_dev_has_removable_media(blk)) { - error_setg(errp, "Device '%s' is not removable", device); + error_setg(errp, "Device is not removable"); return; } if (has_device && blk_dev_has_tray(blk) && !blk_dev_is_tray_open(blk)) { - error_setg(errp, "Tray of device '%s' is not open", device); + error_setg(errp, "Tray of the device is not open"); return; } if (blk_bs(blk)) { - error_setg(errp, "There already is a medium in device '%s'", device); + error_setg(errp, "There already is a medium in the device"); return; } @@ -2518,11 +2463,20 @@ static void qmp_blockdev_insert_anon_medium(const char *device, } } -void qmp_x_blockdev_insert_medium(const char *device, const char *node_name, - Error **errp) +void qmp_x_blockdev_insert_medium(bool has_device, const char *device, + bool has_id, const char *id, + const char *node_name, Error **errp) { + BlockBackend *blk; BlockDriverState *bs; + blk = qmp_get_blk(has_device ? device : NULL, + has_id ? 
id : NULL, + errp); + if (!blk) { + return; + } + bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Node '%s' not found", node_name); @@ -2530,15 +2484,16 @@ void qmp_x_blockdev_insert_medium(const char *device, const char *node_name, } if (bdrv_has_blk(bs)) { - error_setg(errp, "Node '%s' is already in use by '%s'", node_name, - bdrv_get_parent_name(bs)); + error_setg(errp, "Node '%s' is already in use", node_name); return; } - qmp_blockdev_insert_anon_medium(device, bs, errp); + qmp_blockdev_insert_anon_medium(blk, bs, errp); } -void qmp_blockdev_change_medium(const char *device, const char *filename, +void qmp_blockdev_change_medium(bool has_device, const char *device, + bool has_id, const char *id, + const char *filename, bool has_format, const char *format, bool has_read_only, BlockdevChangeReadOnlyMode read_only, @@ -2547,14 +2502,15 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, BlockBackend *blk; BlockDriverState *medium_bs = NULL; int bdrv_flags; + bool detect_zeroes; int rc; QDict *options = NULL; Error *err = NULL; - blk = blk_by_name(device); + blk = qmp_get_blk(has_device ? device : NULL, + has_id ? id : NULL, + errp); if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); goto fail; } @@ -2586,8 +2542,12 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, abort(); } + options = qdict_new(); + detect_zeroes = blk_get_detect_zeroes_from_root_state(blk); + qdict_put(options, "detect-zeroes", + qstring_from_str(detect_zeroes ? "on" : "off")); + if (has_format) { - options = qdict_new(); qdict_put(options, "driver", qstring_from_str(format)); } @@ -2602,7 +2562,9 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, goto fail; } - rc = do_open_tray(device, false, &err); + rc = do_open_tray(has_device ? device : NULL, + has_id ? id : NULL, + false, &err); if (rc && rc != -ENOSYS) { error_propagate(errp, err); goto fail; @@ -2610,21 +2572,19 @@ void qmp_blockdev_change_medium(const char *device, const char *filename, error_free(err); err = NULL; - qmp_x_blockdev_remove_medium(device, &err); + qmp_x_blockdev_remove_medium(has_device, device, has_id, id, &err); if (err) { error_propagate(errp, err); goto fail; } - qmp_blockdev_insert_anon_medium(device, medium_bs, &err); + qmp_blockdev_insert_anon_medium(blk, medium_bs, &err); if (err) { error_propagate(errp, err); goto fail; } - blk_apply_root_state(blk, medium_bs); - - qmp_blockdev_close_tray(device, errp); + qmp_blockdev_close_tray(has_device, device, has_id, id, errp); fail: /* If the medium has been inserted, the device has its own reference, so @@ -2641,10 +2601,10 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) BlockBackend *blk; AioContext *aio_context; - blk = blk_by_name(arg->device); + blk = qmp_get_blk(arg->has_device ? arg->device : NULL, + arg->has_id ? arg->id : NULL, + errp); if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", arg->device); return; } @@ -2653,7 +2613,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) bs = blk_bs(blk); if (!bs) { - error_setg(errp, "Device '%s' has no medium", arg->device); + error_setg(errp, "Device has no medium"); goto out; } @@ -2717,7 +2677,9 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) * just update the throttling group. */ if (!blk_get_public(blk)->throttle_state) { blk_io_limits_enable(blk, - arg->has_group ? 
arg->group : arg->device); + arg->has_group ? arg->group : + arg->has_device ? arg->device : + arg->id); } else if (arg->has_group) { blk_io_limits_update_group(blk, arg->group); } @@ -2838,7 +2800,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) bs = bdrv_find_node(id); if (bs) { - qmp_x_blockdev_del(false, NULL, true, id, &local_err); + qmp_x_blockdev_del(id, &local_err); if (local_err) { error_report_err(local_err); } @@ -2951,40 +2913,15 @@ void qmp_block_resize(bool has_device, const char *device, aio_context_release(aio_context); } -static void block_job_cb(void *opaque, int ret) -{ - /* Note that this function may be executed from another AioContext besides - * the QEMU main loop. If you need to access anything that assumes the - * QEMU global mutex, use a BH or introduce a mutex. - */ - - BlockDriverState *bs = opaque; - const char *msg = NULL; - - trace_block_job_cb(bs, bs->job, ret); - - assert(bs->job); - - if (ret < 0) { - msg = strerror(-ret); - } - - if (block_job_is_cancelled(bs->job)) { - block_job_event_cancelled(bs->job); - } else { - block_job_event_completed(bs->job, msg); - } -} - void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, bool has_base, const char *base, + bool has_base_node, const char *base_node, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, bool has_on_error, BlockdevOnError on_error, Error **errp) { - BlockBackend *blk; - BlockDriverState *bs; + BlockDriverState *bs, *iter; BlockDriverState *base_bs = NULL; AioContext *aio_context; Error *local_err = NULL; @@ -2994,23 +2931,17 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, on_error = BLOCKDEV_ON_ERROR_REPORT; } - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); + bs = bdrv_lookup_bs(device, device, errp); + if (!bs) { return; } - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!blk_is_available(blk)) { - error_setg(errp, "Device '%s' has no medium", device); - goto out; - } - bs = blk_bs(blk); - - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) { + if (has_base && has_base_node) { + error_setg(errp, "'base' and 'base-node' cannot be specified " + "at the same time"); goto out; } @@ -3024,6 +2955,27 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, base_name = base; } + if (has_base_node) { + base_bs = bdrv_lookup_bs(NULL, base_node, errp); + if (!base_bs) { + goto out; + } + if (bs == base_bs || !bdrv_chain_contains(bs, base_bs)) { + error_setg(errp, "Node '%s' is not a backing image of '%s'", + base_node, device); + goto out; + } + assert(bdrv_get_aio_context(base_bs) == aio_context); + base_name = base_bs->filename; + } + + /* Check for op blockers in the whole chain between bs and base */ + for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) { + goto out; + } + } + /* if we are streaming the entire chain, the result will have no backing * file, and specifying one is therefore an error */ if (base_bs == NULL && has_backing_file) { @@ -3036,7 +2988,7 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device, base_name = has_backing_file ? backing_file : base_name; stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name, - has_speed ? speed : 0, on_error, block_job_cb, bs, &local_err); + has_speed ? 
speed : 0, on_error, &local_err); if (local_err) { error_propagate(errp, local_err); goto out; @@ -3055,8 +3007,8 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_speed, int64_t speed, Error **errp) { - BlockBackend *blk; BlockDriverState *bs; + BlockDriverState *iter; BlockDriverState *base_bs, *top_bs; AioContext *aio_context; Error *local_err = NULL; @@ -3074,22 +3026,22 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, * live commit feature versions; for this to work, we must make sure to * perform the device lookup before any generic errors that may occur in a * scenario in which all optional arguments are omitted. */ - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); + bs = qmp_get_root_bs(device, &local_err); + if (!bs) { + bs = bdrv_lookup_bs(device, device, NULL); + if (!bs) { + error_free(local_err); + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", device); + } else { + error_propagate(errp, local_err); + } return; } - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!blk_is_available(blk)) { - error_setg(errp, "Device '%s' has no medium", device); - goto out; - } - bs = blk_bs(blk); - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { goto out; } @@ -3123,8 +3075,10 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, assert(bdrv_get_aio_context(base_bs) == aio_context); - if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { - goto out; + for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } } /* Do not allow attempts to commit an image into itself */ @@ -3139,12 +3093,17 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, " but 'top' is the active layer"); goto out; } - commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed, - on_error, block_job_cb, bs, &local_err); + commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, + BLOCK_JOB_DEFAULT, speed, on_error, NULL, NULL, + &local_err, false); } else { + BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); + if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { + goto out; + } commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed, - on_error, block_job_cb, bs, - has_backing_file ? backing_file : NULL, &local_err); + on_error, has_backing_file ? 
backing_file : NULL, + &local_err); } if (local_err != NULL) { error_propagate(errp, local_err); @@ -3155,22 +3114,13 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, aio_context_release(aio_context); } -static void do_drive_backup(const char *job_id, const char *device, - const char *target, bool has_format, - const char *format, enum MirrorSyncMode sync, - bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, - bool has_bitmap, const char *bitmap, - bool has_on_source_error, - BlockdevOnError on_source_error, - bool has_on_target_error, - BlockdevOnError on_target_error, - BlockJobTxn *txn, Error **errp) +static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn, + Error **errp) { - BlockBackend *blk; BlockDriverState *bs; BlockDriverState *target_bs; BlockDriverState *source = NULL; + BlockJob *job = NULL; BdrvDirtyBitmap *bmap = NULL; AioContext *aio_context; QDict *options = NULL; @@ -3178,39 +3128,36 @@ static void do_drive_backup(const char *job_id, const char *device, int flags; int64_t size; - if (!has_speed) { - speed = 0; + if (!backup->has_speed) { + backup->speed = 0; } - if (!has_on_source_error) { - on_source_error = BLOCKDEV_ON_ERROR_REPORT; + if (!backup->has_on_source_error) { + backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT; } - if (!has_on_target_error) { - on_target_error = BLOCKDEV_ON_ERROR_REPORT; + if (!backup->has_on_target_error) { + backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!backup->has_mode) { + backup->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } - if (!has_mode) { - mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + if (!backup->has_job_id) { + backup->job_id = NULL; + } + if (!backup->has_compress) { + backup->compress = false; } - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); - return; + bs = qmp_get_root_bs(backup->device, errp); + if (!bs) { + return NULL; } - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - /* Although backup_run has this check too, we need to use bs->drv below, so - * do an early check redundantly. */ - if (!blk_is_available(blk)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); - goto out; - } - bs = blk_bs(blk); - - if (!has_format) { - format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name; + if (!backup->has_format) { + backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ? + NULL : (char*) bs->drv->format_name; } /* Early check to avoid creating target */ @@ -3222,13 +3169,13 @@ static void do_drive_backup(const char *job_id, const char *device, /* See if we have a backing HD we can use to create our new image * on top of. 
*/ - if (sync == MIRROR_SYNC_MODE_TOP) { + if (backup->sync == MIRROR_SYNC_MODE_TOP) { source = backing_bs(bs); if (!source) { - sync = MIRROR_SYNC_MODE_FULL; + backup->sync = MIRROR_SYNC_MODE_FULL; } } - if (sync == MIRROR_SYNC_MODE_NONE) { + if (backup->sync == MIRROR_SYNC_MODE_NONE) { source = bs; } @@ -3238,14 +3185,14 @@ static void do_drive_backup(const char *job_id, const char *device, goto out; } - if (mode != NEW_IMAGE_MODE_EXISTING) { - assert(format); + if (backup->mode != NEW_IMAGE_MODE_EXISTING) { + assert(backup->format); if (source) { - bdrv_img_create(target, format, source->filename, + bdrv_img_create(backup->target, backup->format, source->filename, source->drv->format_name, NULL, size, flags, &local_err, false); } else { - bdrv_img_create(target, format, NULL, NULL, NULL, + bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, size, flags, &local_err, false); } } @@ -3255,30 +3202,31 @@ static void do_drive_backup(const char *job_id, const char *device, goto out; } - if (format) { + if (backup->format) { options = qdict_new(); - qdict_put(options, "driver", qstring_from_str(format)); + qdict_put(options, "driver", qstring_from_str(backup->format)); } - target_bs = bdrv_open(target, NULL, options, flags, errp); + target_bs = bdrv_open(backup->target, NULL, options, flags, errp); if (!target_bs) { goto out; } bdrv_set_aio_context(target_bs, aio_context); - if (has_bitmap) { - bmap = bdrv_find_dirty_bitmap(bs, bitmap); + if (backup->has_bitmap) { + bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap); if (!bmap) { - error_setg(errp, "Bitmap '%s' could not be found", bitmap); + error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap); bdrv_unref(target_bs); goto out; } } - backup_start(job_id, bs, target_bs, speed, sync, bmap, - on_source_error, on_target_error, - block_job_cb, bs, txn, &local_err); + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, + backup->sync, bmap, backup->compress, + backup->on_source_error, backup->on_target_error, + BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err); bdrv_unref(target_bs); if (local_err != NULL) { error_propagate(errp, local_err); @@ -3287,26 +3235,17 @@ static void do_drive_backup(const char *job_id, const char *device, out: aio_context_release(aio_context); + return job; } -void qmp_drive_backup(bool has_job_id, const char *job_id, - const char *device, const char *target, - bool has_format, const char *format, - enum MirrorSyncMode sync, - bool has_mode, enum NewImageMode mode, - bool has_speed, int64_t speed, - bool has_bitmap, const char *bitmap, - bool has_on_source_error, BlockdevOnError on_source_error, - bool has_on_target_error, BlockdevOnError on_target_error, - Error **errp) +void qmp_drive_backup(DriveBackup *arg, Error **errp) { - return do_drive_backup(has_job_id ? 
job_id : NULL, device, target, - has_format, format, sync, - has_mode, mode, has_speed, speed, - has_bitmap, bitmap, - has_on_source_error, on_source_error, - has_on_target_error, on_target_error, - NULL, errp); + + BlockJob *job; + job = do_drive_backup(arg, NULL, errp); + if (job) { + block_job_start(job); + } } BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) @@ -3314,47 +3253,40 @@ BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) return bdrv_named_nodes_list(errp); } -void do_blockdev_backup(const char *job_id, const char *device, - const char *target, enum MirrorSyncMode sync, - bool has_speed, int64_t speed, - bool has_on_source_error, - BlockdevOnError on_source_error, - bool has_on_target_error, - BlockdevOnError on_target_error, - BlockJobTxn *txn, Error **errp) +BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn, + Error **errp) { - BlockBackend *blk; BlockDriverState *bs; BlockDriverState *target_bs; Error *local_err = NULL; AioContext *aio_context; + BlockJob *job = NULL; - if (!has_speed) { - speed = 0; + if (!backup->has_speed) { + backup->speed = 0; } - if (!has_on_source_error) { - on_source_error = BLOCKDEV_ON_ERROR_REPORT; + if (!backup->has_on_source_error) { + backup->on_source_error = BLOCKDEV_ON_ERROR_REPORT; } - if (!has_on_target_error) { - on_target_error = BLOCKDEV_ON_ERROR_REPORT; + if (!backup->has_on_target_error) { + backup->on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!backup->has_job_id) { + backup->job_id = NULL; + } + if (!backup->has_compress) { + backup->compress = false; } - blk = blk_by_name(device); - if (!blk) { - error_setg(errp, "Device '%s' not found", device); - return; + bs = qmp_get_root_bs(backup->device, errp); + if (!bs) { + return NULL; } - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!blk_is_available(blk)) { - error_setg(errp, "Device '%s' has no medium", device); - goto out; - } - bs = blk_bs(blk); - - target_bs = bdrv_lookup_bs(target, target, errp); + target_bs = bdrv_lookup_bs(backup->target, backup->target, errp); if (!target_bs) { goto out; } @@ -3370,30 +3302,25 @@ void do_blockdev_backup(const char *job_id, const char *device, goto out; } } - backup_start(job_id, bs, target_bs, speed, sync, NULL, on_source_error, - on_target_error, block_job_cb, bs, txn, &local_err); + job = backup_job_create(backup->job_id, bs, target_bs, backup->speed, + backup->sync, NULL, backup->compress, + backup->on_source_error, backup->on_target_error, + BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); } out: aio_context_release(aio_context); + return job; } -void qmp_blockdev_backup(bool has_job_id, const char *job_id, - const char *device, const char *target, - enum MirrorSyncMode sync, - bool has_speed, int64_t speed, - bool has_on_source_error, - BlockdevOnError on_source_error, - bool has_on_target_error, - BlockdevOnError on_target_error, - Error **errp) +void qmp_blockdev_backup(BlockdevBackup *arg, Error **errp) { - do_blockdev_backup(has_job_id ? job_id : NULL, device, target, - sync, has_speed, speed, - has_on_source_error, on_source_error, - has_on_target_error, on_target_error, - NULL, errp); + BlockJob *job; + job = do_blockdev_backup(arg, NULL, errp); + if (job) { + block_job_start(job); + } } /* Parameter check and block job starting for drive mirroring. 
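The reworked handlers above split job creation from execution: do_drive_backup() and do_blockdev_backup() take the boxed QAPI argument struct, fill in the optional defaults, and hand back the still-paused BlockJob, while the thin qmp_* wrappers merely call block_job_start(). A minimal caller sketch of that pattern follows; the drive name, target path and the wrapping function are illustrative assumptions, not code from this patch (do_drive_backup() is file-local, so think of this as living next to the QMP handlers in blockdev.c).

    /* Sketch only: create a full backup job without a transaction, then start it. */
    static void start_full_backup_example(Error **errp)
    {
        DriveBackup backup = {
            .device     = (char *)"drive0",        /* assumed drive name */
            .target     = (char *)"backup.qcow2",  /* assumed target file */
            .has_format = true,
            .format     = (char *)"qcow2",
            .sync       = MIRROR_SYNC_MODE_FULL,
        };
        BlockJob *job;

        /* Returns a job that has not been entered yet (created paused). */
        job = do_drive_backup(&backup, NULL /* no BlockJobTxn */, errp);
        if (job) {
            block_job_start(job);  /* the job coroutine only runs from here */
        }
    }

Keeping creation separate from block_job_start() is what lets a caller, for example a transaction, build several jobs before any of them starts running.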
@@ -3462,14 +3389,12 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, mirror_start(job_id, bs, target, has_replaces ? replaces : NULL, speed, granularity, buf_size, sync, backing_mode, - on_source_error, on_target_error, unmap, - block_job_cb, bs, errp); + on_source_error, on_target_error, unmap, errp); } void qmp_drive_mirror(DriveMirror *arg, Error **errp) { BlockDriverState *bs; - BlockBackend *blk; BlockDriverState *source, *target_bs; AioContext *aio_context; BlockMirrorBackingMode backing_mode; @@ -3479,21 +3404,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) int64_t size; const char *format = arg->format; - blk = blk_by_name(arg->device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", arg->device); + bs = qmp_get_root_bs(arg->device, errp); + if (!bs) { return; } - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!blk_is_available(blk)) { - error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, arg->device); - goto out; - } - bs = blk_bs(blk); if (!arg->has_mode) { arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; } @@ -3630,21 +3548,13 @@ void qmp_blockdev_mirror(bool has_job_id, const char *job_id, Error **errp) { BlockDriverState *bs; - BlockBackend *blk; BlockDriverState *target_bs; AioContext *aio_context; BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; Error *local_err = NULL; - blk = blk_by_name(device); - if (!blk) { - error_setg(errp, "Device '%s' not found", device); - return; - } - bs = blk_bs(blk); - + bs = qmp_get_root_bs(device, errp); if (!bs) { - error_setg(errp, "Device '%s' has no media", device); return; } @@ -3723,7 +3633,7 @@ void qmp_block_job_cancel(const char *device, force = false; } - if (job->user_paused && !force) { + if (block_job_user_paused(job) && !force) { error_setg(errp, "The block job for device '%s' is currently paused", device); goto out; @@ -3740,13 +3650,12 @@ void qmp_block_job_pause(const char *device, Error **errp) AioContext *aio_context; BlockJob *job = find_block_job(device, &aio_context, errp); - if (!job || job->user_paused) { + if (!job || block_job_user_paused(job)) { return; } - job->user_paused = true; trace_qmp_block_job_pause(job); - block_job_pause(job); + block_job_user_pause(job); aio_context_release(aio_context); } @@ -3755,14 +3664,13 @@ void qmp_block_job_resume(const char *device, Error **errp) AioContext *aio_context; BlockJob *job = find_block_job(device, &aio_context, errp); - if (!job || !job->user_paused) { + if (!job || !block_job_user_paused(job)) { return; } - job->user_paused = false; trace_qmp_block_job_resume(job); block_job_iostatus_reset(job); - block_job_resume(job); + block_job_user_resume(job); aio_context_release(aio_context); } @@ -3785,7 +3693,6 @@ void qmp_change_backing_file(const char *device, const char *backing_file, Error **errp) { - BlockBackend *blk; BlockDriverState *bs = NULL; AioContext *aio_context; BlockDriverState *image_bs = NULL; @@ -3794,22 +3701,14 @@ void qmp_change_backing_file(const char *device, int open_flags; int ret; - blk = blk_by_name(device); - if (!blk) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); + bs = qmp_get_root_bs(device, errp); + if (!bs) { return; } - aio_context = blk_get_aio_context(blk); + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (!blk_is_available(blk)) { - error_setg(errp, "Device '%s' has no medium", device); - goto out; - } - bs 
= blk_bs(blk); - image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -3905,27 +3804,11 @@ void hmp_drive_add_node(Monitor *mon, const char *optstr) void qmp_blockdev_add(BlockdevOptions *options, Error **errp) { BlockDriverState *bs; - BlockBackend *blk = NULL; QObject *obj; - Visitor *v = qmp_output_visitor_new(&obj); + Visitor *v = qobject_output_visitor_new(&obj); QDict *qdict; Error *local_err = NULL; - /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with - * cache.direct=false instead of silently switching to aio=threads, except - * when called from drive_new(). - * - * For now, simply forbidding the combination for all drivers will do. */ - if (options->has_aio && options->aio == BLOCKDEV_AIO_OPTIONS_NATIVE) { - bool direct = options->has_cache && - options->cache->has_direct && - options->cache->direct; - if (!direct) { - error_setg(errp, "aio=native requires cache.direct=true"); - goto fail; - } - } - visit_type_BlockdevOptions(v, NULL, &options, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -3937,37 +3820,21 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) qdict_flatten(qdict); - if (options->has_id) { - blk = blockdev_init(NULL, qdict, &local_err); - if (local_err) { - error_propagate(errp, local_err); - goto fail; - } - - bs = blk_bs(blk); - } else { - if (!qdict_get_try_str(qdict, "node-name")) { - error_setg(errp, "'id' and/or 'node-name' need to be specified for " - "the root node"); - goto fail; - } - - bs = bds_tree_init(qdict, errp); - if (!bs) { - goto fail; - } + if (!qdict_get_try_str(qdict, "node-name")) { + error_setg(errp, "'node-name' must be specified for the root node"); + goto fail; + } - QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); + bs = bds_tree_init(qdict, errp); + if (!bs) { + goto fail; } + QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list); + if (bs && bdrv_key_required(bs)) { - if (blk) { - monitor_remove_blk(blk); - blk_unref(blk); - } else { - QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); - bdrv_unref(bs); - } + QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); + bdrv_unref(bs); error_setg(errp, "blockdev-add doesn't support encrypted devices"); goto fail; } @@ -3976,82 +3843,42 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) visit_free(v); } -void qmp_x_blockdev_del(bool has_id, const char *id, - bool has_node_name, const char *node_name, Error **errp) +void qmp_x_blockdev_del(const char *node_name, Error **errp) { AioContext *aio_context; - BlockBackend *blk; BlockDriverState *bs; - if (has_id && has_node_name) { - error_setg(errp, "Only one of id and node-name must be specified"); - return; - } else if (!has_id && !has_node_name) { - error_setg(errp, "No block device specified"); + bs = bdrv_find_node(node_name); + if (!bs) { + error_setg(errp, "Cannot find node %s", node_name); return; } - - if (has_id) { - /* blk_by_name() never returns a BB that is not owned by the monitor */ - blk = blk_by_name(id); - if (!blk) { - error_setg(errp, "Cannot find block backend %s", id); - return; - } - if (blk_legacy_dinfo(blk)) { - error_setg(errp, "Deleting block backend added with drive-add" - " is not supported"); - return; - } - if (blk_get_refcnt(blk) > 1) { - error_setg(errp, "Block backend %s is in use", id); - return; - } - bs = blk_bs(blk); - aio_context = blk_get_aio_context(blk); - } else { - blk = NULL; - bs = bdrv_find_node(node_name); - if (!bs) { - error_setg(errp, "Cannot find 
node %s", node_name); - return; - } - if (bdrv_has_blk(bs)) { - error_setg(errp, "Node %s is in use by %s", - node_name, bdrv_get_parent_name(bs)); - return; - } - aio_context = bdrv_get_aio_context(bs); + if (bdrv_has_blk(bs)) { + error_setg(errp, "Node %s is in use", node_name); + return; } - + aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); - if (bs) { - if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { - goto out; - } - - if (!blk && !bs->monitor_list.tqe_prev) { - error_setg(errp, "Node %s is not owned by the monitor", - bs->node_name); - goto out; - } + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { + goto out; + } - if (bs->refcnt > 1) { - error_setg(errp, "Block device %s is in use", - bdrv_get_device_or_node_name(bs)); - goto out; - } + if (!bs->monitor_list.tqe_prev) { + error_setg(errp, "Node %s is not owned by the monitor", + bs->node_name); + goto out; } - if (blk) { - monitor_remove_blk(blk); - blk_unref(blk); - } else { - QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); - bdrv_unref(bs); + if (bs->refcnt > 1) { + error_setg(errp, "Block device %s is in use", + bdrv_get_device_or_node_name(bs)); + goto out; } + QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); + bdrv_unref(bs); + out: aio_context_release(aio_context); } @@ -4117,13 +3944,22 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) BlockJob *job; for (job = block_job_next(NULL); job; job = block_job_next(job)) { - BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); - AioContext *aio_context = blk_get_aio_context(job->blk); + BlockJobInfoList *elem; + AioContext *aio_context; + if (block_job_is_internal(job)) { + continue; + } + elem = g_new0(BlockJobInfoList, 1); + aio_context = blk_get_aio_context(job->blk); aio_context_acquire(aio_context); - elem->value = block_job_query(job); + elem->value = block_job_query(job, errp); aio_context_release(aio_context); - + if (!elem->value) { + g_free(elem); + qapi_free_BlockJobInfoList(head); + return NULL; + } *p_next = elem; p_next = &elem->next; } @@ -4139,10 +3975,6 @@ QemuOptsList qemu_common_drive_opts = { .name = "snapshot", .type = QEMU_OPT_BOOL, .help = "enable/disable snapshot mode", - },{ - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", },{ .name = "aio", .type = QEMU_OPT_STRING, @@ -4164,7 +3996,7 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_STRING, .help = "write error action", },{ - .name = "read-only", + .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, .help = "open drive file as read-only", },{ @@ -4270,35 +4102,6 @@ QemuOptsList qemu_common_drive_opts = { }, }; -static QemuOptsList qemu_root_bds_opts = { - .name = "root-bds", - .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head), - .desc = { - { - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", - },{ - .name = "aio", - .type = QEMU_OPT_STRING, - .help = "host AIO implementation (threads, native)", - },{ - .name = "read-only", - .type = QEMU_OPT_BOOL, - .help = "open drive file as read-only", - },{ - .name = "copy-on-read", - .type = QEMU_OPT_BOOL, - .help = "copy read data from backing file into image file", - },{ - .name = "detect-zeroes", - .type = QEMU_OPT_STRING, - .help = "try to optimize zero writes (off, on, unmap)", - }, - { /* end of list */ } - }, -}; - QemuOptsList qemu_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), diff --git a/blockjob.c b/blockjob.c index 
a5ba3bee52..513620c199 100644 --- a/blockjob.c +++ b/blockjob.c @@ -27,7 +27,7 @@ #include "qemu-common.h" #include "trace.h" #include "block/block.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" #include "block/block_int.h" #include "sysemu/block-backend.h" #include "qapi/qmp/qerror.h" @@ -38,6 +38,9 @@ #include "qemu/timer.h" #include "qapi-event.h" +static void block_job_event_cancelled(BlockJob *job); +static void block_job_event_completed(BlockJob *job, const char *msg); + /* Transactional group of block jobs */ struct BlockJobTxn { @@ -66,7 +69,7 @@ BlockJob *block_job_get(const char *id) BlockJob *job; QLIST_FOREACH(job, &block_jobs, job_list) { - if (!strcmp(id, job->id)) { + if (job->id && !strcmp(id, job->id)) { return job; } } @@ -74,17 +77,6 @@ BlockJob *block_job_get(const char *id) return NULL; } -/* Normally the job runs in its BlockBackend's AioContext. The exception is - * block_job_defer_to_main_loop() where it runs in the QEMU main loop. Code - * that supports both cases uses this helper function. - */ -static AioContext *block_job_get_aio_context(BlockJob *job) -{ - return job->deferred_to_main_loop ? - qemu_get_aio_context() : - blk_get_aio_context(job->blk); -} - static void block_job_attached_aio_context(AioContext *new_context, void *opaque) { @@ -97,6 +89,17 @@ static void block_job_attached_aio_context(AioContext *new_context, block_job_resume(job); } +static void block_job_drain(BlockJob *job) +{ + /* If job is !job->busy this kicks it into the next pause point. */ + block_job_enter(job); + + blk_drain(job->blk); + if (job->driver->drain) { + job->driver->drain(job); + } +} + static void block_job_detach_aio_context(void *opaque) { BlockJob *job = opaque; @@ -106,42 +109,55 @@ static void block_job_detach_aio_context(void *opaque) block_job_pause(job); - if (!job->paused) { - /* If job is !job->busy this kicks it into the next pause point. 
*/ - block_job_enter(job); - } while (!job->paused && !job->completed) { - aio_poll(block_job_get_aio_context(job), true); + block_job_drain(job); } block_job_unref(job); } +void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs) +{ + job->nodes = g_slist_prepend(job->nodes, bs); + bdrv_ref(bs); + bdrv_op_block_all(bs, job->blocker); +} + void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, + BlockDriverState *bs, int64_t speed, int flags, BlockCompletionFunc *cb, void *opaque, Error **errp) { BlockBackend *blk; BlockJob *job; - assert(cb); if (bs->job) { error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } - if (job_id == NULL) { + if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) { job_id = bdrv_get_device_name(bs); + if (!*job_id) { + error_setg(errp, "An explicit job ID is required for this node"); + return NULL; + } } - if (!id_wellformed(job_id)) { - error_setg(errp, "Invalid job ID '%s'", job_id); - return NULL; - } + if (job_id) { + if (flags & BLOCK_JOB_INTERNAL) { + error_setg(errp, "Cannot specify job ID for internal block job"); + return NULL; + } - if (block_job_get(job_id)) { - error_setg(errp, "Job ID '%s' already in use", job_id); - return NULL; + if (!id_wellformed(job_id)) { + error_setg(errp, "Invalid job ID '%s'", job_id); + return NULL; + } + + if (block_job_get(job_id)) { + error_setg(errp, "Job ID '%s' already in use", job_id); + return NULL; + } } blk = blk_new(); @@ -150,7 +166,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job = g_malloc0(driver->instance_size); error_setg(&job->blocker, "block device is in use by block job: %s", BlockJobType_lookup[driver->job_type]); - bdrv_op_block_all(bs, job->blocker); + block_job_add_bdrv(job, bs); bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker); job->driver = driver; @@ -158,7 +174,9 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, job->blk = blk; job->cb = cb; job->opaque = opaque; - job->busy = true; + job->busy = false; + job->paused = true; + job->pause_count = 1; job->refcnt = 1; bs->job = job; @@ -181,6 +199,28 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, return job; } +bool block_job_is_internal(BlockJob *job) +{ + return (job->id == NULL); +} + +static bool block_job_started(BlockJob *job) +{ + return job->co; +} + +void block_job_start(BlockJob *job) +{ + assert(job && !block_job_started(job) && job->paused && + !job->busy && job->driver->start); + job->co = qemu_coroutine_create(job->driver->start, job); + if (--job->pause_count == 0) { + job->paused = false; + job->busy = true; + qemu_coroutine_enter(job->co); + } +} + void block_job_ref(BlockJob *job) { ++job->refcnt; @@ -189,9 +229,15 @@ void block_job_ref(BlockJob *job) void block_job_unref(BlockJob *job) { if (--job->refcnt == 0) { + GSList *l; BlockDriverState *bs = blk_bs(job->blk); bs->job = NULL; - bdrv_op_unblock_all(bs, job->blocker); + for (l = job->nodes; l; l = l->next) { + bs = l->data; + bdrv_op_unblock_all(bs, job->blocker); + bdrv_unref(bs); + } + g_slist_free(job->nodes); blk_remove_aio_context_notifier(job->blk, block_job_attached_aio_context, block_job_detach_aio_context, job); @@ -214,8 +260,29 @@ static void block_job_completed_single(BlockJob *job) job->driver->abort(job); } } - job->cb(job->opaque, job->ret); + if (job->driver->clean) { + job->driver->clean(job); + } + + if (job->cb) { + job->cb(job->opaque, job->ret); + } + + /* Emit events only if 
we actually started */ + if (block_job_started(job)) { + if (block_job_is_cancelled(job)) { + block_job_event_cancelled(job); + } else { + const char *msg = NULL; + if (job->ret < 0) { + msg = strerror(-job->ret); + } + block_job_event_completed(job, msg); + } + } + if (job->txn) { + QLIST_REMOVE(job, txn_list); block_job_txn_unref(job->txn); } block_job_unref(job); @@ -317,7 +384,10 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) void block_job_complete(BlockJob *job, Error **errp) { - if (job->pause_count || job->cancelled || !job->driver->complete) { + /* Should not be reachable via external interface for internal jobs */ + assert(job->id); + if (job->pause_count || job->cancelled || + !block_job_started(job) || !job->driver->complete) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; @@ -331,13 +401,26 @@ void block_job_pause(BlockJob *job) job->pause_count++; } +void block_job_user_pause(BlockJob *job) +{ + job->user_paused = true; + block_job_pause(job); +} + static bool block_job_should_pause(BlockJob *job) { return job->pause_count > 0; } +bool block_job_user_paused(BlockJob *job) +{ + return job ? job->user_paused : 0; +} + void coroutine_fn block_job_pause_point(BlockJob *job) { + assert(job && block_job_started(job)); + if (!block_job_should_pause(job)) { return; } @@ -372,6 +455,14 @@ void block_job_resume(BlockJob *job) block_job_enter(job); } +void block_job_user_resume(BlockJob *job) +{ + if (job && job->user_paused && job->pause_count > 0) { + job->user_paused = false; + block_job_resume(job); + } +} + void block_job_enter(BlockJob *job) { if (job->co && !job->busy) { @@ -381,9 +472,13 @@ void block_job_enter(BlockJob *job) void block_job_cancel(BlockJob *job) { - job->cancelled = true; - block_job_iostatus_reset(job); - block_job_enter(job); + if (block_job_started(job)) { + job->cancelled = true; + block_job_iostatus_reset(job); + block_job_enter(job); + } else { + block_job_completed(job, -ECANCELED); + } } bool block_job_is_cancelled(BlockJob *job) @@ -409,14 +504,21 @@ static int block_job_finish_sync(BlockJob *job, assert(blk_bs(job->blk)->job == job); block_job_ref(job); + finish(job, &local_err); if (local_err) { error_propagate(errp, local_err); block_job_unref(job); return -EBUSY; } + /* block_job_drain calls block_job_enter, and it should be enough to + * induce progress until the job completes or moves to the main thread. + */ + while (!job->deferred_to_main_loop && !job->completed) { + block_job_drain(job); + } while (!job->completed) { - aio_poll(block_job_get_aio_context(job), true); + aio_poll(qemu_get_aio_context(), true); } ret = (job->cancelled && job->ret == 0) ? 
-ECANCELED : job->ret; block_job_unref(job); @@ -490,9 +592,15 @@ void block_job_yield(BlockJob *job) block_job_pause_point(job); } -BlockJobInfo *block_job_query(BlockJob *job) +BlockJobInfo *block_job_query(BlockJob *job, Error **errp) { - BlockJobInfo *info = g_new0(BlockJobInfo, 1); + BlockJobInfo *info; + + if (block_job_is_internal(job)) { + error_setg(errp, "Cannot query QEMU internal jobs"); + return NULL; + } + info = g_new0(BlockJobInfo, 1); info->type = g_strdup(BlockJobType_lookup[job->driver->job_type]); info->device = g_strdup(job->id); info->len = job->len; @@ -513,8 +621,12 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) } } -void block_job_event_cancelled(BlockJob *job) +static void block_job_event_cancelled(BlockJob *job) { + if (block_job_is_internal(job)) { + return; + } + qapi_event_send_block_job_cancelled(job->driver->job_type, job->id, job->len, @@ -523,8 +635,12 @@ void block_job_event_cancelled(BlockJob *job) &error_abort); } -void block_job_event_completed(BlockJob *job, const char *msg) +static void block_job_event_completed(BlockJob *job, const char *msg) { + if (block_job_is_internal(job)) { + return; + } + qapi_event_send_block_job_completed(job->driver->job_type, job->id, job->len, @@ -539,6 +655,10 @@ void block_job_event_ready(BlockJob *job) { job->ready = true; + if (block_job_is_internal(job)) { + return; + } + qapi_event_send_block_job_ready(job->driver->job_type, job->id, job->len, @@ -569,14 +689,15 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, default: abort(); } - qapi_event_send_block_job_error(job->id, - is_read ? IO_OPERATION_TYPE_READ : - IO_OPERATION_TYPE_WRITE, - action, &error_abort); + if (!block_job_is_internal(job)) { + qapi_event_send_block_job_error(job->id, + is_read ? IO_OPERATION_TYPE_READ : + IO_OPERATION_TYPE_WRITE, + action, &error_abort); + } if (action == BLOCK_ERROR_ACTION_STOP) { /* make the pause user visible, which will be resumed from QMP. */ - job->user_paused = true; - block_job_pause(job); + block_job_user_pause(job); block_job_iostatus_set_err(job, error); } return action; @@ -584,7 +705,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, typedef struct { BlockJob *job; - QEMUBH *bh; AioContext *aio_context; BlockJobDeferToMainLoopFn *fn; void *opaque; @@ -595,8 +715,6 @@ static void block_job_defer_to_main_loop_bh(void *opaque) BlockJobDeferToMainLoopData *data = opaque; AioContext *aio_context; - qemu_bh_delete(data->bh); - /* Prevent race with block_job_defer_to_main_loop() */ aio_context_acquire(data->aio_context); @@ -620,13 +738,13 @@ void block_job_defer_to_main_loop(BlockJob *job, { BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data)); data->job = job; - data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data); data->aio_context = blk_get_aio_context(job->blk); data->fn = fn; data->opaque = opaque; job->deferred_to_main_loop = true; - qemu_bh_schedule(data->bh); + aio_bh_schedule_oneshot(qemu_get_aio_context(), + block_job_defer_to_main_loop_bh, data); } BlockJobTxn *block_job_txn_new(void) diff --git a/bsd-user/main.c b/bsd-user/main.c index 0fb08e405d..714a692e6f 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -67,23 +67,6 @@ int cpu_get_pic_interrupt(CPUX86State *env) } #endif -/* These are no-ops because we are not threadsafe. 
*/ -static inline void cpu_exec_start(CPUArchState *env) -{ -} - -static inline void cpu_exec_end(CPUArchState *env) -{ -} - -static inline void start_exclusive(void) -{ -} - -static inline void end_exclusive(void) -{ -} - void fork_start(void) { } @@ -95,14 +78,6 @@ void fork_end(int child) } } -void cpu_list_lock(void) -{ -} - -void cpu_list_unlock(void) -{ -} - #ifdef TARGET_I386 /***********************************************************/ /* CPUX86 core interface */ @@ -172,7 +147,11 @@ void cpu_loop(CPUX86State *env) //target_siginfo_t info; for(;;) { + cpu_exec_start(cs); trapnr = cpu_exec(cs); + cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch(trapnr) { case 0x80: /* syscall from int $0x80 */ @@ -513,7 +492,10 @@ void cpu_loop(CPUSPARCState *env) //target_siginfo_t info; while (1) { + cpu_exec_start(cs); trapnr = cpu_exec(cs); + cpu_exec_end(cs); + process_queued_cpu_work(cs); switch (trapnr) { #ifndef TARGET_SPARC64 @@ -669,7 +651,7 @@ void cpu_loop(CPUSPARCState *env) static void usage(void) { printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION - ", " QEMU_COPYRIGHT "\n" + "\n" QEMU_COPYRIGHT "\n" "usage: qemu-" TARGET_NAME " [options] program [arguments...]\n" "BSD CPU emulator (compiled for %s emulation)\n" "\n" @@ -713,6 +695,16 @@ static void usage(void) THREAD CPUState *thread_cpu; +bool qemu_cpu_is_self(CPUState *cpu) +{ + return thread_cpu == cpu; +} + +void qemu_cpu_kick(CPUState *cpu) +{ + cpu_exit(cpu); +} + /* Assumes contents are already zeroed. */ void init_task_state(TaskState *ts) { @@ -748,6 +740,8 @@ int main(int argc, char **argv) if (argc <= 1) usage(); + module_call_init(MODULE_INIT_TRACE); + qemu_init_cpu_list(); module_call_init(MODULE_INIT_QOM); if ((envlist = envlist_create()) == NULL) { @@ -1133,7 +1127,6 @@ int main(int argc, char **argv) gdbserver_start (gdbstub_port); gdb_handlesig(cpu, 0); } - trace_init_vcpu_events(); cpu_loop(env); /* never exits */ return 0; diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c index 610f91b285..ee5907330f 100644 --- a/bsd-user/mmap.c +++ b/bsd-user/mmap.c @@ -42,6 +42,11 @@ void mmap_unlock(void) } } +bool have_mmap_lock(void) +{ + return mmap_lock_count > 0 ? true : false; +} + /* Grab lock to make sure things are in a consistent state after fork(). */ void mmap_fork_start(void) { diff --git a/configure b/configure index b6b046ab8e..d9bdf609b2 100755 --- a/configure +++ b/configure @@ -212,7 +212,6 @@ sdlabi="" virtfs="" vnc="yes" sparse="no" -uuid="" vde="" vnc_sasl="" vnc_jpeg="" @@ -229,7 +228,9 @@ xfs="" vhost_net="no" vhost_scsi="no" +vhost_vsock="no" kvm="no" +colo="yes" rdma="" gprof="no" debug_tcg="no" @@ -296,6 +297,7 @@ libiscsi="" libnfs="" coroutine="" coroutine_pool="" +debug_stack_usage="no" seccomp="" glusterfs="" glusterfs_xlator_opt="no" @@ -316,10 +318,10 @@ vte="" virglrenderer="" tpm="yes" libssh2="" -vhdx="" numa="" tcmalloc="no" jemalloc="no" +replication="yes" # --- [GNU ARM Eclipse] --- gnuarmeclipse="no" @@ -395,7 +397,11 @@ sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" ARFLAGS="${ARFLAGS-rv}" # default flags for all hosts -QEMU_CFLAGS="-fno-strict-aliasing -fno-common $QEMU_CFLAGS" +# We use -fwrapv to tell the compiler that we require a C dialect where +# left shift of signed integers is well defined and has the expected +# 2s-complement style results. (Both clang and gcc agree that it +# provides these semantics.) 
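Illustration only, not part of the patch: the dialect the comment above asks for is relied on by code such as the following, which left-shifts a negative signed value. That shift is undefined in ISO C, but per the comment both GCC and Clang give the expected 2s-complement result once -fwrapv is in effect.

    #include <assert.h>

    int main(void)
    {
        int v = -1;
        /* Undefined in ISO C; the expected 2s-complement result is -8. */
        assert((v << 3) == -8);
        return 0;
    }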
+QEMU_CFLAGS="-fno-strict-aliasing -fno-common -fwrapv $QEMU_CFLAGS" QEMU_CFLAGS="-Wall -Wundef -Wwrite-strings -Wmissing-prototypes $QEMU_CFLAGS" QEMU_CFLAGS="-Wstrict-prototypes -Wredundant-decls $QEMU_CFLAGS" QEMU_CFLAGS="-D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE $QEMU_CFLAGS" @@ -512,8 +518,6 @@ elif check_define __arm__ ; then cpu="arm" elif check_define __aarch64__ ; then cpu="aarch64" -elif check_define __hppa__ ; then - cpu="hppa" else cpu=$(uname -m) fi @@ -585,6 +589,8 @@ FreeBSD) audio_possible_drivers="oss sdl pa" # needed for kinfo_getvmmap(3) in libutil.h LIBS="-lutil $LIBS" + # needed for kinfo_getproc + libs_qga="-lutil $libs_qga" netmap="" # enable netmap autodetect HOST_VARIANT_DIR="freebsd" ;; @@ -681,6 +687,7 @@ Haiku) kvm="yes" vhost_net="yes" vhost_scsi="yes" + vhost_vsock="yes" QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES" ;; esac @@ -933,10 +940,6 @@ for opt do ;; --disable-slirp) slirp="no" ;; - --disable-uuid) uuid="no" - ;; - --enable-uuid) uuid="yes" - ;; --disable-vde) vde="no" ;; --enable-vde) vde="yes" @@ -969,6 +972,10 @@ for opt do ;; --enable-kvm) kvm="yes" ;; + --disable-colo) colo="no" + ;; + --enable-colo) colo="yes" + ;; --disable-tcg-interpreter) tcg_interpreter="no" ;; --enable-tcg-interpreter) tcg_interpreter="yes" @@ -1056,6 +1063,8 @@ for opt do ;; --enable-coroutine-pool) coroutine_pool="yes" ;; + --enable-debug-stack-usage) debug_stack_usage="yes" + ;; --disable-docs) docs="no" ;; --enable-docs) docs="yes" @@ -1068,6 +1077,10 @@ for opt do ;; --enable-vhost-scsi) vhost_scsi="yes" ;; + --disable-vhost-vsock) vhost_vsock="no" + ;; + --enable-vhost-vsock) vhost_vsock="yes" + ;; --disable-opengl) opengl="no" ;; --enable-opengl) opengl="yes" @@ -1145,6 +1158,12 @@ for opt do --disable-virtio-blk-data-plane|--enable-virtio-blk-data-plane) echo "$0: $opt is obsolete, virtio-blk data-plane is always on" >&2 ;; + --enable-vhdx|--disable-vhdx) + echo "$0: $opt is obsolete, VHDX driver is always built" >&2 + ;; + --enable-uuid|--disable-uuid) + echo "$0: $opt is obsolete, UUID support is always built" >&2 + ;; --disable-gtk) gtk="no" ;; --enable-gtk) gtk="yes" @@ -1185,10 +1204,6 @@ for opt do ;; --enable-libssh2) libssh2="yes" ;; - --enable-vhdx) vhdx="yes" - ;; - --disable-vhdx) vhdx="no" - ;; --disable-numa) numa="no" ;; --enable-numa) numa="yes" @@ -1201,6 +1216,10 @@ for opt do ;; --enable-jemalloc) jemalloc="yes" ;; + --disable-replication) replication="no" + ;; + --enable-replication) replication="yes" + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1255,7 +1274,10 @@ case "$cpu" in cc_i386='$(CC) -m32' ;; x86_64) - CPU_CFLAGS="-m64" + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. 
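For illustration only (not part of the patch): the cmpxchg16b instruction mentioned in the comment above is what -mcx16 lets the compiler inline for a 16-byte compare-and-swap, roughly as in this sketch; without the flag the builtin typically becomes an out-of-line, possibly lock-based, library call.

    #include <stdbool.h>

    typedef unsigned __int128 u128;

    /* With -mcx16 this can compile to a single lock cmpxchg16b on x86-64. */
    static bool cas_u128(u128 *ptr, u128 *expected, u128 desired)
    {
        return __atomic_compare_exchange_n(ptr, expected, desired, false,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

    int main(void)
    {
        u128 val = 1, expected = 1;
        return cas_u128(&val, &expected, 2) ? 0 : 1;
    }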
+ CPU_CFLAGS="-m64 -mcx16" LDFLAGS="-m64 $LDFLAGS" cc_i386='$(CC) -m32' ;; @@ -1402,8 +1424,8 @@ disabled with --disable-FEATURE, default is enabled if available: fdt fdt device tree bluez bluez stack connectivity kvm KVM acceleration support + colo COarse-grain LOck-stepping VM for Non-stop Service rdma RDMA-based migration support - uuid uuid support vde support for vde network netmap support for netmap network linux-aio Linux AIO support @@ -1427,10 +1449,10 @@ disabled with --disable-FEATURE, default is enabled if available: archipelago Archipelago backend tpm TPM support libssh2 ssh block device support - vhdx support for the Microsoft VHDX image format numa libnuma support tcmalloc tcmalloc support jemalloc jemalloc support + replication replication support NOTE: The object files are built at the place where configure is launched EOF @@ -1765,6 +1787,19 @@ if test "$cocoa" = "yes"; then sdl=no fi +# Some versions of Mac OS X incorrectly define SIZE_MAX +cat > $TMPC << EOF +#include +#include +int main(int argc, char *argv[]) { + return printf("%zu", SIZE_MAX); +} +EOF +have_broken_size_max=no +if ! compile_object -Werror ; then + have_broken_size_max=yes +fi + ########################################## # L2TPV3 probe @@ -1839,28 +1874,19 @@ fi ########################################## # avx2 optimization requirement check - -if test "$static" = "no" ; then - cat > $TMPC << EOF +cat > $TMPC << EOF #pragma GCC push_options #pragma GCC target("avx2") #include #include - static int bar(void *a) { - return _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(__m256i *)a, (__m256i){0})); + __m256i x = *(__m256i *)a; + return _mm256_testz_si256(x, x); } -static void *bar_ifunc(void) {return (void*) bar;} -int foo(void *a) __attribute__((ifunc("bar_ifunc"))); -int main(int argc, char *argv[]) { return foo(argv[0]);} +int main(int argc, char *argv[]) { return bar(argv[0]); } EOF - if compile_object "" ; then - if has readelf; then - if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then - avx2_opt="yes" - fi - fi - fi +if compile_object "" ; then + avx2_opt="yes" fi ######################################### @@ -2004,6 +2030,61 @@ EOF # Xen unstable elif cat > $TMPC < +#include +#include +#include +#include +#include +#include +#if !defined(HVM_MAX_VCPUS) +# error HVM_MAX_VCPUS not defined +#endif +int main(void) { + xc_interface *xc = NULL; + xenforeignmemory_handle *xfmem; + xenevtchn_handle *xe; + xengnttab_handle *xg; + xen_domain_handle_t handle; + xengnttab_grant_copy_segment_t* seg = NULL; + + xs_daemon_open(); + + xc = xc_interface_open(0, 0, 0); + xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0); + xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0); + xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000); + xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL); + xc_domain_create(xc, 0, handle, 0, NULL, NULL); + + xfmem = xenforeignmemory_open(0, 0); + xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0); + + xe = xenevtchn_open(0, 0); + xenevtchn_fd(xe); + + xg = xengnttab_open(0, 0); + xengnttab_grant_copy(xg, 0, seg); + + return 0; +} +EOF + compile_prog "" "$xen_libs $xen_stable_libs" + then + xen_ctrl_version=480 + xen=yes + elif + cat > $TMPC < $TMPC << EOF -#include -int main(void) -{ - uuid_t my_uuid; - uuid_generate(my_uuid); - return 0; -} -EOF - if compile_prog "" "" ; then - uuid="yes" - elif compile_prog "" "$uuid_libs" ; then - uuid="yes" - libs_softmmu="$uuid_libs $libs_softmmu" - libs_tools="$uuid_libs $libs_tools" - else - if test "$uuid" = "yes" ; then - 
feature_not_found "uuid" "Install libuuid devel" - fi - uuid=no - fi -fi - -if test "$vhdx" = "yes" ; then - if test "$uuid" = "no" ; then - error_exit "uuid required for VHDX support" - fi -elif test "$vhdx" != "no" ; then - if test "$uuid" = "yes" ; then - vhdx=yes - else - vhdx=no - fi -fi - ########################################## # xfsctl() probe, used for raw-posix if test "$xfs" != "no" ; then @@ -2947,25 +2987,41 @@ fi # curses probe if test "$curses" != "no" ; then if test "$mingw32" = "yes" ; then - curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses" + curses_inc_list="$($pkg_config --cflags ncurses 2>/dev/null):" + curses_lib_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses" else - curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lncurses:-lcurses" + curses_inc_list="$($pkg_config --cflags ncursesw 2>/dev/null):-I/usr/include/ncursesw:" + curses_lib_list="$($pkg_config --libs ncursesw 2>/dev/null):-lncursesw:-lcursesw" fi curses_found=no cat > $TMPC << EOF +#include #include +#include int main(void) { const char *s = curses_version(); + wchar_t wch = L'w'; + setlocale(LC_ALL, ""); resize_term(0, 0); + addwstr(L"wide chars\n"); + addnwstr(&wch, 1); + add_wch(WACS_DEGREE); return s != 0; } EOF IFS=: - for curses_lib in $curses_list; do - unset IFS - if compile_prog "" "$curses_lib" ; then - curses_found=yes - libs_softmmu="$curses_lib $libs_softmmu" + for curses_inc in $curses_inc_list; do + IFS=: + for curses_lib in $curses_lib_list; do + unset IFS + if compile_prog "$curses_inc" "$curses_lib" ; then + curses_found=yes + QEMU_CFLAGS="$curses_inc $QEMU_CFLAGS" + libs_softmmu="$curses_lib $libs_softmmu" + break + fi + done + if test "$curses_found" = yes ; then break fi done @@ -3037,7 +3093,7 @@ for i in $glib_modules; do if $pkg_config --atleast-version=$glib_req_ver $i; then glib_cflags=$($pkg_config --cflags $i) glib_libs=$($pkg_config --libs $i) - CFLAGS="$glib_cflags $CFLAGS" + QEMU_CFLAGS="$glib_cflags $QEMU_CFLAGS" LIBS="$glib_libs $LIBS" libs_qga="$glib_libs $libs_qga" else @@ -3071,7 +3127,7 @@ fi # g_test_trap_subprocess added in 2.38. Used by some tests. glib_subprocess=yes -if ! $pkg_config --atleast-version=2.38 glib-2.0; then +if test "$mingw32" = "yes" || ! $pkg_config --atleast-version=2.38 glib-2.0; then glib_subprocess=no fi @@ -3944,6 +4000,36 @@ if compile_prog "" "" ; then setns=yes fi +# clock_adjtime probe +clock_adjtime=no +cat > $TMPC < + +int main(void) +{ + return clock_adjtime(0, 0); +} +EOF +clock_adjtime=no +if compile_prog "" "" ; then + clock_adjtime=yes +fi + +# syncfs probe +syncfs=no +cat > $TMPC < + +int main(void) +{ + return syncfs(0); +} +EOF +syncfs=no +if compile_prog "" "" ; then + syncfs=yes +fi + # Check if tools are available to build documentation. 
if test "$docs" != "no" ; then if has makeinfo && has pod2man; then @@ -4132,7 +4218,7 @@ EOF if compile_prog "$vss_win32_include" "" ; then guest_agent_with_vss="yes" QEMU_CFLAGS="$QEMU_CFLAGS $vss_win32_include" - libs_qga="-lole32 -loleaut32 -lshlwapi -luuid -lstdc++ -Wl,--enable-stdcall-fixup $libs_qga" + libs_qga="-lole32 -loleaut32 -lshlwapi -lstdc++ -Wl,--enable-stdcall-fixup $libs_qga" qga_vss_provider="qga/vss-win32/qga-vss.dll qga/vss-win32/qga-vss.tlb" else if test "$vss_win32_sdk" != "" ; then @@ -4253,6 +4339,18 @@ if compile_prog "" "" ; then posix_madvise=yes fi +########################################## +# check if we have posix_syslog + +posix_syslog=no +cat > $TMPC << EOF +#include +int main(void) { openlog("qemu", LOG_PID, LOG_DAEMON); syslog(LOG_INFO, "configure"); return 0; } +EOF +if compile_prog "" "" ; then + posix_syslog=yes +fi + ########################################## # check if trace backend exists @@ -4269,11 +4367,11 @@ if have_backend "ust"; then #include int main(void) { return 0; } EOF - if compile_prog "" "" ; then + if compile_prog "" "-Wl,--no-as-needed -ldl" ; then if $pkg_config lttng-ust --exists; then lttng_ust_libs=$($pkg_config --libs lttng-ust) else - lttng_ust_libs="-llttng-ust" + lttng_ust_libs="-llttng-ust -ldl" fi if $pkg_config liburcu-bp --exists; then urcu_bp_libs=$($pkg_config --libs liburcu-bp) @@ -4368,6 +4466,17 @@ if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)" fi +if test "$debug_stack_usage" = "yes"; then + if test "$cpu" = "ia64" -o "$cpu" = "hppa"; then + error_exit "stack usage debugging is not supported for $cpu" + fi + if test "$coroutine_pool" = "yes"; then + echo "WARN: disabling coroutine pool for stack usage debugging" + coroutine_pool=no + fi +fi + + ########################################## # check if we have open_by_handle_at @@ -4501,6 +4610,55 @@ if compile_prog "" "" ; then int128=yes fi +######################################### +# See if 128-bit atomic operations are supported. + +atomic128=no +if test "$int128" = "yes"; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + y = __atomic_load_16(&x, 0); + __atomic_store_16(&x, y, 0); + __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" ; then + atomic128=yes + fi +fi + +######################################### +# See if 64-bit atomic operations are supported. +# Note that without __atomic builtins, we can only +# assume atomic loads/stores max at pointer size. + +cat > $TMPC << EOF +#include +int main(void) +{ + uint64_t x = 0, y = 0; +#ifdef __ATOMIC_RELAXED + y = __atomic_load_8(&x, 0); + __atomic_store_8(&x, y, 0); + __atomic_compare_exchange_8(&x, &y, x, 0, 0, 0); + __atomic_exchange_8(&x, y, 0); + __atomic_fetch_add_8(&x, y, 0); +#else + typedef char is_host64[sizeof(void *) >= sizeof(uint64_t) ? 1 : -1]; + __sync_lock_test_and_set(&x, y); + __sync_val_compare_and_swap(&x, y, 0); + __sync_fetch_and_add(&x, y); +#endif + return 0; +} +EOF +if compile_prog "" "" ; then + atomic64=yes +fi + ######################################## # check if getauxval is available. 
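The two probes above only establish whether the host compiler provides 16-byte and 8-byte __atomic operations; the CONFIG_ATOMIC128 and CONFIG_ATOMIC64 switches they feed (written to config-host.mak further down in this patch) can then gate code of the following shape. The helper is hypothetical, shown only to illustrate what the probes guarantee; it is not taken from the patch.

    #include <stdint.h>

    /* Hypothetical helper: lock-free 64-bit increment when the atomic64
     * probe passed, legacy __sync builtins otherwise. */
    static inline uint64_t fetch_add_u64(uint64_t *p, uint64_t v)
    {
    #ifdef CONFIG_ATOMIC64
        return __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST);
    #else
        /* As the probe's comment notes, without __atomic builtins only
         * accesses up to the pointer size can be assumed atomic. */
        return __sync_fetch_and_add(p, v);
    #endif
    }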
@@ -4585,6 +4743,33 @@ if compile_prog "" "" ; then have_rtnetlink=yes fi +########################################## +# check for usable AF_VSOCK environment +have_af_vsock=no +cat > $TMPC << EOF +#include +#include +#include +#if !defined(AF_VSOCK) +# error missing AF_VSOCK flag +#endif +#include +int main(void) { + int sock, ret; + struct sockaddr_vm svm; + socklen_t len = sizeof(svm); + sock = socket(AF_VSOCK, SOCK_STREAM, 0); + ret = getpeername(sock, (struct sockaddr *)&svm, &len); + if ((ret == -1) && (errno == ENOTCONN)) { + return 0; + } + return -1; +} +EOF +if compile_prog "" "" ; then + have_af_vsock=yes +fi + ################################################# # Sparc implicitly links with --relax, which is # incompatible with -r, so --no-relax should be @@ -4600,8 +4785,14 @@ EOF if ! compile_object ""; then error_exit "Failed to compile object file for LD_REL_FLAGS test" fi -if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then - LD_REL_FLAGS="-Wl,--no-relax" +for i in '-Wl,-r -Wl,--no-relax' -Wl,-r -r; do + if do_cc -nostdlib $i -o $TMPMO $TMPO; then + LD_REL_FLAGS=$i + break + fi +done +if test "$modules" = "yes" && test "$LD_REL_FLAGS" = ""; then + feature_not_found "modules" "Cannot find how to build relocatable objects" fi ########################################## @@ -4623,7 +4814,6 @@ if test "$libnfs" != "no" ; then if $pkg_config --atleast-version=1.9.3 libnfs; then libnfs="yes" libnfs_libs=$($pkg_config --libs libnfs) - LIBS="$LIBS $libnfs_libs" else if test "$libnfs" = "yes" ; then feature_not_found "libnfs" "Install libnfs devel >= 1.9.3" @@ -4922,6 +5112,7 @@ echo "Linux AIO support $linux_aio" echo "ATTR/XATTR support $attr" echo "Install blobs $blobs" echo "KVM support $kvm" +echo "COLO support $colo" echo "RDMA support $rdma" echo "TCG interpreter $tcg_interpreter" echo "fdt support $fdt" @@ -4929,10 +5120,10 @@ echo "preadv support $preadv" echo "fdatasync $fdatasync" echo "madvise $madvise" echo "posix_madvise $posix_madvise" -echo "uuid support $uuid" echo "libcap-ng support $cap_ng" echo "vhost-net support $vhost_net" echo "vhost-scsi support $vhost_scsi" +echo "vhost-vsock support $vhost_vsock" echo "Trace backends $trace_backends" if have_backend "simple"; then echo "Trace output file $trace_file-" @@ -4954,6 +5145,7 @@ echo "QGA MSI support $guest_agent_msi" echo "seccomp support $seccomp" echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" +echo "debug stack usage $debug_stack_usage" echo "GlusterFS support $glusterfs" echo "Archipelago support $archipelago" echo "gcov $gcov_tool" @@ -4962,7 +5154,6 @@ echo "TPM support $tpm" echo "libssh2 support $libssh2" echo "TPM passthrough $tpm_passthrough" echo "QOM debugging $qom_cast_debug" -echo "vhdx $vhdx" echo "lzo support $lzo" echo "snappy support $snappy" echo "bzip2 support $bzip2" @@ -4970,6 +5161,7 @@ echo "NUMA host support $numa" echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" echo "avx2 optimization $avx2_opt" +echo "replication support $replication" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -5118,9 +5310,6 @@ fi if test "$fnmatch" = "yes" ; then echo "CONFIG_FNMATCH=y" >> $config_host_mak fi -if test "$uuid" = "yes" ; then - echo "CONFIG_UUID=y" >> $config_host_mak -fi if test "$xfs" = "yes" ; then echo "CONFIG_XFS=y" >> $config_host_mak fi @@ -5212,6 +5401,12 @@ fi if test "$setns" = "yes" ; then echo "CONFIG_SETNS=y" >> $config_host_mak fi +if test "$clock_adjtime" = "yes" 
; then + echo "CONFIG_CLOCK_ADJTIME=y" >> $config_host_mak +fi +if test "$syncfs" = "yes" ; then + echo "CONFIG_SYNCFS=y" >> $config_host_mak +fi if test "$inotify" = "yes" ; then echo "CONFIG_INOTIFY=y" >> $config_host_mak fi @@ -5239,7 +5434,6 @@ fi if test "$glib_subprocess" = "yes" ; then echo "CONFIG_HAS_GLIB_SUBPROCESS_TESTS=y" >> $config_host_mak fi -echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak if test "$gtk" = "yes" ; then echo "CONFIG_GTK=y" >> $config_host_mak echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak @@ -5275,6 +5469,9 @@ fi if test "$have_ifaddrs_h" = "yes" ; then echo "HAVE_IFADDRS_H=y" >> $config_host_mak fi +if test "$have_broken_size_max" = "yes" ; then + echo "HAVE_BROKEN_SIZE_MAX=y" >> $config_host_mak +fi # Work around a system header bug with some kernel/XFS header # versions where they both try to define 'struct fsxattr': @@ -5317,6 +5514,9 @@ fi if test "$vhost_net" = "yes" ; then echo "CONFIG_VHOST_NET_USED=y" >> $config_host_mak fi +if test "$vhost_vsock" = "yes" ; then + echo "CONFIG_VHOST_VSOCK=y" >> $config_host_mak +fi if test "$blobs" = "yes" ; then echo "INSTALL_BLOBS=yes" >> $config_host_mak fi @@ -5394,7 +5594,8 @@ if test "$libiscsi" = "yes" ; then fi if test "$libnfs" = "yes" ; then - echo "CONFIG_LIBNFS=y" >> $config_host_mak + echo "CONFIG_LIBNFS=m" >> $config_host_mak + echo "LIBNFS_LIBS=$libnfs_libs" >> $config_host_mak fi if test "$seccomp" = "yes"; then @@ -5425,6 +5626,10 @@ else echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak fi +if test "$debug_stack_usage" = "yes" ; then + echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak +fi + if test "$open_by_handle_at" = "yes" ; then echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak fi @@ -5453,6 +5658,14 @@ if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi +if test "$atomic128" = "yes" ; then + echo "CONFIG_ATOMIC128=y" >> $config_host_mak +fi + +if test "$atomic64" = "yes" ; then + echo "CONFIG_ATOMIC64=y" >> $config_host_mak +fi + if test "$getauxval" = "yes" ; then echo "CONFIG_GETAUXVAL=y" >> $config_host_mak fi @@ -5486,10 +5699,6 @@ if test "$libssh2" = "yes" ; then echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak fi -if test "$vhdx" = "yes" ; then - echo "CONFIG_VHDX=y" >> $config_host_mak -fi - # USB host support if test "$libusb" = "yes"; then echo "HOST_USB=libusb legacy" >> $config_host_mak @@ -5533,8 +5742,19 @@ if have_backend "ftrace"; then feature_not_found "ftrace(trace backend)" "ftrace requires Linux" fi fi +if have_backend "syslog"; then + if test "$posix_syslog" = "yes" ; then + echo "CONFIG_TRACE_SYSLOG=y" >> $config_host_mak + else + feature_not_found "syslog(trace backend)" "syslog not available" + fi +fi echo "CONFIG_TRACE_FILE=$trace_file" >> $config_host_mak +if test "$colo" = "yes"; then + echo "CONFIG_COLO=y" >> $config_host_mak +fi + if test "$rdma" = "yes" ; then echo "CONFIG_RDMA=y" >> $config_host_mak fi @@ -5543,6 +5763,14 @@ if test "$have_rtnetlink" = "yes" ; then echo "CONFIG_RTNETLINK=y" >> $config_host_mak fi +if test "$replication" = "yes" ; then + echo "CONFIG_REPLICATION=y" >> $config_host_mak +fi + +if test "$have_af_vsock" = "yes" ; then + echo "CONFIG_AF_VSOCK=y" >> $config_host_mak +fi + # Hold two types of flag: # CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on # a thread we have a handle to @@ -5936,9 +6164,6 @@ for i in $ARCH $TARGET_BASE_ARCH ; do cris) disas_config "CRIS" ;; - hppa) - disas_config "HPPA" - ;; i386|x86_64|x32) disas_config "I386" ;; @@ -6062,6 +6287,7 @@ 
FILES="$FILES roms/seabios/Makefile roms/vgabios/Makefile" FILES="$FILES pc-bios/qemu-icon.bmp" for bios_file in \ $source_path/pc-bios/*.bin \ + $source_path/pc-bios/*.lid \ $source_path/pc-bios/*.aml \ $source_path/pc-bios/*.rom \ $source_path/pc-bios/*.dtb \ diff --git a/cpu-exec-common.c b/cpu-exec-common.c index 0cb4ae60ef..767d9c6f0c 100644 --- a/cpu-exec-common.c +++ b/cpu-exec-common.c @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) } siglongjmp(cpu->jmp_env, 1); } + +void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc) +{ + cpu->exception_index = EXCP_ATOMIC; + cpu_loop_exit_restore(cpu, pc); +} diff --git a/cpu-exec.c b/cpu-exec.c index f7b0f7cef9..d42477dbfc 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -152,22 +152,20 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) uint8_t *tb_ptr = itb->tc_ptr; qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, - "Trace %p [" TARGET_FMT_lx "] %s\n", - itb->tc_ptr, itb->pc, lookup_symbol(itb->pc)); + "Trace %p [%d: " TARGET_FMT_lx "] %s\n", + itb->tc_ptr, cpu->cpu_index, itb->pc, + lookup_symbol(itb->pc)); #if defined(DEBUG_DISAS) - if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) { + if (qemu_loglevel_mask(CPU_LOG_TB_CPU) + && qemu_log_in_addr_range(itb->pc)) { + qemu_log_lock(); #if defined(TARGET_I386) log_cpu_state(cpu, CPU_DUMP_CCOP); -#elif defined(TARGET_M68K) - /* ??? Should not modify env state for dumping. */ - cpu_m68k_flush_flags(env, env->cc_op); - env->cc_op = CC_OP_FLAGS; - env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4); - log_cpu_state(cpu, 0); #else log_cpu_state(cpu, 0); #endif + qemu_log_unlock(); } #endif /* DEBUG_DISAS */ @@ -200,7 +198,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) /* We were asked to stop executing TBs (probably a pending * interrupt. We've now stopped, so clear the flag. */ - cpu->tcg_exit_req = 0; + atomic_set(&cpu->tcg_exit_req, 0); } return ret; } @@ -212,28 +210,60 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, TranslationBlock *orig_tb, bool ignore_icount) { TranslationBlock *tb; - bool old_tb_flushed; /* Should never happen. We only end up here when an existing TB is too long. */ if (max_cycles > CF_COUNT_MASK) max_cycles = CF_COUNT_MASK; - old_tb_flushed = cpu->tb_flushed; - cpu->tb_flushed = false; + tb_lock(); tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, max_cycles | CF_NOCACHE | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); - tb->orig_tb = cpu->tb_flushed ? NULL : orig_tb; - cpu->tb_flushed |= old_tb_flushed; + tb->orig_tb = orig_tb; + tb_unlock(); + /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); cpu_tb_exec(cpu, tb); + + tb_lock(); tb_phys_invalidate(tb, -1); tb_free(tb); + tb_unlock(); } #endif +static void cpu_exec_step(CPUState *cpu) +{ + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + TranslationBlock *tb; + target_ulong cs_base, pc; + uint32_t flags; + + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); + tb = tb_gen_code(cpu, pc, cs_base, flags, + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); + tb->orig_tb = NULL; + /* execute the generated code */ + trace_exec_tb_nocache(tb, pc); + cpu_tb_exec(cpu, tb); + tb_phys_invalidate(tb, -1); + tb_free(tb); +} + +void cpu_exec_step_atomic(CPUState *cpu) +{ + start_exclusive(); + + /* Since we got here, we know that parallel_cpus must be true. 
*/ + parallel_cpus = false; + cpu_exec_step(cpu); + parallel_cpus = true; + + end_exclusive(); +} + struct tb_desc { target_ulong pc; target_ulong cs_base; @@ -250,7 +280,8 @@ static bool tb_cmp(const void *p, const void *d) if (tb->pc == desc->pc && tb->page_addr[0] == desc->phys_page1 && tb->cs_base == desc->cs_base && - tb->flags == desc->flags) { + tb->flags == desc->flags && + !atomic_read(&tb->invalid)) { /* check next page if needed */ if (tb->page_addr[1] == -1) { return true; @@ -268,7 +299,7 @@ static bool tb_cmp(const void *p, const void *d) return false; } -static TranslationBlock *tb_find_physical(CPUState *cpu, +static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags) @@ -287,72 +318,48 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h); } -static TranslationBlock *tb_find_slow(CPUState *cpu, - target_ulong pc, - target_ulong cs_base, - uint32_t flags) -{ - TranslationBlock *tb; - - tb = tb_find_physical(cpu, pc, cs_base, flags); - if (tb) { - goto found; - } - -#ifdef CONFIG_USER_ONLY - /* mmap_lock is needed by tb_gen_code, and mmap_lock must be - * taken outside tb_lock. Since we're momentarily dropping - * tb_lock, there's a chance that our desired tb has been - * translated. - */ - tb_unlock(); - mmap_lock(); - tb_lock(); - tb = tb_find_physical(cpu, pc, cs_base, flags); - if (tb) { - mmap_unlock(); - goto found; - } -#endif - - /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, 0); - -#ifdef CONFIG_USER_ONLY - mmap_unlock(); -#endif - -found: - /* we add the TB in the virtual pc hash table */ - cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb; - return tb; -} - -static inline TranslationBlock *tb_find_fast(CPUState *cpu, - TranslationBlock **last_tb, - int tb_exit) +static inline TranslationBlock *tb_find(CPUState *cpu, + TranslationBlock *last_tb, + int tb_exit) { CPUArchState *env = (CPUArchState *)cpu->env_ptr; TranslationBlock *tb; target_ulong cs_base, pc; uint32_t flags; + bool have_tb_lock = false; /* we record a subset of the CPU state. It will always be the same before a given translated block is executed. */ cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); - tb_lock(); - tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; + tb = atomic_rcu_read(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]); if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base || tb->flags != flags)) { - tb = tb_find_slow(cpu, pc, cs_base, flags); - } - if (cpu->tb_flushed) { - /* Ensure that no TB jump will be modified as the - * translation buffer has been flushed. - */ - *last_tb = NULL; - cpu->tb_flushed = false; + tb = tb_htable_lookup(cpu, pc, cs_base, flags); + if (!tb) { + + /* mmap_lock is needed by tb_gen_code, and mmap_lock must be + * taken outside tb_lock. As system emulation is currently + * single threaded the locks are NOPs. + */ + mmap_lock(); + tb_lock(); + have_tb_lock = true; + + /* There's a chance that our desired tb has been translated while + * taking the locks so we check again inside the lock. 
+ */ + tb = tb_htable_lookup(cpu, pc, cs_base, flags); + if (!tb) { + /* if no translated code available, then translate it now */ + tb = tb_gen_code(cpu, pc, cs_base, flags, 0); + } + + mmap_unlock(); + } + + /* We add the TB in the virtual pc hash table for the fast lookup */ + atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb); } #ifndef CONFIG_USER_ONLY /* We don't take care of direct jumps when address mapping changes in @@ -360,14 +367,22 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu, * spanning two pages because the mapping for the second page can change. */ if (tb->page_addr[1] != -1) { - *last_tb = NULL; + last_tb = NULL; } #endif /* See if we can patch the calling TB. */ - if (*last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - tb_add_jump(*last_tb, tb_exit, tb); + if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + if (!have_tb_lock) { + tb_lock(); + have_tb_lock = true; + } + if (!tb->invalid) { + tb_add_jump(last_tb, tb_exit, tb); + } + } + if (have_tb_lock) { + tb_unlock(); } - tb_unlock(); return tb; } @@ -446,8 +461,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) } else if (replay_has_exception() && cpu->icount_decr.u16.low + cpu->icount_extra == 0) { /* try to cause an exception pending in the log */ - TranslationBlock *last_tb = NULL; /* Avoid chaining TBs */ - cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, &last_tb, 0), true); + cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true); *ret = -1; return true; #endif @@ -518,8 +532,8 @@ static inline void cpu_handle_interrupt(CPUState *cpu, *last_tb = NULL; } } - if (unlikely(cpu->exit_request || replay_has_interrupt())) { - cpu->exit_request = 0; + if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) { + atomic_set(&cpu->exit_request, 0); cpu->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cpu); } @@ -531,7 +545,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, { uintptr_t ret; - if (unlikely(cpu->exit_request)) { + if (unlikely(atomic_read(&cpu->exit_request))) { return; } @@ -627,10 +641,9 @@ int cpu_exec(CPUState *cpu) break; } - cpu->tb_flushed = false; /* reset before first TB lookup */ for(;;) { cpu_handle_interrupt(cpu, &last_tb); - tb = tb_find_fast(cpu, &last_tb, tb_exit); + tb = tb_find(cpu, last_tb, tb_exit); cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc); /* Try to align the host and virtual clocks if the guest is in advance */ diff --git a/cpus-common.c b/cpus-common.c new file mode 100644 index 0000000000..59f751ecf9 --- /dev/null +++ b/cpus-common.c @@ -0,0 +1,353 @@ +/* + * CPU thread main loop - common bits for user and system mode emulation + * + * Copyright (c) 2003-2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "exec/cpu-common.h" +#include "qom/cpu.h" +#include "sysemu/cpus.h" + +static QemuMutex qemu_cpu_list_lock; +static QemuCond exclusive_cond; +static QemuCond exclusive_resume; +static QemuCond qemu_work_cond; + +/* >= 1 if a thread is inside start_exclusive/end_exclusive. Written + * under qemu_cpu_list_lock, read with atomic operations. + */ +static int pending_cpus; + +void qemu_init_cpu_list(void) +{ + /* This is needed because qemu_init_cpu_list is also called by the + * child process in a fork. */ + pending_cpus = 0; + + qemu_mutex_init(&qemu_cpu_list_lock); + qemu_cond_init(&exclusive_cond); + qemu_cond_init(&exclusive_resume); + qemu_cond_init(&qemu_work_cond); +} + +void cpu_list_lock(void) +{ + qemu_mutex_lock(&qemu_cpu_list_lock); +} + +void cpu_list_unlock(void) +{ + qemu_mutex_unlock(&qemu_cpu_list_lock); +} + +static bool cpu_index_auto_assigned; + +static int cpu_get_free_index(void) +{ + CPUState *some_cpu; + int cpu_index = 0; + + cpu_index_auto_assigned = true; + CPU_FOREACH(some_cpu) { + cpu_index++; + } + return cpu_index; +} + +static void finish_safe_work(CPUState *cpu) +{ + cpu_exec_start(cpu); + cpu_exec_end(cpu); +} + +void cpu_list_add(CPUState *cpu) +{ + qemu_mutex_lock(&qemu_cpu_list_lock); + if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) { + cpu->cpu_index = cpu_get_free_index(); + assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX); + } else { + assert(!cpu_index_auto_assigned); + } + QTAILQ_INSERT_TAIL(&cpus, cpu, node); + qemu_mutex_unlock(&qemu_cpu_list_lock); + + finish_safe_work(cpu); +} + +void cpu_list_remove(CPUState *cpu) +{ + qemu_mutex_lock(&qemu_cpu_list_lock); + if (!QTAILQ_IN_USE(cpu, node)) { + /* there is nothing to undo since cpu_exec_init() hasn't been called */ + qemu_mutex_unlock(&qemu_cpu_list_lock); + return; + } + + assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ))); + + QTAILQ_REMOVE(&cpus, cpu, node); + cpu->cpu_index = UNASSIGNED_CPU_INDEX; + qemu_mutex_unlock(&qemu_cpu_list_lock); +} + +struct qemu_work_item { + struct qemu_work_item *next; + run_on_cpu_func func; + run_on_cpu_data data; + bool free, exclusive, done; +}; + +static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi) +{ + qemu_mutex_lock(&cpu->work_mutex); + if (cpu->queued_work_first == NULL) { + cpu->queued_work_first = wi; + } else { + cpu->queued_work_last->next = wi; + } + cpu->queued_work_last = wi; + wi->next = NULL; + wi->done = false; + qemu_mutex_unlock(&cpu->work_mutex); + + qemu_cpu_kick(cpu); +} + +void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data, + QemuMutex *mutex) +{ + struct qemu_work_item wi; + + if (qemu_cpu_is_self(cpu)) { + func(cpu, data); + return; + } + + wi.func = func; + wi.data = data; + wi.done = false; + wi.free = false; + wi.exclusive = false; + + queue_work_on_cpu(cpu, &wi); + while (!atomic_mb_read(&wi.done)) { + CPUState *self_cpu = current_cpu; + + qemu_cond_wait(&qemu_work_cond, mutex); + current_cpu = self_cpu; + } +} + +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) +{ + struct qemu_work_item *wi; + + wi = g_malloc0(sizeof(struct qemu_work_item)); + wi->func = func; + wi->data = data; + wi->free = true; + + queue_work_on_cpu(cpu, wi); +} + +/* Wait for pending exclusive operations to complete. The CPU list lock + must be held. 
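do_run_on_cpu() above queues a qemu_work_item on the target vCPU and, in the synchronous case, sleeps on qemu_work_cond until the item is marked done. A hedged usage sketch: the callback and wrapper names are invented for illustration, and the declarations are assumed to come from qom/cpu.h.

#include "qom/cpu.h"    /* CPUState, run_on_cpu_func, run_on_cpu_data (assumed) */

/* Illustrative callback: runs on the target vCPU's thread. */
static void my_cb(CPUState *cpu, run_on_cpu_data data)
{
    /* 'data' is an optional payload; RUN_ON_CPU_NULL passes none. */
}

static void example(CPUState *cpu)
{
    /* Synchronous: returns only after my_cb has run on 'cpu'. */
    run_on_cpu(cpu, my_cb, RUN_ON_CPU_NULL);

    /* Asynchronous: queues the item, kicks the vCPU and returns at once. */
    async_run_on_cpu(cpu, my_cb, RUN_ON_CPU_NULL);
}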
*/ +static inline void exclusive_idle(void) +{ + while (pending_cpus) { + qemu_cond_wait(&exclusive_resume, &qemu_cpu_list_lock); + } +} + +/* Start an exclusive operation. + Must only be called from outside cpu_exec. */ +void start_exclusive(void) +{ + CPUState *other_cpu; + int running_cpus; + + qemu_mutex_lock(&qemu_cpu_list_lock); + exclusive_idle(); + + /* Make all other cpus stop executing. */ + atomic_set(&pending_cpus, 1); + + /* Write pending_cpus before reading other_cpu->running. */ + smp_mb(); + running_cpus = 0; + CPU_FOREACH(other_cpu) { + if (atomic_read(&other_cpu->running)) { + other_cpu->has_waiter = true; + running_cpus++; + qemu_cpu_kick(other_cpu); + } + } + + atomic_set(&pending_cpus, running_cpus + 1); + while (pending_cpus > 1) { + qemu_cond_wait(&exclusive_cond, &qemu_cpu_list_lock); + } + + /* Can release mutex, no one will enter another exclusive + * section until end_exclusive resets pending_cpus to 0. + */ + qemu_mutex_unlock(&qemu_cpu_list_lock); +} + +/* Finish an exclusive operation. */ +void end_exclusive(void) +{ + qemu_mutex_lock(&qemu_cpu_list_lock); + atomic_set(&pending_cpus, 0); + qemu_cond_broadcast(&exclusive_resume); + qemu_mutex_unlock(&qemu_cpu_list_lock); +} + +/* Wait for exclusive ops to finish, and begin cpu execution. */ +void cpu_exec_start(CPUState *cpu) +{ + atomic_set(&cpu->running, true); + + /* Write cpu->running before reading pending_cpus. */ + smp_mb(); + + /* 1. start_exclusive saw cpu->running == true and pending_cpus >= 1. + * After taking the lock we'll see cpu->has_waiter == true and run---not + * for long because start_exclusive kicked us. cpu_exec_end will + * decrement pending_cpus and signal the waiter. + * + * 2. start_exclusive saw cpu->running == false but pending_cpus >= 1. + * This includes the case when an exclusive item is running now. + * Then we'll see cpu->has_waiter == false and wait for the item to + * complete. + * + * 3. pending_cpus == 0. Then start_exclusive is definitely going to + * see cpu->running == true, and it will kick the CPU. + */ + if (unlikely(atomic_read(&pending_cpus))) { + qemu_mutex_lock(&qemu_cpu_list_lock); + if (!cpu->has_waiter) { + /* Not counted in pending_cpus, let the exclusive item + * run. Since we have the lock, just set cpu->running to true + * while holding it; no need to check pending_cpus again. + */ + atomic_set(&cpu->running, false); + exclusive_idle(); + /* Now pending_cpus is zero. */ + atomic_set(&cpu->running, true); + } else { + /* Counted in pending_cpus, go ahead and release the + * waiter at cpu_exec_end. + */ + } + qemu_mutex_unlock(&qemu_cpu_list_lock); + } +} + +/* Mark cpu as not executing, and release pending exclusive ops. */ +void cpu_exec_end(CPUState *cpu) +{ + atomic_set(&cpu->running, false); + + /* Write cpu->running before reading pending_cpus. */ + smp_mb(); + + /* 1. start_exclusive saw cpu->running == true. Then it will increment + * pending_cpus and wait for exclusive_cond. After taking the lock + * we'll see cpu->has_waiter == true. + * + * 2. start_exclusive saw cpu->running == false but here pending_cpus >= 1. + * This includes the case when an exclusive item started after setting + * cpu->running to false and before we read pending_cpus. Then we'll see + * cpu->has_waiter == false and not touch pending_cpus. The next call to + * cpu_exec_start will run exclusive_idle if still necessary, thus waiting + * for the item to complete. + * + * 3. pending_cpus == 0. 
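The comments in this hunk describe the handshake between exclusive sections and running vCPUs. A short sketch of the two sides follows; the wrapper names are illustrative, the bracketing mirrors what tcg_cpu_exec() does later in this series, and the declarations are assumed to be visible (qom/cpu.h and exec/exec-all.h here).

/* vCPU side: guest execution is always bracketed so that start_exclusive()
 * can wait for every running vCPU to drain out of this region. */
static int run_one_slice(CPUState *cpu)
{
    int ret;

    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    return ret;
}

/* Exclusive side: must be entered from outside cpu_exec.  Everything between
 * the two calls runs while no vCPU is inside its start/end bracket. */
static void update_state_exclusively(void)
{
    start_exclusive();
    /* ... mutate state that vCPUs must not observe half-updated ... */
    end_exclusive();
}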
Then start_exclusive is definitely going to + * see cpu->running == false, and it can ignore this CPU until the + * next cpu_exec_start. + */ + if (unlikely(atomic_read(&pending_cpus))) { + qemu_mutex_lock(&qemu_cpu_list_lock); + if (cpu->has_waiter) { + cpu->has_waiter = false; + atomic_set(&pending_cpus, pending_cpus - 1); + if (pending_cpus == 1) { + qemu_cond_signal(&exclusive_cond); + } + } + qemu_mutex_unlock(&qemu_cpu_list_lock); + } +} + +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, + run_on_cpu_data data) +{ + struct qemu_work_item *wi; + + wi = g_malloc0(sizeof(struct qemu_work_item)); + wi->func = func; + wi->data = data; + wi->free = true; + wi->exclusive = true; + + queue_work_on_cpu(cpu, wi); +} + +void process_queued_cpu_work(CPUState *cpu) +{ + struct qemu_work_item *wi; + + if (cpu->queued_work_first == NULL) { + return; + } + + qemu_mutex_lock(&cpu->work_mutex); + while (cpu->queued_work_first != NULL) { + wi = cpu->queued_work_first; + cpu->queued_work_first = wi->next; + if (!cpu->queued_work_first) { + cpu->queued_work_last = NULL; + } + qemu_mutex_unlock(&cpu->work_mutex); + if (wi->exclusive) { + /* Running work items outside the BQL avoids the following deadlock: + * 1) start_exclusive() is called with the BQL taken while another + * CPU is running; 2) cpu_exec in the other CPU tries to takes the + * BQL, so it goes to sleep; start_exclusive() is sleeping too, so + * neither CPU can proceed. + */ + qemu_mutex_unlock_iothread(); + start_exclusive(); + wi->func(cpu, wi->data); + end_exclusive(); + qemu_mutex_lock_iothread(); + } else { + wi->func(cpu, wi->data); + } + qemu_mutex_lock(&cpu->work_mutex); + if (wi->free) { + g_free(wi); + } else { + atomic_mb_set(&wi->done, true); + } + } + qemu_mutex_unlock(&cpu->work_mutex); + qemu_cond_broadcast(&qemu_work_cond); +} diff --git a/cpus.c b/cpus.c index 84c3520d44..5213351c6d 100644 --- a/cpus.c +++ b/cpus.c @@ -69,7 +69,6 @@ #endif /* CONFIG_LINUX */ -static CPUState *next_cpu; int64_t max_delay; int64_t max_advance; @@ -191,8 +190,12 @@ int64_t cpu_icount_to_ns(int64_t icount) return icount << icount_time_shift; } -/* return the host CPU cycle counter and handle stop/restart */ -/* Caller must hold the BQL */ +/* return the time elapsed in VM between vm_start and vm_stop. Unless + * icount is active, cpu_get_ticks() uses units of the host CPU cycle + * counter. + * + * Caller must hold the BQL + */ int64_t cpu_get_ticks(void) { int64_t ticks; @@ -219,17 +222,19 @@ int64_t cpu_get_ticks(void) static int64_t cpu_get_clock_locked(void) { - int64_t ticks; + int64_t time; - ticks = timers_state.cpu_clock_offset; + time = timers_state.cpu_clock_offset; if (timers_state.cpu_ticks_enabled) { - ticks += get_clock(); + time += get_clock(); } - return ticks; + return time; } -/* return the host CPU monotonic timer and handle stop/restart */ +/* Return the monotonic time elapsed in VM, i.e., + * the time between vm_start and vm_stop + */ int64_t cpu_get_clock(void) { int64_t ti; @@ -244,7 +249,7 @@ int64_t cpu_get_clock(void) } /* enable cpu_get_ticks() - * Caller must hold BQL which server as mutex for vm_clock_seqlock. + * Caller must hold BQL which serves as mutex for vm_clock_seqlock. */ void cpu_enable_ticks(void) { @@ -260,7 +265,7 @@ void cpu_enable_ticks(void) /* disable cpu_get_ticks() : the clock is stopped. You must not call * cpu_get_ticks() after that. - * Caller must hold BQL which server as mutex for vm_clock_seqlock. + * Caller must hold BQL which serves as mutex for vm_clock_seqlock. 
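async_safe_run_on_cpu() above queues an "exclusive" work item; process_queued_cpu_work() then drops the BQL and wraps the callback in start_exclusive()/end_exclusive(), so it runs while no other vCPU is executing guest code. A usage sketch with invented names:

/* Illustrative only: the callback runs between start_exclusive() and
 * end_exclusive(), outside the BQL, with all other vCPUs drained. */
static void my_safe_cb(CPUState *cpu, run_on_cpu_data data)
{
    /* Safe to rewrite structures that other vCPUs read without locks. */
}

static void schedule_safe_work(CPUState *cpu)
{
    async_safe_run_on_cpu(cpu, my_safe_cb, RUN_ON_CPU_NULL);
}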
*/ void cpu_disable_ticks(void) { @@ -551,9 +556,8 @@ static const VMStateDescription vmstate_timers = { } }; -static void cpu_throttle_thread(void *opaque) +static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) { - CPUState *cpu = opaque; double pct; double throttle_ratio; long sleeptime_ns; @@ -583,7 +587,8 @@ static void cpu_throttle_timer_tick(void *opaque) } CPU_FOREACH(cpu) { if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) { - async_run_on_cpu(cpu, cpu_throttle_thread, cpu); + async_run_on_cpu(cpu, cpu_throttle_thread, + RUN_ON_CPU_NULL); } } @@ -745,7 +750,8 @@ static int do_vm_stop(RunState state) } bdrv_drain_all(); - ret = blk_flush_all(); + replay_disable_events(); + ret = bdrv_flush_all(); return ret; } @@ -897,79 +903,21 @@ static QemuThread io_thread; static QemuCond qemu_cpu_cond; /* system init */ static QemuCond qemu_pause_cond; -static QemuCond qemu_work_cond; void qemu_init_cpu_loop(void) { qemu_init_sigbus(); qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); - qemu_cond_init(&qemu_work_cond); qemu_cond_init(&qemu_io_proceeded_cond); qemu_mutex_init(&qemu_global_mutex); qemu_thread_get_self(&io_thread); } -void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) -{ - struct qemu_work_item wi; - - if (qemu_cpu_is_self(cpu)) { - func(data); - return; - } - - wi.func = func; - wi.data = data; - wi.free = false; - - qemu_mutex_lock(&cpu->work_mutex); - if (cpu->queued_work_first == NULL) { - cpu->queued_work_first = &wi; - } else { - cpu->queued_work_last->next = &wi; - } - cpu->queued_work_last = &wi; - wi.next = NULL; - wi.done = false; - qemu_mutex_unlock(&cpu->work_mutex); - - qemu_cpu_kick(cpu); - while (!atomic_mb_read(&wi.done)) { - CPUState *self_cpu = current_cpu; - - qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex); - current_cpu = self_cpu; - } -} - -void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) { - struct qemu_work_item *wi; - - if (qemu_cpu_is_self(cpu)) { - func(data); - return; - } - - wi = g_malloc0(sizeof(struct qemu_work_item)); - wi->func = func; - wi->data = data; - wi->free = true; - - qemu_mutex_lock(&cpu->work_mutex); - if (cpu->queued_work_first == NULL) { - cpu->queued_work_first = wi; - } else { - cpu->queued_work_last->next = wi; - } - cpu->queued_work_last = wi; - wi->next = NULL; - wi->done = false; - qemu_mutex_unlock(&cpu->work_mutex); - - qemu_cpu_kick(cpu); + do_run_on_cpu(cpu, func, data, &qemu_global_mutex); } static void qemu_kvm_destroy_vcpu(CPUState *cpu) @@ -984,34 +932,6 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu) { } -static void flush_queued_work(CPUState *cpu) -{ - struct qemu_work_item *wi; - - if (cpu->queued_work_first == NULL) { - return; - } - - qemu_mutex_lock(&cpu->work_mutex); - while (cpu->queued_work_first != NULL) { - wi = cpu->queued_work_first; - cpu->queued_work_first = wi->next; - if (!cpu->queued_work_first) { - cpu->queued_work_last = NULL; - } - qemu_mutex_unlock(&cpu->work_mutex); - wi->func(wi->data); - qemu_mutex_lock(&cpu->work_mutex); - if (wi->free) { - g_free(wi); - } else { - atomic_mb_set(&wi->done, true); - } - } - qemu_mutex_unlock(&cpu->work_mutex); - qemu_cond_broadcast(&qemu_work_cond); -} - static void qemu_wait_io_event_common(CPUState *cpu) { if (cpu->stop) { @@ -1019,7 +939,7 @@ static void qemu_wait_io_event_common(CPUState *cpu) cpu->stopped = true; qemu_cond_broadcast(&qemu_pause_cond); } - flush_queued_work(cpu); + 
process_queued_cpu_work(cpu); cpu->thread_kicked = false; } @@ -1135,12 +1055,102 @@ static void *qemu_dummy_cpu_thread_fn(void *arg) #endif } -static void tcg_exec_all(void); +static int64_t tcg_get_icount_limit(void) +{ + int64_t deadline; + + if (replay_mode != REPLAY_MODE_PLAY) { + deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); + + /* Maintain prior (possibly buggy) behaviour where if no deadline + * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than + * INT32_MAX nanoseconds ahead, we still use INT32_MAX + * nanoseconds. + */ + if ((deadline < 0) || (deadline > INT32_MAX)) { + deadline = INT32_MAX; + } + + return qemu_icount_round(deadline); + } else { + return replay_get_instructions(); + } +} + +static void handle_icount_deadline(void) +{ + if (use_icount) { + int64_t deadline = + qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); + + if (deadline == 0) { + qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + } + } +} + +static int tcg_cpu_exec(CPUState *cpu) +{ + int ret; +#ifdef CONFIG_PROFILER + int64_t ti; +#endif + +#ifdef CONFIG_PROFILER + ti = profile_getclock(); +#endif + if (use_icount) { + int64_t count; + int decr; + timers_state.qemu_icount -= (cpu->icount_decr.u16.low + + cpu->icount_extra); + cpu->icount_decr.u16.low = 0; + cpu->icount_extra = 0; + count = tcg_get_icount_limit(); + timers_state.qemu_icount += count; + decr = (count > 0xffff) ? 0xffff : count; + count -= decr; + cpu->icount_decr.u16.low = decr; + cpu->icount_extra = count; + } + cpu_exec_start(cpu); + ret = cpu_exec(cpu); + cpu_exec_end(cpu); +#ifdef CONFIG_PROFILER + tcg_time += profile_getclock() - ti; +#endif + if (use_icount) { + /* Fold pending instructions back into the + instruction counter, and clear the interrupt flag. */ + timers_state.qemu_icount -= (cpu->icount_decr.u16.low + + cpu->icount_extra); + cpu->icount_decr.u32 = 0; + cpu->icount_extra = 0; + replay_account_executed_instructions(); + } + return ret; +} + +/* Destroy any remaining vCPUs which have been unplugged and have + * finished running + */ +static void deal_with_unplugged_cpus(void) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (cpu->unplug && !cpu_can_run(cpu)) { + qemu_tcg_destroy_vcpu(cpu); + cpu->created = false; + qemu_cond_signal(&qemu_cpu_cond); + break; + } + } +} static void *qemu_tcg_cpu_thread_fn(void *arg) { CPUState *cpu = arg; - CPUState *remove_cpu = NULL; rcu_register_thread(); @@ -1167,29 +1177,44 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* process any pending work */ atomic_mb_set(&exit_request, 1); - while (1) { - tcg_exec_all(); + cpu = first_cpu; - if (use_icount) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); + while (1) { + /* Account partial waits to QEMU_CLOCK_VIRTUAL. 
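tcg_cpu_exec() above hands the vCPU an icount budget split between the 16-bit decrementer (icount_decr.u16.low) and an overflow bucket (icount_extra), and on return folds the unexecuted remainder back into timers_state.qemu_icount. A small arithmetic sketch of that split; the helper names are local to the example and a non-negative budget is assumed.

#include <stdint.h>

/* Split a budget the way tcg_cpu_exec() does: at most 0xffff instructions
 * go into the 16-bit decrementer, the remainder waits in 'extra'. */
static void split_budget(int64_t count, uint16_t *low, int64_t *extra)
{
    int decr = (count > 0xffff) ? 0xffff : count;

    *low = decr;
    *extra = count - decr;
}

/* Instructions actually executed so far: budget minus what is still
 * pending in the decrementer and the overflow bucket. */
static int64_t executed(int64_t budget, uint16_t low, int64_t extra)
{
    return budget - (low + extra);
}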
*/ + qemu_account_warp_timer(); - if (deadline == 0) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } + if (!cpu) { + cpu = first_cpu; } - qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus)); - CPU_FOREACH(cpu) { - if (cpu->unplug && !cpu_can_run(cpu)) { - remove_cpu = cpu; + + for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) { + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, + (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); + + if (cpu_can_run(cpu)) { + int r; + r = tcg_cpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + break; + } + } else if (cpu->stop || cpu->stopped) { + if (cpu->unplug) { + cpu = CPU_NEXT(cpu); + } break; } - } - if (remove_cpu) { - qemu_tcg_destroy_vcpu(remove_cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - remove_cpu = NULL; - } + + } /* for cpu.. */ + + /* Pairs with smp_wmb in qemu_cpu_kick. */ + atomic_mb_set(&exit_request, 0); + + handle_icount_deadline(); + + qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus)); + deal_with_unplugged_cpus(); } return NULL; @@ -1287,17 +1312,17 @@ void qemu_mutex_unlock_iothread(void) qemu_mutex_unlock(&qemu_global_mutex); } -static int all_vcpus_paused(void) +static bool all_vcpus_paused(void) { CPUState *cpu; CPU_FOREACH(cpu) { if (!cpu->stopped) { - return 0; + return false; } } - return 1; + return true; } void pause_all_vcpus(void) @@ -1488,106 +1513,10 @@ int vm_stop_force_state(RunState state) bdrv_drain_all(); /* Make sure to return an error if the flush in a previous vm_stop() * failed. */ - return blk_flush_all(); + return bdrv_flush_all(); } } -static int64_t tcg_get_icount_limit(void) -{ - int64_t deadline; - - if (replay_mode != REPLAY_MODE_PLAY) { - deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); - - /* Maintain prior (possibly buggy) behaviour where if no deadline - * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than - * INT32_MAX nanoseconds ahead, we still use INT32_MAX - * nanoseconds. - */ - if ((deadline < 0) || (deadline > INT32_MAX)) { - deadline = INT32_MAX; - } - - return qemu_icount_round(deadline); - } else { - return replay_get_instructions(); - } -} - -static int tcg_cpu_exec(CPUState *cpu) -{ - int ret; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif - -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif - if (use_icount) { - int64_t count; - int decr; - timers_state.qemu_icount -= (cpu->icount_decr.u16.low - + cpu->icount_extra); - cpu->icount_decr.u16.low = 0; - cpu->icount_extra = 0; - count = tcg_get_icount_limit(); - timers_state.qemu_icount += count; - decr = (count > 0xffff) ? 0xffff : count; - count -= decr; - cpu->icount_decr.u16.low = decr; - cpu->icount_extra = count; - } - ret = cpu_exec(cpu); -#ifdef CONFIG_PROFILER - tcg_time += profile_getclock() - ti; -#endif - if (use_icount) { - /* Fold pending instructions back into the - instruction counter, and clear the interrupt flag. */ - timers_state.qemu_icount -= (cpu->icount_decr.u16.low - + cpu->icount_extra); - cpu->icount_decr.u32 = 0; - cpu->icount_extra = 0; - replay_account_executed_instructions(); - } - return ret; -} - -static void tcg_exec_all(void) -{ - int r; - - /* Account partial waits to QEMU_CLOCK_VIRTUAL. 
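The rewritten qemu_tcg_cpu_thread_fn() above keeps its position in the CPU list in a local variable instead of the removed file-scope next_cpu, restarting from first_cpu when it runs off the end or an exit is requested. A toy, stand-alone sketch of that resumable round-robin shape; the Cpu type and helpers are stand-ins, not QEMU's.

#include <stdbool.h>
#include <stddef.h>

typedef struct Cpu {
    struct Cpu *next;
    bool can_run;
} Cpu;

static Cpu *first_cpu;
static volatile bool exit_request;

static void run_slice(Cpu *cpu)
{
    (void)cpu;    /* execute a bit of guest code for this vCPU */
}

/* One pass of the scheduler: resume from 'cpu', stop early on exit_request,
 * and return the point from which the next pass should continue. */
static Cpu *round_robin_pass(Cpu *cpu)
{
    if (!cpu) {
        cpu = first_cpu;
    }
    for (; cpu != NULL && !exit_request; cpu = cpu->next) {
        if (cpu->can_run) {
            run_slice(cpu);
        }
    }
    return cpu;    /* NULL means start over from first_cpu next time */
}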
*/ - qemu_account_warp_timer(); - - if (next_cpu == NULL) { - next_cpu = first_cpu; - } - for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) { - CPUState *cpu = next_cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, - (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); - - if (cpu_can_run(cpu)) { - r = tcg_cpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - break; - } - } else if (cpu->stop || cpu->stopped) { - if (cpu->unplug) { - next_cpu = CPU_NEXT(cpu); - } - break; - } - } - - /* Pairs with smp_wmb in qemu_cpu_kick. */ - atomic_mb_set(&exit_request, 0); -} - void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg) { /* XXX: implement xxx_cpu_list for targets that still miss it */ diff --git a/cputlb.c b/cputlb.c index d068ee597e..813279f3bc 100644 --- a/cputlb.c +++ b/cputlb.c @@ -23,15 +23,14 @@ #include "exec/memory.h" #include "exec/address-spaces.h" #include "exec/cpu_ldst.h" - #include "exec/cputlb.h" - #include "exec/memory-internal.h" #include "exec/ram_addr.h" -#include "exec/exec-all.h" #include "tcg/tcg.h" #include "qemu/error-report.h" #include "exec/log.h" +#include "exec/helper-proto.h" +#include "qemu/atomic.h" /* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */ /* #define DEBUG_TLB */ @@ -498,6 +497,43 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry, + target_ulong addr, uintptr_t retaddr, int size) +{ + CPUState *cpu = ENV_GET_CPU(env); + hwaddr physaddr = iotlbentry->addr; + MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + uint64_t val; + + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; + cpu->mem_io_pc = retaddr; + if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + cpu_io_recompile(cpu, retaddr); + } + + cpu->mem_io_vaddr = addr; + memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs); + return val; +} + +static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry, + uint64_t val, target_ulong addr, + uintptr_t retaddr, int size) +{ + CPUState *cpu = ENV_GET_CPU(env); + hwaddr physaddr = iotlbentry->addr; + MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + + physaddr = (physaddr & TARGET_PAGE_MASK) + addr; + if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { + cpu_io_recompile(cpu, retaddr); + } + + cpu->mem_io_vaddr = addr; + cpu->mem_io_pc = retaddr; + memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs); +} + /* Return true if ADDR is present in the victim tlb, and has been copied back to the main tlb. */ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, @@ -527,36 +563,178 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index, victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \ (ADDR) & TARGET_PAGE_MASK) +/* Probe for whether the specified guest write access is permitted. + * If it is not permitted then an exception will be taken in the same + * way as if this were a real write access (and we will not return). + * Otherwise the function will return, and there will be a valid + * entry in the TLB for this access. 
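The probe_write() helper added in this hunk recomputes the TLB slot from the virtual address and compares the page tag, the same index/compare arithmetic used throughout cputlb.c. A stand-alone sketch of that math with illustrative constants in place of TARGET_PAGE_BITS/CPU_TLB_SIZE, and with the TLB_INVALID/IO flag bits left out for brevity.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Illustrative values; the real ones come from the per-target headers. */
#define PAGE_BITS 12
#define PAGE_MASK (~(uint64_t)((1u << PAGE_BITS) - 1))
#define TLB_SIZE  256

typedef struct {
    uint64_t addr_write;   /* page tag for writes (flag bits omitted here) */
    uintptr_t addend;      /* guest-virtual to host-pointer offset         */
} TLBEntry;

static TLBEntry tlb[TLB_SIZE];

/* True if the cached entry covers this address for writes. */
static bool tlb_hit_write(uint64_t addr)
{
    size_t index = (addr >> PAGE_BITS) & (TLB_SIZE - 1);
    uint64_t tag = tlb[index].addr_write;

    return (addr & PAGE_MASK) == (tag & PAGE_MASK);
}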
+ */ +void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, + uintptr_t retaddr) +{ + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + + if ((addr & TARGET_PAGE_MASK) + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + /* TLB entry is for a different page */ + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + } +} + +/* Probe for a read-modify-write atomic operation. Do not allow unaligned + * operations, or io operations to proceed. Return the host address. */ +static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr) +{ + size_t mmu_idx = get_mmuidx(oi); + size_t index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + CPUTLBEntry *tlbe = &env->tlb_table[mmu_idx][index]; + target_ulong tlb_addr = tlbe->addr_write; + TCGMemOp mop = get_memop(oi); + int a_bits = get_alignment_bits(mop); + int s_bits = mop & MO_SIZE; + + /* Adjust the given return address. */ + retaddr -= GETPC_ADJ; + + /* Enforce guest required alignment. */ + if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) { + /* ??? Maybe indicate atomic op to cpu_unaligned_access */ + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + /* Enforce qemu required alignment. */ + if (unlikely(addr & ((1 << s_bits) - 1))) { + /* We get here if guest alignment was not requested, + or was not enforced by cpu_unaligned_access above. + We might widen the access and emulate, but for now + mark an exception and exit the cpu loop. */ + goto stop_the_world; + } + + /* Check TLB entry and enforce page permissions. */ + if ((addr & TARGET_PAGE_MASK) + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (!VICTIM_TLB_HIT(addr_write, addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + tlb_addr = tlbe->addr_write; + } + + /* Notice an IO access, or a notdirty page. */ + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + /* There's really nothing that can be done to + support this apart from stop-the-world. */ + goto stop_the_world; + } + + /* Let the guest notice RMW on a write-only page. */ + if (unlikely(tlbe->addr_read != tlb_addr)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_LOAD, mmu_idx, retaddr); + /* Since we don't support reads and writes to different addresses, + and we do have the proper page loaded for write, this shouldn't + ever return. But just in case, handle via stop-the-world. */ + goto stop_the_world; + } + + return (void *)((uintptr_t)addr + tlbe->addend); + + stop_the_world: + cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr); +} + +#ifdef TARGET_WORDS_BIGENDIAN +# define TGT_BE(X) (X) +# define TGT_LE(X) BSWAP(X) +#else +# define TGT_BE(X) BSWAP(X) +# define TGT_LE(X) (X) +#endif + #define MMUSUFFIX _mmu -#define SHIFT 0 +#define DATA_SIZE 1 #include "softmmu_template.h" -#define SHIFT 1 +#define DATA_SIZE 2 #include "softmmu_template.h" -#define SHIFT 2 +#define DATA_SIZE 4 #include "softmmu_template.h" -#define SHIFT 3 +#define DATA_SIZE 8 #include "softmmu_template.h" -#undef MMUSUFFIX +/* First set of helpers allows passing in of OI and RETADDR. This makes + them callable from other helpers. 
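This hunk instantiates atomic_template.h and softmmu_template.h once per DATA_SIZE, redefining ATOMIC_NAME/EXTRA_ARGS between the two passes so the same body expands into both the _mmu helpers and the directly callable ones. A toy illustration of the include-the-header-N-times pattern, shown as two invented files (op_template.h and user.c); none of the names are QEMU's.

/* ---- op_template.h: included once per WIDTH, generates add_uN() ---- */
#define GLUE_(a, b) a##b
#define GLUE(a, b)  GLUE_(a, b)
#define TYPE        GLUE(GLUE(uint, WIDTH), _t)

static inline TYPE GLUE(add_u, WIDTH)(TYPE a, TYPE b)
{
    return a + b;
}

#undef TYPE
#undef WIDTH

/* ---- user.c: each inclusion stamps out one width ------------------- */
#include <stdint.h>

#define WIDTH 8
#include "op_template.h"

#define WIDTH 16
#include "op_template.h"

#define WIDTH 32
#include "op_template.h"

/* add_u8(), add_u16() and add_u32() now all exist. */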
*/ + +#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr +#define ATOMIC_NAME(X) \ + HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr) + +#define DATA_SIZE 1 +#include "atomic_template.h" + +#define DATA_SIZE 2 +#include "atomic_template.h" + +#define DATA_SIZE 4 +#include "atomic_template.h" + +#ifdef CONFIG_ATOMIC64 +#define DATA_SIZE 8 +#include "atomic_template.h" +#endif + +#ifdef CONFIG_ATOMIC128 +#define DATA_SIZE 16 +#include "atomic_template.h" +#endif + +/* Second set of helpers are directly callable from TCG as helpers. */ + +#undef EXTRA_ARGS +#undef ATOMIC_NAME +#undef ATOMIC_MMU_LOOKUP +#define EXTRA_ARGS , TCGMemOpIdx oi +#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC()) + +#define DATA_SIZE 1 +#include "atomic_template.h" + +#define DATA_SIZE 2 +#include "atomic_template.h" + +#define DATA_SIZE 4 +#include "atomic_template.h" + +#ifdef CONFIG_ATOMIC64 +#define DATA_SIZE 8 +#include "atomic_template.h" +#endif + +/* Code access functions. */ + +#undef MMUSUFFIX #define MMUSUFFIX _cmmu -#undef GETPC_ADJ -#define GETPC_ADJ 0 -#undef GETRA -#define GETRA() ((uintptr_t)0) +#undef GETPC +#define GETPC() ((uintptr_t)0) #define SOFTMMU_CODE_ACCESS -#define SHIFT 0 +#define DATA_SIZE 1 #include "softmmu_template.h" -#define SHIFT 1 +#define DATA_SIZE 2 #include "softmmu_template.h" -#define SHIFT 2 +#define DATA_SIZE 4 #include "softmmu_template.h" -#define SHIFT 3 +#define DATA_SIZE 8 #include "softmmu_template.h" diff --git a/crypto/block-luks.c b/crypto/block-luks.c index aba4455646..4530f8241c 100644 --- a/crypto/block-luks.c +++ b/crypto/block-luks.c @@ -29,10 +29,7 @@ #include "crypto/pbkdf.h" #include "crypto/secret.h" #include "crypto/random.h" - -#ifdef CONFIG_UUID -#include -#endif +#include "qemu/uuid.h" #include "qemu/coroutine.h" @@ -877,18 +874,12 @@ qcrypto_block_luks_open(QCryptoBlock *block, } -static int -qcrypto_block_luks_uuid_gen(uint8_t *uuidstr, Error **errp) +static void +qcrypto_block_luks_uuid_gen(uint8_t *uuidstr) { -#ifdef CONFIG_UUID - uuid_t uuid; - uuid_generate(uuid); - uuid_unparse(uuid, (char *)uuidstr); - return 0; -#else - error_setg(errp, "Unable to generate uuids on this platform"); - return -1; -#endif + QemuUUID uuid; + qemu_uuid_generate(&uuid); + qemu_uuid_unparse(&uuid, (char *)uuidstr); } static int @@ -917,8 +908,12 @@ qcrypto_block_luks_create(QCryptoBlock *block, const char *hash_alg; char *cipher_mode_spec = NULL; QCryptoCipherAlgorithm ivcipheralg = 0; + uint64_t iters; memcpy(&luks_opts, &options->u.luks, sizeof(luks_opts)); + if (!luks_opts.has_iter_time) { + luks_opts.iter_time = 2000; + } if (!luks_opts.has_cipher_alg) { luks_opts.cipher_alg = QCRYPTO_CIPHER_ALG_AES_256; } @@ -961,10 +956,7 @@ qcrypto_block_luks_create(QCryptoBlock *block, * it out to disk */ luks->header.version = QCRYPTO_BLOCK_LUKS_VERSION; - if (qcrypto_block_luks_uuid_gen(luks->header.uuid, - errp) < 0) { - goto error; - } + qcrypto_block_luks_uuid_gen(luks->header.uuid); cipher_alg = qcrypto_block_luks_cipher_alg_lookup(luks_opts.cipher_alg, errp); @@ -1064,26 +1056,40 @@ qcrypto_block_luks_create(QCryptoBlock *block, /* Determine how many iterations we need to hash the master * key, in order to have 1 second of compute time used */ - luks->header.master_key_iterations = - qcrypto_pbkdf2_count_iters(luks_opts.hash_alg, - masterkey, luks->header.key_bytes, - luks->header.master_key_salt, - 
QCRYPTO_BLOCK_LUKS_SALT_LEN, - &local_err); + iters = qcrypto_pbkdf2_count_iters(luks_opts.hash_alg, + masterkey, luks->header.key_bytes, + luks->header.master_key_salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + QCRYPTO_BLOCK_LUKS_DIGEST_LEN, + &local_err); if (local_err) { error_propagate(errp, local_err); goto error; } + if (iters > (ULLONG_MAX / luks_opts.iter_time)) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu too large to scale", + (unsigned long long)iters); + goto error; + } + + /* iter_time was in millis, but count_iters reported for secs */ + iters = iters * luks_opts.iter_time / 1000; + /* Why /= 8 ? That matches cryptsetup, but there's no * explanation why they chose /= 8... Probably so that * if all 8 keyslots are active we only spend 1 second * in total time to check all keys */ - luks->header.master_key_iterations /= 8; - luks->header.master_key_iterations = MAX( - luks->header.master_key_iterations, - QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS); - + iters /= 8; + if (iters > UINT32_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu larger than %u", + (unsigned long long)iters, UINT32_MAX); + goto error; + } + iters = MAX(iters, QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS); + luks->header.master_key_iterations = iters; /* Hash the master key, saving the result in the LUKS * header. This hash is used when opening the encrypted @@ -1131,22 +1137,36 @@ qcrypto_block_luks_create(QCryptoBlock *block, /* Again we determine how many iterations are required to * hash the user password while consuming 1 second of compute * time */ - luks->header.key_slots[0].iterations = - qcrypto_pbkdf2_count_iters(luks_opts.hash_alg, - (uint8_t *)password, strlen(password), - luks->header.key_slots[0].salt, - QCRYPTO_BLOCK_LUKS_SALT_LEN, - &local_err); + iters = qcrypto_pbkdf2_count_iters(luks_opts.hash_alg, + (uint8_t *)password, strlen(password), + luks->header.key_slots[0].salt, + QCRYPTO_BLOCK_LUKS_SALT_LEN, + luks->header.key_bytes, + &local_err); if (local_err) { error_propagate(errp, local_err); goto error; } - /* Why /= 2 ? That matches cryptsetup, but there's no - * explanation why they chose /= 2... 
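The LUKS changes above scale the benchmarked per-second iteration count by the requested iter_time (milliseconds), checking for overflow before the multiply and clamping the result to the 32-bit header field. A compact sketch of that arithmetic; MIN_ITERS is an illustrative floor rather than the LUKS constant, and iter_time_ms is assumed non-zero as in the patch (it defaults to 2000).

#include <stdbool.h>
#include <stdint.h>

#define MIN_ITERS 1000   /* illustrative floor, not QCRYPTO_BLOCK_LUKS_MIN_* */

static bool scale_iterations(uint64_t iters_per_sec, uint64_t iter_time_ms,
                             uint32_t *out)
{
    uint64_t iters;

    if (iter_time_ms == 0 || iters_per_sec > UINT64_MAX / iter_time_ms) {
        return false;                 /* would overflow the multiply */
    }
    iters = iters_per_sec * iter_time_ms / 1000;
    if (iters > UINT32_MAX) {
        return false;                 /* header field is only 32 bits wide */
    }
    *out = (uint32_t)(iters < MIN_ITERS ? MIN_ITERS : iters);
    return true;
}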
*/ - luks->header.key_slots[0].iterations /= 2; - luks->header.key_slots[0].iterations = MAX( - luks->header.key_slots[0].iterations, - QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS); + + if (iters > (ULLONG_MAX / luks_opts.iter_time)) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu too large to scale", + (unsigned long long)iters); + goto error; + } + + /* iter_time was in millis, but count_iters reported for secs */ + iters = iters * luks_opts.iter_time / 1000; + + if (iters > UINT32_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu larger than %u", + (unsigned long long)iters, UINT32_MAX); + goto error; + } + + luks->header.key_slots[0].iterations = + MAX(iters, QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS); /* Generate a key that we'll use to encrypt the master diff --git a/crypto/block.c b/crypto/block.c index be823eebeb..64c8420425 100644 --- a/crypto/block.c +++ b/crypto/block.c @@ -59,7 +59,8 @@ QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options, if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || !qcrypto_block_drivers[options->format]) { - error_setg(errp, "Unsupported block driver %d", options->format); + error_setg(errp, "Unsupported block driver %s", + QCryptoBlockFormat_lookup[options->format]); g_free(block); return NULL; } @@ -88,7 +89,8 @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options, if (options->format >= G_N_ELEMENTS(qcrypto_block_drivers) || !qcrypto_block_drivers[options->format]) { - error_setg(errp, "Unsupported block driver %d", options->format); + error_setg(errp, "Unsupported block driver %s", + QCryptoBlockFormat_lookup[options->format]); g_free(block); return NULL; } diff --git a/crypto/cipher-builtin.c b/crypto/cipher-builtin.c index 88963f65c8..b4bc2b9ca6 100644 --- a/crypto/cipher-builtin.c +++ b/crypto/cipher-builtin.c @@ -244,7 +244,8 @@ static int qcrypto_cipher_init_aes(QCryptoCipher *cipher, if (cipher->mode != QCRYPTO_CIPHER_MODE_CBC && cipher->mode != QCRYPTO_CIPHER_MODE_ECB && cipher->mode != QCRYPTO_CIPHER_MODE_XTS) { - error_setg(errp, "Unsupported cipher mode %d", cipher->mode); + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[cipher->mode]); return -1; } @@ -376,7 +377,8 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher, QCryptoCipherBuiltin *ctxt; if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) { - error_setg(errp, "Unsupported cipher mode %d", cipher->mode); + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[cipher->mode]); return -1; } @@ -398,14 +400,26 @@ static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher, } -bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg) +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) { switch (alg) { case QCRYPTO_CIPHER_ALG_DES_RFB: case QCRYPTO_CIPHER_ALG_AES_128: case QCRYPTO_CIPHER_ALG_AES_192: case QCRYPTO_CIPHER_ALG_AES_256: + break; + default: + return false; + } + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: return true; + case QCRYPTO_CIPHER_MODE_CTR: + return false; default: return false; } @@ -419,6 +433,17 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, { QCryptoCipher *cipher; + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + break; + default: + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[mode]); + return NULL; + } + cipher = g_new0(QCryptoCipher, 1); 
cipher->alg = alg; cipher->mode = mode; @@ -442,7 +467,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, break; default: error_setg(errp, - "Unsupported cipher algorithm %d", cipher->alg); + "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_lookup[cipher->alg]); goto error; } diff --git a/crypto/cipher-gcrypt.c b/crypto/cipher-gcrypt.c index ede2f70df8..c550db9008 100644 --- a/crypto/cipher-gcrypt.c +++ b/crypto/cipher-gcrypt.c @@ -24,7 +24,8 @@ #include -bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg) +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) { switch (alg) { case QCRYPTO_CIPHER_ALG_DES_RFB: @@ -37,6 +38,16 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg) case QCRYPTO_CIPHER_ALG_SERPENT_256: case QCRYPTO_CIPHER_ALG_TWOFISH_128: case QCRYPTO_CIPHER_ALG_TWOFISH_256: + break; + default: + return false; + } + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + case QCRYPTO_CIPHER_MODE_CTR: return true; default: return false; @@ -48,6 +59,7 @@ struct QCryptoCipherGcrypt { gcry_cipher_hd_t handle; gcry_cipher_hd_t tweakhandle; size_t blocksize; + /* Initialization vector or Counter */ uint8_t *iv; }; @@ -69,8 +81,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, case QCRYPTO_CIPHER_MODE_CBC: gcrymode = GCRY_CIPHER_MODE_CBC; break; + case QCRYPTO_CIPHER_MODE_CTR: + gcrymode = GCRY_CIPHER_MODE_CTR; + break; default: - error_setg(errp, "Unsupported cipher mode %d", mode); + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[mode]); return NULL; } @@ -120,7 +136,8 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, break; default: - error_setg(errp, "Unsupported cipher algorithm %d", alg); + error_setg(errp, "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_lookup[alg]); return NULL; } @@ -192,6 +209,12 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, } if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) { + if (ctx->blocksize != XTS_BLOCK_SIZE) { + error_setg(errp, + "Cipher block size %zu must equal XTS block size %d", + ctx->blocksize, XTS_BLOCK_SIZE); + goto error; + } ctx->iv = g_new0(uint8_t, ctx->blocksize); } @@ -331,12 +354,21 @@ int qcrypto_cipher_setiv(QCryptoCipher *cipher, if (ctx->iv) { memcpy(ctx->iv, iv, niv); } else { - gcry_cipher_reset(ctx->handle); - err = gcry_cipher_setiv(ctx->handle, iv, niv); - if (err != 0) { - error_setg(errp, "Cannot set IV: %s", - gcry_strerror(err)); - return -1; + if (cipher->mode == QCRYPTO_CIPHER_MODE_CTR) { + err = gcry_cipher_setctr(ctx->handle, iv, niv); + if (err != 0) { + error_setg(errp, "Cannot set Counter: %s", + gcry_strerror(err)); + return -1; + } + } else { + gcry_cipher_reset(ctx->handle); + err = gcry_cipher_setiv(ctx->handle, iv, niv); + if (err != 0) { + error_setg(errp, "Cannot set IV: %s", + gcry_strerror(err)); + return -1; + } } } diff --git a/crypto/cipher-nettle.c b/crypto/cipher-nettle.c index 70909fb7fe..cd094cd6a5 100644 --- a/crypto/cipher-nettle.c +++ b/crypto/cipher-nettle.c @@ -28,6 +28,7 @@ #include #include #include +#include typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx, size_t length, @@ -186,12 +187,13 @@ struct QCryptoCipherNettle { QCryptoCipherNettleFuncNative alg_decrypt_native; QCryptoCipherNettleFuncWrapper alg_encrypt_wrapper; QCryptoCipherNettleFuncWrapper alg_decrypt_wrapper; - + /* Initialization vector or Counter */ uint8_t *iv; size_t blocksize; }; -bool 
qcrypto_cipher_supports(QCryptoCipherAlgorithm alg) +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode) { switch (alg) { case QCRYPTO_CIPHER_ALG_DES_RFB: @@ -205,6 +207,16 @@ bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg) case QCRYPTO_CIPHER_ALG_TWOFISH_128: case QCRYPTO_CIPHER_ALG_TWOFISH_192: case QCRYPTO_CIPHER_ALG_TWOFISH_256: + break; + default: + return false; + } + + switch (mode) { + case QCRYPTO_CIPHER_MODE_ECB: + case QCRYPTO_CIPHER_MODE_CBC: + case QCRYPTO_CIPHER_MODE_XTS: + case QCRYPTO_CIPHER_MODE_CTR: return true; default: return false; @@ -225,9 +237,11 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, case QCRYPTO_CIPHER_MODE_ECB: case QCRYPTO_CIPHER_MODE_CBC: case QCRYPTO_CIPHER_MODE_XTS: + case QCRYPTO_CIPHER_MODE_CTR: break; default: - error_setg(errp, "Unsupported cipher mode %d", mode); + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[mode]); return NULL; } @@ -357,7 +371,15 @@ QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg, break; default: - error_setg(errp, "Unsupported cipher algorithm %d", alg); + error_setg(errp, "Unsupported cipher algorithm %s", + QCryptoCipherAlgorithm_lookup[alg]); + goto error; + } + + if (mode == QCRYPTO_CIPHER_MODE_XTS && + ctx->blocksize != XTS_BLOCK_SIZE) { + error_setg(errp, "Cipher block size %zu must equal XTS block size %d", + ctx->blocksize, XTS_BLOCK_SIZE); goto error; } @@ -421,9 +443,15 @@ int qcrypto_cipher_encrypt(QCryptoCipher *cipher, ctx->iv, len, out, in); break; + case QCRYPTO_CIPHER_MODE_CTR: + ctr_crypt(ctx->ctx, ctx->alg_encrypt_native, + ctx->blocksize, ctx->iv, + len, out, in); + break; + default: - error_setg(errp, "Unsupported cipher algorithm %d", - cipher->alg); + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[cipher->mode]); return -1; } return 0; @@ -456,19 +484,19 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher, break; case QCRYPTO_CIPHER_MODE_XTS: - if (ctx->blocksize != XTS_BLOCK_SIZE) { - error_setg(errp, "Block size must be %d not %zu", - XTS_BLOCK_SIZE, ctx->blocksize); - return -1; - } xts_decrypt(ctx->ctx, ctx->ctx_tweak, ctx->alg_encrypt_wrapper, ctx->alg_decrypt_wrapper, ctx->iv, len, out, in); break; + case QCRYPTO_CIPHER_MODE_CTR: + ctr_crypt(ctx->ctx, ctx->alg_encrypt_native, + ctx->blocksize, ctx->iv, + len, out, in); + break; default: - error_setg(errp, "Unsupported cipher algorithm %d", - cipher->alg); + error_setg(errp, "Unsupported cipher mode %s", + QCryptoCipherMode_lookup[cipher->mode]); return -1; } return 0; diff --git a/crypto/cipher.c b/crypto/cipher.c index cafb454363..a9bca41302 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -55,6 +55,7 @@ static bool mode_need_iv[QCRYPTO_CIPHER_MODE__MAX] = { [QCRYPTO_CIPHER_MODE_ECB] = false, [QCRYPTO_CIPHER_MODE_CBC] = true, [QCRYPTO_CIPHER_MODE_XTS] = true, + [QCRYPTO_CIPHER_MODE_CTR] = true, }; diff --git a/crypto/init.c b/crypto/init.c index 1e564d9492..f65207e57d 100644 --- a/crypto/init.c +++ b/crypto/init.c @@ -59,8 +59,7 @@ #if (defined(CONFIG_GCRYPT) && \ (!defined(CONFIG_GNUTLS) || \ - !defined(GNUTLS_VERSION_NUMBER) || \ - (GNUTLS_VERSION_NUMBER < 0x020c00)) && \ + (LIBGNUTLS_VERSION_NUMBER < 0x020c00)) && \ (!defined(GCRYPT_VERSION_NUMBER) || \ (GCRYPT_VERSION_NUMBER < 0x010600))) #define QCRYPTO_INIT_GCRYPT_THREADS @@ -120,6 +119,10 @@ static struct gcry_thread_cbs qcrypto_gcrypt_thread_impl = { int qcrypto_init(Error **errp) { +#ifdef QCRYPTO_INIT_GCRYPT_THREADS + gcry_control(GCRYCTL_SET_THREAD_CBS, 
&qcrypto_gcrypt_thread_impl); +#endif /* QCRYPTO_INIT_GCRYPT_THREADS */ + #ifdef CONFIG_GNUTLS int ret; ret = gnutls_global_init(); @@ -140,9 +143,6 @@ int qcrypto_init(Error **errp) error_setg(errp, "Unable to initialize gcrypt"); return -1; } -#ifdef QCRYPTO_INIT_GCRYPT_THREADS - gcry_control(GCRYCTL_SET_THREAD_CBS, &qcrypto_gcrypt_thread_impl); -#endif /* QCRYPTO_INIT_GCRYPT_THREADS */ gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0); #endif diff --git a/crypto/pbkdf-gcrypt.c b/crypto/pbkdf-gcrypt.c index 34af3a97e9..40289858bf 100644 --- a/crypto/pbkdf-gcrypt.c +++ b/crypto/pbkdf-gcrypt.c @@ -28,7 +28,11 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) switch (hash) { case QCRYPTO_HASH_ALG_MD5: case QCRYPTO_HASH_ALG_SHA1: + case QCRYPTO_HASH_ALG_SHA224: case QCRYPTO_HASH_ALG_SHA256: + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: return true; default: return false; @@ -38,20 +42,33 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, const uint8_t *key, size_t nkey, const uint8_t *salt, size_t nsalt, - unsigned int iterations, + uint64_t iterations, uint8_t *out, size_t nout, Error **errp) { static const int hash_map[QCRYPTO_HASH_ALG__MAX] = { [QCRYPTO_HASH_ALG_MD5] = GCRY_MD_MD5, [QCRYPTO_HASH_ALG_SHA1] = GCRY_MD_SHA1, + [QCRYPTO_HASH_ALG_SHA224] = GCRY_MD_SHA224, [QCRYPTO_HASH_ALG_SHA256] = GCRY_MD_SHA256, + [QCRYPTO_HASH_ALG_SHA384] = GCRY_MD_SHA384, + [QCRYPTO_HASH_ALG_SHA512] = GCRY_MD_SHA512, + [QCRYPTO_HASH_ALG_RIPEMD160] = GCRY_MD_RMD160, }; int ret; + if (iterations > ULONG_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu must be less than %lu", + (long long unsigned)iterations, ULONG_MAX); + return -1; + } + if (hash >= G_N_ELEMENTS(hash_map) || hash_map[hash] == GCRY_MD_NONE) { - error_setg(errp, "Unexpected hash algorithm %d", hash); + error_setg_errno(errp, ENOSYS, + "PBKDF does not support hash algorithm %s", + QCryptoHashAlgorithm_lookup[hash]); return -1; } diff --git a/crypto/pbkdf-nettle.c b/crypto/pbkdf-nettle.c index d681a606f9..6fb2671656 100644 --- a/crypto/pbkdf-nettle.c +++ b/crypto/pbkdf-nettle.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include +#include #include "qapi/error.h" #include "crypto/pbkdf.h" @@ -28,7 +29,11 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) { switch (hash) { case QCRYPTO_HASH_ALG_SHA1: + case QCRYPTO_HASH_ALG_SHA224: case QCRYPTO_HASH_ALG_SHA256: + case QCRYPTO_HASH_ALG_SHA384: + case QCRYPTO_HASH_ALG_SHA512: + case QCRYPTO_HASH_ALG_RIPEMD160: return true; default: return false; @@ -38,28 +43,74 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash) int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, const uint8_t *key, size_t nkey, const uint8_t *salt, size_t nsalt, - unsigned int iterations, + uint64_t iterations, uint8_t *out, size_t nout, Error **errp) { + union { + struct hmac_md5_ctx md5; + struct hmac_sha1_ctx sha1; + struct hmac_sha224_ctx sha224; + struct hmac_sha256_ctx sha256; + struct hmac_sha384_ctx sha384; + struct hmac_sha512_ctx sha512; + struct hmac_ripemd160_ctx ripemd160; + } ctx; + + if (iterations > UINT_MAX) { + error_setg_errno(errp, ERANGE, + "PBKDF iterations %llu must be less than %u", + (long long unsigned)iterations, UINT_MAX); + return -1; + } + switch (hash) { + case QCRYPTO_HASH_ALG_MD5: + hmac_md5_set_key(&ctx.md5, nkey, key); + PBKDF2(&ctx.md5, hmac_md5_update, hmac_md5_digest, + MD5_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + case 
QCRYPTO_HASH_ALG_SHA1: - pbkdf2_hmac_sha1(nkey, key, - iterations, - nsalt, salt, - nout, out); + hmac_sha1_set_key(&ctx.sha1, nkey, key); + PBKDF2(&ctx.sha1, hmac_sha1_update, hmac_sha1_digest, + SHA1_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA224: + hmac_sha224_set_key(&ctx.sha224, nkey, key); + PBKDF2(&ctx.sha224, hmac_sha224_update, hmac_sha224_digest, + SHA224_DIGEST_SIZE, iterations, nsalt, salt, nout, out); break; case QCRYPTO_HASH_ALG_SHA256: - pbkdf2_hmac_sha256(nkey, key, - iterations, - nsalt, salt, - nout, out); + hmac_sha256_set_key(&ctx.sha256, nkey, key); + PBKDF2(&ctx.sha256, hmac_sha256_update, hmac_sha256_digest, + SHA256_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA384: + hmac_sha384_set_key(&ctx.sha384, nkey, key); + PBKDF2(&ctx.sha384, hmac_sha384_update, hmac_sha384_digest, + SHA384_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_SHA512: + hmac_sha512_set_key(&ctx.sha512, nkey, key); + PBKDF2(&ctx.sha512, hmac_sha512_update, hmac_sha512_digest, + SHA512_DIGEST_SIZE, iterations, nsalt, salt, nout, out); + break; + + case QCRYPTO_HASH_ALG_RIPEMD160: + hmac_ripemd160_set_key(&ctx.ripemd160, nkey, key); + PBKDF2(&ctx.ripemd160, hmac_ripemd160_update, hmac_ripemd160_digest, + RIPEMD160_DIGEST_SIZE, iterations, nsalt, salt, nout, out); break; default: error_setg_errno(errp, ENOSYS, - "PBKDF does not support hash algorithm %d", hash); + "PBKDF does not support hash algorithm %s", + QCryptoHashAlgorithm_lookup[hash]); return -1; } return 0; diff --git a/crypto/pbkdf-stub.c b/crypto/pbkdf-stub.c index 266a5051b7..a15044da42 100644 --- a/crypto/pbkdf-stub.c +++ b/crypto/pbkdf-stub.c @@ -32,7 +32,7 @@ int qcrypto_pbkdf2(QCryptoHashAlgorithm hash G_GNUC_UNUSED, size_t nkey G_GNUC_UNUSED, const uint8_t *salt G_GNUC_UNUSED, size_t nsalt G_GNUC_UNUSED, - unsigned int iterations G_GNUC_UNUSED, + uint64_t iterations G_GNUC_UNUSED, uint8_t *out G_GNUC_UNUSED, size_t nout G_GNUC_UNUSED, Error **errp) diff --git a/crypto/pbkdf.c b/crypto/pbkdf.c index 695cc35df1..f22e71d183 100644 --- a/crypto/pbkdf.c +++ b/crypto/pbkdf.c @@ -62,29 +62,33 @@ static int qcrypto_pbkdf2_get_thread_cpu(unsigned long long *val_ms, #endif } -int qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, - const uint8_t *key, size_t nkey, - const uint8_t *salt, size_t nsalt, - Error **errp) +uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + size_t nout, + Error **errp) { - uint8_t out[32]; - long long int iterations = (1 << 15); + uint64_t ret = -1; + uint8_t *out; + uint64_t iterations = (1 << 15); unsigned long long delta_ms, start_ms, end_ms; + out = g_new(uint8_t, nout); + while (1) { if (qcrypto_pbkdf2_get_thread_cpu(&start_ms, errp) < 0) { - return -1; + goto cleanup; } if (qcrypto_pbkdf2(hash, key, nkey, salt, nsalt, iterations, - out, sizeof(out), + out, nout, errp) < 0) { - return -1; + goto cleanup; } if (qcrypto_pbkdf2_get_thread_cpu(&end_ms, errp) < 0) { - return -1; + goto cleanup; } delta_ms = end_ms - start_ms; @@ -100,11 +104,10 @@ int qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, iterations = iterations * 1000 / delta_ms; - if (iterations > INT32_MAX) { - error_setg(errp, "Iterations %lld too large for a 32-bit int", - iterations); - return -1; - } + ret = iterations; - return iterations; + cleanup: + memset(out, 0, nout); + g_free(out); + return ret; } diff --git 
a/crypto/tlscredsx509.c b/crypto/tlscredsx509.c index 520d34d77e..50eb54f6bb 100644 --- a/crypto/tlscredsx509.c +++ b/crypto/tlscredsx509.c @@ -615,7 +615,7 @@ qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds, } if (cert != NULL && key != NULL) { -#if GNUTLS_VERSION_NUMBER >= 0x030111 +#if LIBGNUTLS_VERSION_NUMBER >= 0x030111 char *password = NULL; if (creds->passwordid) { password = qcrypto_secret_lookup_as_utf8(creds->passwordid, @@ -630,7 +630,7 @@ qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds, password, 0); g_free(password); -#else /* GNUTLS_VERSION_NUMBER < 0x030111 */ +#else /* LIBGNUTLS_VERSION_NUMBER < 0x030111 */ if (creds->passwordid) { error_setg(errp, "PKCS8 decryption requires GNUTLS >= 3.1.11"); goto cleanup; @@ -638,7 +638,7 @@ qcrypto_tls_creds_x509_load(QCryptoTLSCredsX509 *creds, ret = gnutls_certificate_set_x509_key_file(creds->data, cert, key, GNUTLS_X509_FMT_PEM); -#endif /* GNUTLS_VERSION_NUMBER < 0x030111 */ +#endif if (ret < 0) { error_setg(errp, "Cannot load certificate '%s' & key '%s': %s", cert, key, gnutls_strerror(ret)); diff --git a/crypto/tlssession.c b/crypto/tlssession.c index 2de42c61cb..96a02deb69 100644 --- a/crypto/tlssession.c +++ b/crypto/tlssession.c @@ -351,16 +351,22 @@ qcrypto_tls_session_check_credentials(QCryptoTLSSession *session, { if (object_dynamic_cast(OBJECT(session->creds), TYPE_QCRYPTO_TLS_CREDS_ANON)) { + trace_qcrypto_tls_session_check_creds(session, "nop"); return 0; } else if (object_dynamic_cast(OBJECT(session->creds), TYPE_QCRYPTO_TLS_CREDS_X509)) { if (session->creds->verifyPeer) { - return qcrypto_tls_session_check_certificate(session, - errp); + int ret = qcrypto_tls_session_check_certificate(session, + errp); + trace_qcrypto_tls_session_check_creds(session, + ret == 0 ? "pass" : "fail"); + return ret; } else { + trace_qcrypto_tls_session_check_creds(session, "skip"); return 0; } } else { + trace_qcrypto_tls_session_check_creds(session, "error"); error_setg(errp, "Unexpected credential type %s", object_get_typename(OBJECT(session->creds))); return -1; diff --git a/crypto/trace-events b/crypto/trace-events index 8181843723..dc6ddd30d6 100644 --- a/crypto/trace-events +++ b/crypto/trace-events @@ -17,3 +17,4 @@ qcrypto_tls_creds_x509_load_cert_list(void *creds, const char *file) "TLS creds # crypto/tlssession.c qcrypto_tls_session_new(void *session, void *creds, const char *hostname, const char *aclname, int endpoint) "TLS session new session=%p creds=%p hostname=%s aclname=%s endpoint=%d" +qcrypto_tls_session_check_creds(void *session, const char *status) "TLS session check creds session=%p status=%s" diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak index 7a19863b18..6de3e16a3e 100644 --- a/default-configs/arm-softmmu.mak +++ b/default-configs/arm-softmmu.mak @@ -3,7 +3,6 @@ include pci.mak include usb.mak CONFIG_VGA=y -CONFIG_ISA_MMIO=y CONFIG_NAND=y CONFIG_ECC=y CONFIG_SERIAL=y @@ -87,6 +86,8 @@ CONFIG_ZYNQ=y CONFIG_STM32F2XX_TIMER=y CONFIG_STM32F2XX_USART=y CONFIG_STM32F2XX_SYSCFG=y +CONFIG_STM32F2XX_ADC=y +CONFIG_STM32F2XX_SPI=y CONFIG_STM32F205_SOC=y CONFIG_VERSATILE_PCI=y diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak index b177e52104..0b513602c8 100644 --- a/default-configs/i386-softmmu.mak +++ b/default-configs/i386-softmmu.mak @@ -30,14 +30,12 @@ CONFIG_I8257=y CONFIG_IDE_ISA=y CONFIG_IDE_PIIX=y CONFIG_NE2000_ISA=y -CONFIG_PIIX_PCI=y CONFIG_HPET=y CONFIG_APPLESMC=y CONFIG_I8259=y CONFIG_PFLASH_CFI01=y CONFIG_TPM_TIS=$(CONFIG_TPM) 
CONFIG_MC146818RTC=y -CONFIG_PAM=y CONFIG_PCI_PIIX=y CONFIG_WDT_IB700=y CONFIG_XEN_I386=$(CONFIG_XEN) diff --git a/default-configs/mips-softmmu-common.mak b/default-configs/mips-softmmu-common.mak index 0394514b93..f0676f52ac 100644 --- a/default-configs/mips-softmmu-common.mak +++ b/default-configs/mips-softmmu-common.mak @@ -17,6 +17,7 @@ CONFIG_FDC=y CONFIG_ACPI=y CONFIG_ACPI_X86=y CONFIG_ACPI_MEMORY_HOTPLUG=y +CONFIG_ACPI_NVDIMM=y CONFIG_ACPI_CPU_HOTPLUG=y CONFIG_APM=y CONFIG_I8257=y diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak index 4befde3c7c..d4d0f9b192 100644 --- a/default-configs/ppc-softmmu.mak +++ b/default-configs/ppc-softmmu.mak @@ -3,7 +3,6 @@ include pci.mak include sound.mak include usb.mak -CONFIG_ISA_MMIO=y CONFIG_ESCC=y CONFIG_M48T59=y CONFIG_SERIAL=y diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak index c4be59f638..67a9bcaa67 100644 --- a/default-configs/ppc64-softmmu.mak +++ b/default-configs/ppc64-softmmu.mak @@ -4,7 +4,6 @@ include pci.mak include sound.mak include usb.mak CONFIG_VIRTIO_VGA=y -CONFIG_ISA_MMIO=y CONFIG_ESCC=y CONFIG_M48T59=y CONFIG_SERIAL=y @@ -40,6 +39,7 @@ CONFIG_I8259=y CONFIG_XILINX=y CONFIG_XILINX_ETHLITE=y CONFIG_PSERIES=y +CONFIG_POWERNV=y CONFIG_PREP=y CONFIG_MAC=y CONFIG_E500=y diff --git a/default-configs/sparc64-softmmu.mak b/default-configs/sparc64-softmmu.mak index 123bb993ef..c0cdd644c8 100644 --- a/default-configs/sparc64-softmmu.mak +++ b/default-configs/sparc64-softmmu.mak @@ -2,7 +2,6 @@ include pci.mak include usb.mak -CONFIG_ISA_MMIO=y CONFIG_M48T59=y CONFIG_PTIMER=y CONFIG_SERIAL=y diff --git a/default-configs/unicore32-linux-user.mak b/default-configs/unicore32-linux-user.mak deleted file mode 100644 index 6aafd21494..0000000000 --- a/default-configs/unicore32-linux-user.mak +++ /dev/null @@ -1 +0,0 @@ -# Default configuration for unicore32-linux-user diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak index 6e3b312c5f..7f89503f95 100644 --- a/default-configs/x86_64-softmmu.mak +++ b/default-configs/x86_64-softmmu.mak @@ -30,14 +30,12 @@ CONFIG_I8257=y CONFIG_IDE_ISA=y CONFIG_IDE_PIIX=y CONFIG_NE2000_ISA=y -CONFIG_PIIX_PCI=y CONFIG_HPET=y CONFIG_APPLESMC=y CONFIG_I8259=y CONFIG_PFLASH_CFI01=y CONFIG_TPM_TIS=$(CONFIG_TPM) CONFIG_MC146818RTC=y -CONFIG_PAM=y CONFIG_PCI_PIIX=y CONFIG_WDT_IB700=y CONFIG_XEN_I386=$(CONFIG_XEN) diff --git a/disas.c b/disas.c index 05a7a1260a..67f116a19b 100644 --- a/disas.c +++ b/disas.c @@ -310,8 +310,6 @@ void disas(FILE *out, void *code, unsigned long size) print_insn = print_insn_m68k; #elif defined(__s390__) print_insn = print_insn_s390; -#elif defined(__hppa__) - print_insn = print_insn_hppa; #elif defined(__ia64__) print_insn = print_insn_ia64; #endif diff --git a/disas/Makefile.objs b/disas/Makefile.objs index abeba84661..09bc992883 100644 --- a/disas/Makefile.objs +++ b/disas/Makefile.objs @@ -9,7 +9,6 @@ libvixldir = $(SRC_PATH)/disas/libvixl # versions do not. 
arm-a64.o-cflags := -I$(libvixldir) -Wno-sign-compare common-obj-$(CONFIG_CRIS_DIS) += cris.o -common-obj-$(CONFIG_HPPA_DIS) += hppa.o common-obj-$(CONFIG_I386_DIS) += i386.o common-obj-$(CONFIG_IA64_DIS) += ia64.o common-obj-$(CONFIG_M68K_DIS) += m68k.o diff --git a/disas/arm.c b/disas/arm.c index 426270fe82..93c650344c 100644 --- a/disas/arm.c +++ b/disas/arm.c @@ -24,7 +24,6 @@ #include "qemu/osdep.h" #include "disas/bfd.h" -#define ISSPACE(x) ((x) == ' ' || (x) == '\t' || (x) == '\n') #define ARM_EXT_V1 0 #define ARM_EXT_V2 0 @@ -73,15 +72,6 @@ static void floatformat_to_double (unsigned char *data, double *dest) /* End of qemu specific additions. */ -/* FIXME: Belongs in global header. */ -#ifndef strneq -#define strneq(a,b,n) (strncmp ((a), (b), (n)) == 0) -#endif - -#ifndef NUM_ELEM -#define NUM_ELEM(a) (sizeof (a) / sizeof (a)[0]) -#endif - struct opcode32 { unsigned long arch; /* Architecture defining this insn. */ @@ -1528,7 +1518,6 @@ static const char *const iwmmxt_cregnames[] = /* Default to GCC register name set. */ static unsigned int regname_selected = 1; -#define NUM_ARM_REGNAMES NUM_ELEM (regnames) #define arm_regnames regnames[regname_selected].reg_names static bfd_boolean force_thumb = false; diff --git a/disas/hppa.c b/disas/hppa.c deleted file mode 100644 index 43facdc47b..0000000000 --- a/disas/hppa.c +++ /dev/null @@ -1,2832 +0,0 @@ -/* Disassembler for the PA-RISC. Somewhat derived from sparc-pinsn.c. - Copyright 1989, 1990, 1992, 1993, 1994, 1995, 1998, 1999, 2000, 2001, 2003, - 2005 Free Software Foundation, Inc. - - Contributed by the Center for Software Science at the - University of Utah (pa-gdb-bugs@cs.utah.edu). - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#include "qemu/osdep.h" -#include "disas/bfd.h" - -/* HP PA-RISC SOM object file format: definitions internal to BFD. - Copyright 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, - 2003 Free Software Foundation, Inc. - - Contributed by the Center for Software Science at the - University of Utah (pa-gdb-bugs@cs.utah.edu). - - This file is part of BFD, the Binary File Descriptor library. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see . */ - -#ifndef _LIBHPPA_H -#define _LIBHPPA_H - -#define BYTES_IN_WORD 4 -#define PA_PAGESIZE 0x1000 - -/* The PA instruction set variants. 
*/ -enum pa_arch {pa10 = 10, pa11 = 11, pa20 = 20, pa20w = 25}; - -/* HP PA-RISC relocation types */ - -enum hppa_reloc_field_selector_type - { - R_HPPA_FSEL = 0x0, - R_HPPA_LSSEL = 0x1, - R_HPPA_RSSEL = 0x2, - R_HPPA_LSEL = 0x3, - R_HPPA_RSEL = 0x4, - R_HPPA_LDSEL = 0x5, - R_HPPA_RDSEL = 0x6, - R_HPPA_LRSEL = 0x7, - R_HPPA_RRSEL = 0x8, - R_HPPA_NSEL = 0x9, - R_HPPA_NLSEL = 0xa, - R_HPPA_NLRSEL = 0xb, - R_HPPA_PSEL = 0xc, - R_HPPA_LPSEL = 0xd, - R_HPPA_RPSEL = 0xe, - R_HPPA_TSEL = 0xf, - R_HPPA_LTSEL = 0x10, - R_HPPA_RTSEL = 0x11, - R_HPPA_LTPSEL = 0x12, - R_HPPA_RTPSEL = 0x13 - }; - -/* /usr/include/reloc.h defines these to constants. We want to use - them in enums, so #undef them before we start using them. We might - be able to fix this another way by simply managing not to include - /usr/include/reloc.h, but currently GDB picks up these defines - somewhere. */ -#undef e_fsel -#undef e_lssel -#undef e_rssel -#undef e_lsel -#undef e_rsel -#undef e_ldsel -#undef e_rdsel -#undef e_lrsel -#undef e_rrsel -#undef e_nsel -#undef e_nlsel -#undef e_nlrsel -#undef e_psel -#undef e_lpsel -#undef e_rpsel -#undef e_tsel -#undef e_ltsel -#undef e_rtsel -#undef e_one -#undef e_two -#undef e_pcrel -#undef e_con -#undef e_plabel -#undef e_abs - -/* for compatibility */ -enum hppa_reloc_field_selector_type_alt - { - e_fsel = R_HPPA_FSEL, - e_lssel = R_HPPA_LSSEL, - e_rssel = R_HPPA_RSSEL, - e_lsel = R_HPPA_LSEL, - e_rsel = R_HPPA_RSEL, - e_ldsel = R_HPPA_LDSEL, - e_rdsel = R_HPPA_RDSEL, - e_lrsel = R_HPPA_LRSEL, - e_rrsel = R_HPPA_RRSEL, - e_nsel = R_HPPA_NSEL, - e_nlsel = R_HPPA_NLSEL, - e_nlrsel = R_HPPA_NLRSEL, - e_psel = R_HPPA_PSEL, - e_lpsel = R_HPPA_LPSEL, - e_rpsel = R_HPPA_RPSEL, - e_tsel = R_HPPA_TSEL, - e_ltsel = R_HPPA_LTSEL, - e_rtsel = R_HPPA_RTSEL, - e_ltpsel = R_HPPA_LTPSEL, - e_rtpsel = R_HPPA_RTPSEL - }; - -enum hppa_reloc_expr_type - { - R_HPPA_E_ONE = 0, - R_HPPA_E_TWO = 1, - R_HPPA_E_PCREL = 2, - R_HPPA_E_CON = 3, - R_HPPA_E_PLABEL = 7, - R_HPPA_E_ABS = 18 - }; - -/* for compatibility */ -enum hppa_reloc_expr_type_alt - { - e_one = R_HPPA_E_ONE, - e_two = R_HPPA_E_TWO, - e_pcrel = R_HPPA_E_PCREL, - e_con = R_HPPA_E_CON, - e_plabel = R_HPPA_E_PLABEL, - e_abs = R_HPPA_E_ABS - }; - - -/* Relocations for function calls must be accompanied by parameter - relocation bits. These bits describe exactly where the caller has - placed the function's arguments and where it expects to find a return - value. - - Both ELF and SOM encode this information within the addend field - of the call relocation. (Note this could break very badly if one - was to make a call like bl foo + 0x12345678). - - The high order 10 bits contain parameter relocation information, - the low order 22 bits contain the constant offset. */ - -#define HPPA_R_ARG_RELOC(a) \ - (((a) >> 22) & 0x3ff) -#define HPPA_R_CONSTANT(a) \ - ((((bfd_signed_vma)(a)) << (BFD_ARCH_SIZE-22)) >> (BFD_ARCH_SIZE-22)) -#define HPPA_R_ADDEND(r, c) \ - (((r) << 22) + ((c) & 0x3fffff)) - - -/* Some functions to manipulate PA instructions. */ - -/* Declare the functions with the unused attribute to avoid warnings. 
*/ -static inline int sign_extend (int, int) ATTRIBUTE_UNUSED; -static inline int low_sign_extend (int, int) ATTRIBUTE_UNUSED; -static inline int sign_unext (int, int) ATTRIBUTE_UNUSED; -static inline int low_sign_unext (int, int) ATTRIBUTE_UNUSED; -static inline int re_assemble_3 (int) ATTRIBUTE_UNUSED; -static inline int re_assemble_12 (int) ATTRIBUTE_UNUSED; -static inline int re_assemble_14 (int) ATTRIBUTE_UNUSED; -static inline int re_assemble_16 (int) ATTRIBUTE_UNUSED; -static inline int re_assemble_17 (int) ATTRIBUTE_UNUSED; -static inline int re_assemble_21 (int) ATTRIBUTE_UNUSED; -static inline int re_assemble_22 (int) ATTRIBUTE_UNUSED; -static inline bfd_signed_vma hppa_field_adjust - (bfd_vma, bfd_signed_vma, enum hppa_reloc_field_selector_type_alt) - ATTRIBUTE_UNUSED; -static inline int hppa_rebuild_insn (int, int, int) ATTRIBUTE_UNUSED; - - -/* The *sign_extend functions are used to assemble various bitfields - taken from an instruction and return the resulting immediate - value. */ - -static inline int -sign_extend (int x, int len) -{ - int signbit = (1 << (len - 1)); - int mask = (signbit << 1) - 1; - return ((x & mask) ^ signbit) - signbit; -} - -static inline int -low_sign_extend (int x, int len) -{ - return (x >> 1) - ((x & 1) << (len - 1)); -} - - -/* The re_assemble_* functions prepare an immediate value for - insertion into an opcode. pa-risc uses all sorts of weird bitfields - in the instruction to hold the value. */ - -static inline int -sign_unext (int x, int len) -{ - int len_ones; - - len_ones = (1 << len) - 1; - - return x & len_ones; -} - -static inline int -low_sign_unext (int x, int len) -{ - int temp; - int sign; - - sign = (x >> (len-1)) & 1; - - temp = sign_unext (x, len-1); - - return (temp << 1) | sign; -} - -static inline int -re_assemble_3 (int as3) -{ - return (( (as3 & 4) << (13-2)) - | ((as3 & 3) << (13+1))); -} - -static inline int -re_assemble_12 (int as12) -{ - return (( (as12 & 0x800) >> 11) - | ((as12 & 0x400) >> (10 - 2)) - | ((as12 & 0x3ff) << (1 + 2))); -} - -static inline int -re_assemble_14 (int as14) -{ - return (( (as14 & 0x1fff) << 1) - | ((as14 & 0x2000) >> 13)); -} - -static inline int -re_assemble_16 (int as16) -{ - int s, t; - - /* Unusual 16-bit encoding, for wide mode only. */ - t = (as16 << 1) & 0xffff; - s = (as16 & 0x8000); - return (t ^ s ^ (s >> 1)) | (s >> 15); -} - -static inline int -re_assemble_17 (int as17) -{ - return (( (as17 & 0x10000) >> 16) - | ((as17 & 0x0f800) << (16 - 11)) - | ((as17 & 0x00400) >> (10 - 2)) - | ((as17 & 0x003ff) << (1 + 2))); -} - -static inline int -re_assemble_21 (int as21) -{ - return (( (as21 & 0x100000) >> 20) - | ((as21 & 0x0ffe00) >> 8) - | ((as21 & 0x000180) << 7) - | ((as21 & 0x00007c) << 14) - | ((as21 & 0x000003) << 12)); -} - -static inline int -re_assemble_22 (int as22) -{ - return (( (as22 & 0x200000) >> 21) - | ((as22 & 0x1f0000) << (21 - 16)) - | ((as22 & 0x00f800) << (16 - 11)) - | ((as22 & 0x000400) >> (10 - 2)) - | ((as22 & 0x0003ff) << (1 + 2))); -} - - -/* Handle field selectors for PA instructions. - The L and R (and LS, RS etc.) selectors are used in pairs to form a - full 32 bit address. eg. - - LDIL L'start,%r1 ; put left part into r1 - LDW R'start(%r1),%r2 ; add r1 and right part to form address - - This function returns sign extended values in all cases. 
-*/ - -static inline bfd_signed_vma -hppa_field_adjust (bfd_vma sym_val, - bfd_signed_vma addend, - enum hppa_reloc_field_selector_type_alt r_field) -{ - bfd_signed_vma value; - - value = sym_val + addend; - switch (r_field) - { - case e_fsel: - /* F: No change. */ - break; - - case e_nsel: - /* N: null selector. I don't really understand what this is all - about, but HP's documentation says "this indicates that zero - bits are to be used for the displacement on the instruction. - This fixup is used to identify three-instruction sequences to - access data (for importing shared library data)." */ - value = 0; - break; - - case e_lsel: - case e_nlsel: - /* L: Select top 21 bits. */ - value = value >> 11; - break; - - case e_rsel: - /* R: Select bottom 11 bits. */ - value = value & 0x7ff; - break; - - case e_lssel: - /* LS: Round to nearest multiple of 2048 then select top 21 bits. */ - value = value + 0x400; - value = value >> 11; - break; - - case e_rssel: - /* RS: Select bottom 11 bits for LS. - We need to return a value such that 2048 * LS'x + RS'x == x. - ie. RS'x = x - ((x + 0x400) & -0x800) - this is just a sign extension from bit 21. */ - value = ((value & 0x7ff) ^ 0x400) - 0x400; - break; - - case e_ldsel: - /* LD: Round to next multiple of 2048 then select top 21 bits. - Yes, if we are already on a multiple of 2048, we go up to the - next one. RD in this case will be -2048. */ - value = value + 0x800; - value = value >> 11; - break; - - case e_rdsel: - /* RD: Set bits 0-20 to one. */ - value = value | -0x800; - break; - - case e_lrsel: - case e_nlrsel: - /* LR: L with rounding of the addend to nearest 8k. */ - value = sym_val + ((addend + 0x1000) & -0x2000); - value = value >> 11; - break; - - case e_rrsel: - /* RR: R with rounding of the addend to nearest 8k. - We need to return a value such that 2048 * LR'x + RR'x == x - ie. RR'x = s+a - (s + (((a + 0x1000) & -0x2000) & -0x800)) - . = s+a - ((s & -0x800) + ((a + 0x1000) & -0x2000)) - . = (s & 0x7ff) + a - ((a + 0x1000) & -0x2000) */ - value = (sym_val & 0x7ff) + (((addend & 0x1fff) ^ 0x1000) - 0x1000); - break; - - default: - abort (); - } - return value; -} - -/* PA-RISC OPCODES */ -#define get_opcode(insn) (((insn) >> 26) & 0x3f) - -enum hppa_opcode_type -{ - /* None of the opcodes in the first group generate relocs, so we - aren't too concerned about them. 
*/ - OP_SYSOP = 0x00, - OP_MEMMNG = 0x01, - OP_ALU = 0x02, - OP_NDXMEM = 0x03, - OP_SPOP = 0x04, - OP_DIAG = 0x05, - OP_FMPYADD = 0x06, - OP_UNDEF07 = 0x07, - OP_COPRW = 0x09, - OP_COPRDW = 0x0b, - OP_COPR = 0x0c, - OP_FLOAT = 0x0e, - OP_PRDSPEC = 0x0f, - OP_UNDEF15 = 0x15, - OP_UNDEF1d = 0x1d, - OP_FMPYSUB = 0x26, - OP_FPFUSED = 0x2e, - OP_SHEXDP0 = 0x34, - OP_SHEXDP1 = 0x35, - OP_SHEXDP2 = 0x36, - OP_UNDEF37 = 0x37, - OP_SHEXDP3 = 0x3c, - OP_SHEXDP4 = 0x3d, - OP_MULTMED = 0x3e, - OP_UNDEF3f = 0x3f, - - OP_LDIL = 0x08, - OP_ADDIL = 0x0a, - - OP_LDO = 0x0d, - OP_LDB = 0x10, - OP_LDH = 0x11, - OP_LDW = 0x12, - OP_LDWM = 0x13, - OP_STB = 0x18, - OP_STH = 0x19, - OP_STW = 0x1a, - OP_STWM = 0x1b, - - OP_LDD = 0x14, - OP_STD = 0x1c, - - OP_FLDW = 0x16, - OP_LDWL = 0x17, - OP_FSTW = 0x1e, - OP_STWL = 0x1f, - - OP_COMBT = 0x20, - OP_COMIBT = 0x21, - OP_COMBF = 0x22, - OP_COMIBF = 0x23, - OP_CMPBDT = 0x27, - OP_ADDBT = 0x28, - OP_ADDIBT = 0x29, - OP_ADDBF = 0x2a, - OP_ADDIBF = 0x2b, - OP_CMPBDF = 0x2f, - OP_BVB = 0x30, - OP_BB = 0x31, - OP_MOVB = 0x32, - OP_MOVIB = 0x33, - OP_CMPIBD = 0x3b, - - OP_COMICLR = 0x24, - OP_SUBI = 0x25, - OP_ADDIT = 0x2c, - OP_ADDI = 0x2d, - - OP_BE = 0x38, - OP_BLE = 0x39, - OP_BL = 0x3a -}; - - -/* Insert VALUE into INSN using R_FORMAT to determine exactly what - bits to change. */ - -static inline int -hppa_rebuild_insn (int insn, int value, int r_format) -{ - switch (r_format) - { - case 11: - return (insn & ~ 0x7ff) | low_sign_unext (value, 11); - - case 12: - return (insn & ~ 0x1ffd) | re_assemble_12 (value); - - - case 10: - return (insn & ~ 0x3ff1) | re_assemble_14 (value & -8); - - case -11: - return (insn & ~ 0x3ff9) | re_assemble_14 (value & -4); - - case 14: - return (insn & ~ 0x3fff) | re_assemble_14 (value); - - - case -10: - return (insn & ~ 0xfff1) | re_assemble_16 (value & -8); - - case -16: - return (insn & ~ 0xfff9) | re_assemble_16 (value & -4); - - case 16: - return (insn & ~ 0xffff) | re_assemble_16 (value); - - - case 17: - return (insn & ~ 0x1f1ffd) | re_assemble_17 (value); - - case 21: - return (insn & ~ 0x1fffff) | re_assemble_21 (value); - - case 22: - return (insn & ~ 0x3ff1ffd) | re_assemble_22 (value); - - case 32: - return value; - - default: - abort (); - } - return insn; -} - -#endif /* _LIBHPPA_H */ -/* Table of opcodes for the PA-RISC. - Copyright 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, - 2001, 2002, 2003, 2004, 2005 - Free Software Foundation, Inc. - - Contributed by the Center for Software Science at the - University of Utah (pa-gdb-bugs@cs.utah.edu). - -This file is part of GAS, the GNU Assembler, and GDB, the GNU disassembler. - -GAS/GDB is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 1, or (at your option) -any later version. - -GAS/GDB is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GAS or GDB; see the file COPYING. -If not, see . */ - -#if !defined(__STDC__) && !defined(const) -#define const -#endif - -/* - * Structure of an opcode table entry. 
- */ - -/* There are two kinds of delay slot nullification: normal which is - * controlled by the nullification bit, and conditional, which depends - * on the direction of the branch and its success or failure. - * - * NONE is unfortunately #defined in the hiux system include files. - * #undef it away. - */ -#undef NONE -struct pa_opcode -{ - const char *name; - unsigned long int match; /* Bits that must be set... */ - unsigned long int mask; /* ... in these bits. */ - const char *args; - enum pa_arch arch; - char flags; -}; - -/* Enables strict matching. Opcodes with match errors are skipped - when this bit is set. */ -#define FLAG_STRICT 0x1 - -/* - All hppa opcodes are 32 bits. - - The match component is a mask saying which bits must match a - particular opcode in order for an instruction to be an instance - of that opcode. - - The args component is a string containing one character for each operand of - the instruction. Characters used as a prefix allow any second character to - be used without conflicting with the main operand characters. - - Bit positions in this description follow HP usage of lsb = 31, - "at" is lsb of field. - - In the args field, the following characters must match exactly: - - '+,() ' - - In the args field, the following characters are unused: - - ' " - / 34 6789:; ' - '@ C M [\] ' - '` e g } ' - - Here are all the characters: - - ' !"#$%&'()*+-,./0123456789:;<=>?' - '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_' - '`abcdefghijklmnopqrstuvwxyz{|}~ ' - -Kinds of operands: - x integer register field at 15. - b integer register field at 10. - t integer register field at 31. - a integer register field at 10 and 15 (for PERMH) - 5 5 bit immediate at 15. - s 2 bit space specifier at 17. - S 3 bit space specifier at 18. - V 5 bit immediate value at 31 - i 11 bit immediate value at 31 - j 14 bit immediate value at 31 - k 21 bit immediate value at 31 - l 16 bit immediate value at 31 (wide mode only, unusual encoding). - n nullification for branch instructions - N nullification for spop and copr instructions - w 12 bit branch displacement - W 17 bit branch displacement (PC relative) - X 22 bit branch displacement (PC relative) - z 17 bit branch displacement (just a number, not an address) - -Also these: - - . 2 bit shift amount at 25 - * 4 bit shift amount at 25 - p 5 bit shift count at 26 (to support the SHD instruction) encoded as - 31-p - ~ 6 bit shift count at 20,22:26 encoded as 63-~. - P 5 bit bit position at 26 - q 6 bit bit position at 20,22:26 - T 5 bit field length at 31 (encoded as 32-T) - % 6 bit field length at 23,27:31 (variable extract/deposit) - | 6 bit field length at 19,27:31 (fixed extract/deposit) - A 13 bit immediate at 18 (to support the BREAK instruction) - ^ like b, but describes a control register - ! 
sar (cr11) register - D 26 bit immediate at 31 (to support the DIAG instruction) - $ 9 bit immediate at 28 (to support POPBTS) - - v 3 bit Special Function Unit identifier at 25 - O 20 bit Special Function Unit operation split between 15 bits at 20 - and 5 bits at 31 - o 15 bit Special Function Unit operation at 20 - 2 22 bit Special Function Unit operation split between 17 bits at 20 - and 5 bits at 31 - 1 15 bit Special Function Unit operation split between 10 bits at 20 - and 5 bits at 31 - 0 10 bit Special Function Unit operation split between 5 bits at 20 - and 5 bits at 31 - u 3 bit coprocessor unit identifier at 25 - F Source Floating Point Operand Format Completer encoded 2 bits at 20 - I Source Floating Point Operand Format Completer encoded 1 bits at 20 - (for 0xe format FP instructions) - G Destination Floating Point Operand Format Completer encoded 2 bits at 18 - H Floating Point Operand Format at 26 for 'fmpyadd' and 'fmpysub' - (very similar to 'F') - - r 5 bit immediate value at 31 (for the break instruction) - (very similar to V above, except the value is unsigned instead of - low_sign_ext) - R 5 bit immediate value at 15 (for the ssm, rsm, probei instructions) - (same as r above, except the value is in a different location) - U 10 bit immediate value at 15 (for SSM, RSM on pa2.0) - Q 5 bit immediate value at 10 (a bit position specified in - the bb instruction. It's the same as r above, except the - value is in a different location) - B 5 bit immediate value at 10 (a bit position specified in - the bb instruction. Similar to Q, but 64 bit handling is - different. - Z %r1 -- implicit target of addil instruction. - L ,%r2 completer for new syntax branch - { Source format completer for fcnv - _ Destination format completer for fcnv - h cbit for fcmp - = gfx tests for ftest - d 14 bit offset for single precision FP long load/store. - # 14 bit offset for double precision FP load long/store. - J Yet another 14 bit offset for load/store with ma,mb completers. - K Yet another 14 bit offset for load/store with ma,mb completers. - y 16 bit offset for word aligned load/store (PA2.0 wide). - & 16 bit offset for dword aligned load/store (PA2.0 wide). - < 16 bit offset for load/store with ma,mb completers (PA2.0 wide). - > 16 bit offset for load/store with ma,mb completers (PA2.0 wide). - Y %sr0,%r31 -- implicit target of be,l instruction. - @ implicit immediate value of 0 - -Completer operands all have 'c' as the prefix: - - cx indexed load and store completer. - cX indexed load and store completer. Like cx, but emits a space - after in disassembler. - cm short load and store completer. - cM short load and store completer. Like cm, but emits a space - after in disassembler. - cq long load and store completer (like cm, but inserted into a - different location in the target instruction). - cs store bytes short completer. - cA store bytes short completer. Like cs, but emits a space - after in disassembler. - ce long load/store completer for LDW/STW with a different encoding - than the others - cc load cache control hint - cd load and clear cache control hint - cC store cache control hint - co ordered access - - cp branch link and push completer - cP branch pop completer - cl branch link completer - cg branch gate completer - - cw read/write completer for PROBE - cW wide completer for MFCTL - cL local processor completer for cache control - cZ System Control Completer (to support LPA, LHA, etc.) 
- - ci correction completer for DCOR - ca add completer - cy 32 bit add carry completer - cY 64 bit add carry completer - cv signed overflow trap completer - ct trap on condition completer for ADDI, SUB - cT trap on condition completer for UADDCM - cb 32 bit borrow completer for SUB - cB 64 bit borrow completer for SUB - - ch left/right half completer - cH signed/unsigned saturation completer - cS signed/unsigned completer at 21 - cz zero/sign extension completer. - c* permutation completer - -Condition operands all have '?' as the prefix: - - ?f Floating point compare conditions (encoded as 5 bits at 31) - - ?a add conditions - ?A 64 bit add conditions - ?@ add branch conditions followed by nullify - ?d non-negated add branch conditions - ?D negated add branch conditions - ?w wide mode non-negated add branch conditions - ?W wide mode negated add branch conditions - - ?s compare/subtract conditions - ?S 64 bit compare/subtract conditions - ?t non-negated compare and branch conditions - ?n 32 bit compare and branch conditions followed by nullify - ?N 64 bit compare and branch conditions followed by nullify - ?Q 64 bit compare and branch conditions for CMPIB instruction - - ?l logical conditions - ?L 64 bit logical conditions - - ?b branch on bit conditions - ?B 64 bit branch on bit conditions - - ?x shift/extract/deposit conditions - ?X 64 bit shift/extract/deposit conditions - ?y shift/extract/deposit conditions followed by nullify for conditional - branches - - ?u unit conditions - ?U 64 bit unit conditions - -Floating point registers all have 'f' as a prefix: - - ft target register at 31 - fT target register with L/R halves at 31 - fa operand 1 register at 10 - fA operand 1 register with L/R halves at 10 - fX Same as fA, except prints a space before register during disasm - fb operand 2 register at 15 - fB operand 2 register with L/R halves at 15 - fC operand 3 register with L/R halves at 16:18,21:23 - fe Like fT, but encoding is different. - fE Same as fe, except prints a space before register during disasm. - fx target register at 15 (only for PA 2.0 long format FLDD/FSTD). - -Float registers for fmpyadd and fmpysub: - - fi mult operand 1 register at 10 - fj mult operand 2 register at 15 - fk mult target register at 20 - fl add/sub operand register at 25 - fm add/sub target register at 31 - -*/ - - -#if 0 -/* List of characters not to put a space after. Note that - "," is included, as the "spopN" operations use literal - commas in their completer sections. */ -static const char *const completer_chars = ",CcY<>?!@+&U~FfGHINnOoZMadu|/=0123%e$m}"; -#endif - -/* The order of the opcodes in this table is significant: - - * The assembler requires that all instances of the same mnemonic be - consecutive. If they aren't, the assembler will bomb at runtime. - - * Immediate fields use pa_get_absolute_expression to parse the - string. It will generate a "bad expression" error if passed - a register name. Thus, register index variants of an opcode - need to precede immediate variants. - - * The disassembler does not care about the order of the opcodes - except in cases where implicit addressing is used. - - Here are the rules for ordering the opcodes of a mnemonic: - - 1) Opcodes with FLAG_STRICT should precede opcodes without - FLAG_STRICT. - - 2) Opcodes with FLAG_STRICT should be ordered as follows: - register index opcodes, short immediate opcodes, and finally - long immediate opcodes. 
When both pa10 and pa11 variants - of the same opcode are available, the pa10 opcode should - come first for correct architectural promotion. - - 3) When implicit addressing is available for an opcode, the - implicit opcode should precede the explicit opcode. - - 4) Opcodes without FLAG_STRICT should be ordered as follows: - register index opcodes, long immediate opcodes, and finally - short immediate opcodes. */ - -static const struct pa_opcode pa_opcodes[] = -{ - -/* Pseudo-instructions. */ - -{ "ldi", 0x34000000, 0xffe00000, "l,x", pa20w, 0},/* ldo val(r0),r */ -{ "ldi", 0x34000000, 0xffe0c000, "j,x", pa10, 0},/* ldo val(r0),r */ - -{ "cmpib", 0xec000000, 0xfc000000, "?Qn5,b,w", pa20, FLAG_STRICT}, -{ "cmpib", 0x84000000, 0xf4000000, "?nn5,b,w", pa10, FLAG_STRICT}, -{ "comib", 0x84000000, 0xfc000000, "?nn5,b,w", pa10, 0}, /* comib{tf}*/ -/* This entry is for the disassembler only. It will never be used by - assembler. */ -{ "comib", 0x8c000000, 0xfc000000, "?nn5,b,w", pa10, 0}, /* comib{tf}*/ -{ "cmpb", 0x9c000000, 0xdc000000, "?Nnx,b,w", pa20, FLAG_STRICT}, -{ "cmpb", 0x80000000, 0xf4000000, "?nnx,b,w", pa10, FLAG_STRICT}, -{ "comb", 0x80000000, 0xfc000000, "?nnx,b,w", pa10, 0}, /* comb{tf} */ -/* This entry is for the disassembler only. It will never be used by - assembler. */ -{ "comb", 0x88000000, 0xfc000000, "?nnx,b,w", pa10, 0}, /* comb{tf} */ -{ "addb", 0xa0000000, 0xf4000000, "?Wnx,b,w", pa20w, FLAG_STRICT}, -{ "addb", 0xa0000000, 0xfc000000, "?@nx,b,w", pa10, 0}, /* addb{tf} */ -/* This entry is for the disassembler only. It will never be used by - assembler. */ -{ "addb", 0xa8000000, 0xfc000000, "?@nx,b,w", pa10, 0}, -{ "addib", 0xa4000000, 0xf4000000, "?Wn5,b,w", pa20w, FLAG_STRICT}, -{ "addib", 0xa4000000, 0xfc000000, "?@n5,b,w", pa10, 0}, /* addib{tf}*/ -/* This entry is for the disassembler only. It will never be used by - assembler. */ -{ "addib", 0xac000000, 0xfc000000, "?@n5,b,w", pa10, 0}, /* addib{tf}*/ -{ "nop", 0x08000240, 0xffffffff, "", pa10, 0}, /* or 0,0,0 */ -{ "copy", 0x08000240, 0xffe0ffe0, "x,t", pa10, 0}, /* or r,0,t */ -{ "mtsar", 0x01601840, 0xffe0ffff, "x", pa10, 0}, /* mtctl r,cr11 */ - -/* Loads and Stores for integer registers. 
*/ - -{ "ldd", 0x0c0000c0, 0xfc00d3c0, "cxccx(b),t", pa20, FLAG_STRICT}, -{ "ldd", 0x0c0000c0, 0xfc0013c0, "cxccx(s,b),t", pa20, FLAG_STRICT}, -{ "ldd", 0x0c0010e0, 0xfc1ff3e0, "cocc@(b),t", pa20, FLAG_STRICT}, -{ "ldd", 0x0c0010e0, 0xfc1f33e0, "cocc@(s,b),t", pa20, FLAG_STRICT}, -{ "ldd", 0x0c0010c0, 0xfc00d3c0, "cmcc5(b),t", pa20, FLAG_STRICT}, -{ "ldd", 0x0c0010c0, 0xfc0013c0, "cmcc5(s,b),t", pa20, FLAG_STRICT}, -{ "ldd", 0x50000000, 0xfc000002, "cq&(b),x", pa20w, FLAG_STRICT}, -{ "ldd", 0x50000000, 0xfc00c002, "cq#(b),x", pa20, FLAG_STRICT}, -{ "ldd", 0x50000000, 0xfc000002, "cq#(s,b),x", pa20, FLAG_STRICT}, -{ "ldw", 0x0c000080, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldw", 0x0c000080, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldw", 0x0c000080, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldw", 0x0c000080, 0xfc0013c0, "cxccx(s,b),t", pa11, FLAG_STRICT}, -{ "ldw", 0x0c0010a0, 0xfc1ff3e0, "cocc@(b),t", pa20, FLAG_STRICT}, -{ "ldw", 0x0c0010a0, 0xfc1f33e0, "cocc@(s,b),t", pa20, FLAG_STRICT}, -{ "ldw", 0x0c001080, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldw", 0x0c001080, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldw", 0x0c001080, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldw", 0x0c001080, 0xfc0013c0, "cmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "ldw", 0x4c000000, 0xfc000000, "ce<(b),x", pa20w, FLAG_STRICT}, -{ "ldw", 0x5c000004, 0xfc000006, "ce>(b),x", pa20w, FLAG_STRICT}, -{ "ldw", 0x48000000, 0xfc000000, "l(b),x", pa20w, FLAG_STRICT}, -{ "ldw", 0x5c000004, 0xfc00c006, "ceK(b),x", pa20, FLAG_STRICT}, -{ "ldw", 0x5c000004, 0xfc000006, "ceK(s,b),x", pa20, FLAG_STRICT}, -{ "ldw", 0x4c000000, 0xfc00c000, "ceJ(b),x", pa10, FLAG_STRICT}, -{ "ldw", 0x4c000000, 0xfc000000, "ceJ(s,b),x", pa10, FLAG_STRICT}, -{ "ldw", 0x48000000, 0xfc00c000, "j(b),x", pa10, 0}, -{ "ldw", 0x48000000, 0xfc000000, "j(s,b),x", pa10, 0}, -{ "ldh", 0x0c000040, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldh", 0x0c000040, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldh", 0x0c000040, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldh", 0x0c000040, 0xfc0013c0, "cxccx(s,b),t", pa11, FLAG_STRICT}, -{ "ldh", 0x0c001060, 0xfc1ff3e0, "cocc@(b),t", pa20, FLAG_STRICT}, -{ "ldh", 0x0c001060, 0xfc1f33e0, "cocc@(s,b),t", pa20, FLAG_STRICT}, -{ "ldh", 0x0c001040, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldh", 0x0c001040, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldh", 0x0c001040, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldh", 0x0c001040, 0xfc0013c0, "cmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "ldh", 0x44000000, 0xfc000000, "l(b),x", pa20w, FLAG_STRICT}, -{ "ldh", 0x44000000, 0xfc00c000, "j(b),x", pa10, 0}, -{ "ldh", 0x44000000, 0xfc000000, "j(s,b),x", pa10, 0}, -{ "ldb", 0x0c000000, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldb", 0x0c000000, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldb", 0x0c000000, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldb", 0x0c000000, 0xfc0013c0, "cxccx(s,b),t", pa11, FLAG_STRICT}, -{ "ldb", 0x0c001020, 0xfc1ff3e0, "cocc@(b),t", pa20, FLAG_STRICT}, -{ "ldb", 0x0c001020, 0xfc1f33e0, "cocc@(s,b),t", pa20, FLAG_STRICT}, -{ "ldb", 0x0c001000, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldb", 0x0c001000, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldb", 0x0c001000, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldb", 0x0c001000, 0xfc0013c0, "cmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "ldb", 0x40000000, 0xfc000000, "l(b),x", pa20w, FLAG_STRICT}, -{ "ldb", 0x40000000, 0xfc00c000, "j(b),x", pa10, 0}, -{ "ldb", 
0x40000000, 0xfc000000, "j(s,b),x", pa10, 0}, -{ "std", 0x0c0012e0, 0xfc00f3ff, "cocCx,@(b)", pa20, FLAG_STRICT}, -{ "std", 0x0c0012e0, 0xfc0033ff, "cocCx,@(s,b)", pa20, FLAG_STRICT}, -{ "std", 0x0c0012c0, 0xfc00d3c0, "cmcCx,V(b)", pa20, FLAG_STRICT}, -{ "std", 0x0c0012c0, 0xfc0013c0, "cmcCx,V(s,b)", pa20, FLAG_STRICT}, -{ "std", 0x70000000, 0xfc000002, "cqx,&(b)", pa20w, FLAG_STRICT}, -{ "std", 0x70000000, 0xfc00c002, "cqx,#(b)", pa20, FLAG_STRICT}, -{ "std", 0x70000000, 0xfc000002, "cqx,#(s,b)", pa20, FLAG_STRICT}, -{ "stw", 0x0c0012a0, 0xfc00f3ff, "cocCx,@(b)", pa20, FLAG_STRICT}, -{ "stw", 0x0c0012a0, 0xfc0033ff, "cocCx,@(s,b)", pa20, FLAG_STRICT}, -{ "stw", 0x0c001280, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "stw", 0x0c001280, 0xfc001fc0, "cMx,V(s,b)", pa10, FLAG_STRICT}, -{ "stw", 0x0c001280, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "stw", 0x0c001280, 0xfc0013c0, "cmcCx,V(s,b)", pa11, FLAG_STRICT}, -{ "stw", 0x6c000000, 0xfc000000, "cex,<(b)", pa20w, FLAG_STRICT}, -{ "stw", 0x7c000004, 0xfc000006, "cex,>(b)", pa20w, FLAG_STRICT}, -{ "stw", 0x68000000, 0xfc000000, "x,l(b)", pa20w, FLAG_STRICT}, -{ "stw", 0x7c000004, 0xfc00c006, "cex,K(b)", pa20, FLAG_STRICT}, -{ "stw", 0x7c000004, 0xfc000006, "cex,K(s,b)", pa20, FLAG_STRICT}, -{ "stw", 0x6c000000, 0xfc00c000, "cex,J(b)", pa10, FLAG_STRICT}, -{ "stw", 0x6c000000, 0xfc000000, "cex,J(s,b)", pa10, FLAG_STRICT}, -{ "stw", 0x68000000, 0xfc00c000, "x,j(b)", pa10, 0}, -{ "stw", 0x68000000, 0xfc000000, "x,j(s,b)", pa10, 0}, -{ "sth", 0x0c001260, 0xfc00f3ff, "cocCx,@(b)", pa20, FLAG_STRICT}, -{ "sth", 0x0c001260, 0xfc0033ff, "cocCx,@(s,b)", pa20, FLAG_STRICT}, -{ "sth", 0x0c001240, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "sth", 0x0c001240, 0xfc001fc0, "cMx,V(s,b)", pa10, FLAG_STRICT}, -{ "sth", 0x0c001240, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "sth", 0x0c001240, 0xfc0013c0, "cmcCx,V(s,b)", pa11, FLAG_STRICT}, -{ "sth", 0x64000000, 0xfc000000, "x,l(b)", pa20w, FLAG_STRICT}, -{ "sth", 0x64000000, 0xfc00c000, "x,j(b)", pa10, 0}, -{ "sth", 0x64000000, 0xfc000000, "x,j(s,b)", pa10, 0}, -{ "stb", 0x0c001220, 0xfc00f3ff, "cocCx,@(b)", pa20, FLAG_STRICT}, -{ "stb", 0x0c001220, 0xfc0033ff, "cocCx,@(s,b)", pa20, FLAG_STRICT}, -{ "stb", 0x0c001200, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "stb", 0x0c001200, 0xfc001fc0, "cMx,V(s,b)", pa10, FLAG_STRICT}, -{ "stb", 0x0c001200, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "stb", 0x0c001200, 0xfc0013c0, "cmcCx,V(s,b)", pa11, FLAG_STRICT}, -{ "stb", 0x60000000, 0xfc000000, "x,l(b)", pa20w, FLAG_STRICT}, -{ "stb", 0x60000000, 0xfc00c000, "x,j(b)", pa10, 0}, -{ "stb", 0x60000000, 0xfc000000, "x,j(s,b)", pa10, 0}, -{ "ldwm", 0x4c000000, 0xfc00c000, "j(b),x", pa10, 0}, -{ "ldwm", 0x4c000000, 0xfc000000, "j(s,b),x", pa10, 0}, -{ "stwm", 0x6c000000, 0xfc00c000, "x,j(b)", pa10, 0}, -{ "stwm", 0x6c000000, 0xfc000000, "x,j(s,b)", pa10, 0}, -{ "ldwx", 0x0c000080, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldwx", 0x0c000080, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldwx", 0x0c000080, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldwx", 0x0c000080, 0xfc0013c0, "cxccx(s,b),t", pa11, FLAG_STRICT}, -{ "ldwx", 0x0c000080, 0xfc00dfc0, "cXx(b),t", pa10, 0}, -{ "ldwx", 0x0c000080, 0xfc001fc0, "cXx(s,b),t", pa10, 0}, -{ "ldhx", 0x0c000040, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldhx", 0x0c000040, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldhx", 0x0c000040, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldhx", 0x0c000040, 0xfc0013c0, "cxccx(s,b),t", 
pa11, FLAG_STRICT}, -{ "ldhx", 0x0c000040, 0xfc00dfc0, "cXx(b),t", pa10, 0}, -{ "ldhx", 0x0c000040, 0xfc001fc0, "cXx(s,b),t", pa10, 0}, -{ "ldbx", 0x0c000000, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldbx", 0x0c000000, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldbx", 0x0c000000, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldbx", 0x0c000000, 0xfc0013c0, "cxccx(s,b),t", pa11, FLAG_STRICT}, -{ "ldbx", 0x0c000000, 0xfc00dfc0, "cXx(b),t", pa10, 0}, -{ "ldbx", 0x0c000000, 0xfc001fc0, "cXx(s,b),t", pa10, 0}, -{ "ldwa", 0x0c000180, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldwa", 0x0c000180, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldwa", 0x0c0011a0, 0xfc1ff3e0, "cocc@(b),t", pa20, FLAG_STRICT}, -{ "ldwa", 0x0c001180, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldwa", 0x0c001180, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldcw", 0x0c0001c0, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldcw", 0x0c0001c0, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldcw", 0x0c0001c0, 0xfc00d3c0, "cxcdx(b),t", pa11, FLAG_STRICT}, -{ "ldcw", 0x0c0001c0, 0xfc0013c0, "cxcdx(s,b),t", pa11, FLAG_STRICT}, -{ "ldcw", 0x0c0011c0, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldcw", 0x0c0011c0, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldcw", 0x0c0011c0, 0xfc00d3c0, "cmcd5(b),t", pa11, FLAG_STRICT}, -{ "ldcw", 0x0c0011c0, 0xfc0013c0, "cmcd5(s,b),t", pa11, FLAG_STRICT}, -{ "stwa", 0x0c0013a0, 0xfc00d3ff, "cocCx,@(b)", pa20, FLAG_STRICT}, -{ "stwa", 0x0c001380, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "stwa", 0x0c001380, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "stby", 0x0c001300, 0xfc00dfc0, "cAx,V(b)", pa10, FLAG_STRICT}, -{ "stby", 0x0c001300, 0xfc001fc0, "cAx,V(s,b)", pa10, FLAG_STRICT}, -{ "stby", 0x0c001300, 0xfc00d3c0, "cscCx,V(b)", pa11, FLAG_STRICT}, -{ "stby", 0x0c001300, 0xfc0013c0, "cscCx,V(s,b)", pa11, FLAG_STRICT}, -{ "ldda", 0x0c000100, 0xfc00d3c0, "cxccx(b),t", pa20, FLAG_STRICT}, -{ "ldda", 0x0c001120, 0xfc1ff3e0, "cocc@(b),t", pa20, FLAG_STRICT}, -{ "ldda", 0x0c001100, 0xfc00d3c0, "cmcc5(b),t", pa20, FLAG_STRICT}, -{ "ldcd", 0x0c000140, 0xfc00d3c0, "cxcdx(b),t", pa20, FLAG_STRICT}, -{ "ldcd", 0x0c000140, 0xfc0013c0, "cxcdx(s,b),t", pa20, FLAG_STRICT}, -{ "ldcd", 0x0c001140, 0xfc00d3c0, "cmcd5(b),t", pa20, FLAG_STRICT}, -{ "ldcd", 0x0c001140, 0xfc0013c0, "cmcd5(s,b),t", pa20, FLAG_STRICT}, -{ "stda", 0x0c0013e0, 0xfc00f3ff, "cocCx,@(b)", pa20, FLAG_STRICT}, -{ "stda", 0x0c0013c0, 0xfc00d3c0, "cmcCx,V(b)", pa20, FLAG_STRICT}, -{ "ldwax", 0x0c000180, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldwax", 0x0c000180, 0xfc00d3c0, "cxccx(b),t", pa11, FLAG_STRICT}, -{ "ldwax", 0x0c000180, 0xfc00dfc0, "cXx(b),t", pa10, 0}, -{ "ldcwx", 0x0c0001c0, 0xfc00dfc0, "cXx(b),t", pa10, FLAG_STRICT}, -{ "ldcwx", 0x0c0001c0, 0xfc001fc0, "cXx(s,b),t", pa10, FLAG_STRICT}, -{ "ldcwx", 0x0c0001c0, 0xfc00d3c0, "cxcdx(b),t", pa11, FLAG_STRICT}, -{ "ldcwx", 0x0c0001c0, 0xfc0013c0, "cxcdx(s,b),t", pa11, FLAG_STRICT}, -{ "ldcwx", 0x0c0001c0, 0xfc00dfc0, "cXx(b),t", pa10, 0}, -{ "ldcwx", 0x0c0001c0, 0xfc001fc0, "cXx(s,b),t", pa10, 0}, -{ "ldws", 0x0c001080, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldws", 0x0c001080, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldws", 0x0c001080, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldws", 0x0c001080, 0xfc0013c0, "cmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "ldws", 0x0c001080, 0xfc00dfc0, "cM5(b),t", pa10, 0}, -{ "ldws", 0x0c001080, 0xfc001fc0, "cM5(s,b),t", pa10, 0}, -{ "ldhs", 0x0c001040, 0xfc00dfc0, 
"cM5(b),t", pa10, FLAG_STRICT}, -{ "ldhs", 0x0c001040, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldhs", 0x0c001040, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldhs", 0x0c001040, 0xfc0013c0, "cmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "ldhs", 0x0c001040, 0xfc00dfc0, "cM5(b),t", pa10, 0}, -{ "ldhs", 0x0c001040, 0xfc001fc0, "cM5(s,b),t", pa10, 0}, -{ "ldbs", 0x0c001000, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldbs", 0x0c001000, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldbs", 0x0c001000, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldbs", 0x0c001000, 0xfc0013c0, "cmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "ldbs", 0x0c001000, 0xfc00dfc0, "cM5(b),t", pa10, 0}, -{ "ldbs", 0x0c001000, 0xfc001fc0, "cM5(s,b),t", pa10, 0}, -{ "ldwas", 0x0c001180, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldwas", 0x0c001180, 0xfc00d3c0, "cmcc5(b),t", pa11, FLAG_STRICT}, -{ "ldwas", 0x0c001180, 0xfc00dfc0, "cM5(b),t", pa10, 0}, -{ "ldcws", 0x0c0011c0, 0xfc00dfc0, "cM5(b),t", pa10, FLAG_STRICT}, -{ "ldcws", 0x0c0011c0, 0xfc001fc0, "cM5(s,b),t", pa10, FLAG_STRICT}, -{ "ldcws", 0x0c0011c0, 0xfc00d3c0, "cmcd5(b),t", pa11, FLAG_STRICT}, -{ "ldcws", 0x0c0011c0, 0xfc0013c0, "cmcd5(s,b),t", pa11, FLAG_STRICT}, -{ "ldcws", 0x0c0011c0, 0xfc00dfc0, "cM5(b),t", pa10, 0}, -{ "ldcws", 0x0c0011c0, 0xfc001fc0, "cM5(s,b),t", pa10, 0}, -{ "stws", 0x0c001280, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "stws", 0x0c001280, 0xfc001fc0, "cMx,V(s,b)", pa10, FLAG_STRICT}, -{ "stws", 0x0c001280, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "stws", 0x0c001280, 0xfc0013c0, "cmcCx,V(s,b)", pa11, FLAG_STRICT}, -{ "stws", 0x0c001280, 0xfc00dfc0, "cMx,V(b)", pa10, 0}, -{ "stws", 0x0c001280, 0xfc001fc0, "cMx,V(s,b)", pa10, 0}, -{ "sths", 0x0c001240, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "sths", 0x0c001240, 0xfc001fc0, "cMx,V(s,b)", pa10, FLAG_STRICT}, -{ "sths", 0x0c001240, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "sths", 0x0c001240, 0xfc0013c0, "cmcCx,V(s,b)", pa11, FLAG_STRICT}, -{ "sths", 0x0c001240, 0xfc00dfc0, "cMx,V(b)", pa10, 0}, -{ "sths", 0x0c001240, 0xfc001fc0, "cMx,V(s,b)", pa10, 0}, -{ "stbs", 0x0c001200, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "stbs", 0x0c001200, 0xfc001fc0, "cMx,V(s,b)", pa10, FLAG_STRICT}, -{ "stbs", 0x0c001200, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "stbs", 0x0c001200, 0xfc0013c0, "cmcCx,V(s,b)", pa11, FLAG_STRICT}, -{ "stbs", 0x0c001200, 0xfc00dfc0, "cMx,V(b)", pa10, 0}, -{ "stbs", 0x0c001200, 0xfc001fc0, "cMx,V(s,b)", pa10, 0}, -{ "stwas", 0x0c001380, 0xfc00dfc0, "cMx,V(b)", pa10, FLAG_STRICT}, -{ "stwas", 0x0c001380, 0xfc00d3c0, "cmcCx,V(b)", pa11, FLAG_STRICT}, -{ "stwas", 0x0c001380, 0xfc00dfc0, "cMx,V(b)", pa10, 0}, -{ "stdby", 0x0c001340, 0xfc00d3c0, "cscCx,V(b)", pa20, FLAG_STRICT}, -{ "stdby", 0x0c001340, 0xfc0013c0, "cscCx,V(s,b)", pa20, FLAG_STRICT}, -{ "stbys", 0x0c001300, 0xfc00dfc0, "cAx,V(b)", pa10, FLAG_STRICT}, -{ "stbys", 0x0c001300, 0xfc001fc0, "cAx,V(s,b)", pa10, FLAG_STRICT}, -{ "stbys", 0x0c001300, 0xfc00d3c0, "cscCx,V(b)", pa11, FLAG_STRICT}, -{ "stbys", 0x0c001300, 0xfc0013c0, "cscCx,V(s,b)", pa11, FLAG_STRICT}, -{ "stbys", 0x0c001300, 0xfc00dfc0, "cAx,V(b)", pa10, 0}, -{ "stbys", 0x0c001300, 0xfc001fc0, "cAx,V(s,b)", pa10, 0}, - -/* Immediate instructions. 
*/ -{ "ldo", 0x34000000, 0xfc000000, "l(b),x", pa20w, 0}, -{ "ldo", 0x34000000, 0xfc00c000, "j(b),x", pa10, 0}, -{ "ldil", 0x20000000, 0xfc000000, "k,b", pa10, 0}, -{ "addil", 0x28000000, 0xfc000000, "k,b,Z", pa10, 0}, -{ "addil", 0x28000000, 0xfc000000, "k,b", pa10, 0}, - -/* Branching instructions. */ -{ "b", 0xe8008000, 0xfc00e000, "cpnXL", pa20, FLAG_STRICT}, -{ "b", 0xe800a000, 0xfc00e000, "clnXL", pa20, FLAG_STRICT}, -{ "b", 0xe8000000, 0xfc00e000, "clnW,b", pa10, FLAG_STRICT}, -{ "b", 0xe8002000, 0xfc00e000, "cgnW,b", pa10, FLAG_STRICT}, -{ "b", 0xe8000000, 0xffe0e000, "nW", pa10, 0}, /* b,l foo,r0 */ -{ "bl", 0xe8000000, 0xfc00e000, "nW,b", pa10, 0}, -{ "gate", 0xe8002000, 0xfc00e000, "nW,b", pa10, 0}, -{ "blr", 0xe8004000, 0xfc00e001, "nx,b", pa10, 0}, -{ "bv", 0xe800c000, 0xfc00fffd, "nx(b)", pa10, 0}, -{ "bv", 0xe800c000, 0xfc00fffd, "n(b)", pa10, 0}, -{ "bve", 0xe800f001, 0xfc1ffffd, "cpn(b)L", pa20, FLAG_STRICT}, -{ "bve", 0xe800f000, 0xfc1ffffd, "cln(b)L", pa20, FLAG_STRICT}, -{ "bve", 0xe800d001, 0xfc1ffffd, "cPn(b)", pa20, FLAG_STRICT}, -{ "bve", 0xe800d000, 0xfc1ffffd, "n(b)", pa20, FLAG_STRICT}, -{ "be", 0xe4000000, 0xfc000000, "clnz(S,b),Y", pa10, FLAG_STRICT}, -{ "be", 0xe4000000, 0xfc000000, "clnz(b),Y", pa10, FLAG_STRICT}, -{ "be", 0xe0000000, 0xfc000000, "nz(S,b)", pa10, 0}, -{ "be", 0xe0000000, 0xfc000000, "nz(b)", pa10, 0}, -{ "ble", 0xe4000000, 0xfc000000, "nz(S,b)", pa10, 0}, -{ "movb", 0xc8000000, 0xfc000000, "?ynx,b,w", pa10, 0}, -{ "movib", 0xcc000000, 0xfc000000, "?yn5,b,w", pa10, 0}, -{ "combt", 0x80000000, 0xfc000000, "?tnx,b,w", pa10, 0}, -{ "combf", 0x88000000, 0xfc000000, "?tnx,b,w", pa10, 0}, -{ "comibt", 0x84000000, 0xfc000000, "?tn5,b,w", pa10, 0}, -{ "comibf", 0x8c000000, 0xfc000000, "?tn5,b,w", pa10, 0}, -{ "addbt", 0xa0000000, 0xfc000000, "?dnx,b,w", pa10, 0}, -{ "addbf", 0xa8000000, 0xfc000000, "?dnx,b,w", pa10, 0}, -{ "addibt", 0xa4000000, 0xfc000000, "?dn5,b,w", pa10, 0}, -{ "addibf", 0xac000000, 0xfc000000, "?dn5,b,w", pa10, 0}, -{ "bb", 0xc0004000, 0xffe06000, "?bnx,!,w", pa10, FLAG_STRICT}, -{ "bb", 0xc0006000, 0xffe06000, "?Bnx,!,w", pa20, FLAG_STRICT}, -{ "bb", 0xc4004000, 0xfc006000, "?bnx,Q,w", pa10, FLAG_STRICT}, -{ "bb", 0xc4004000, 0xfc004000, "?Bnx,B,w", pa20, FLAG_STRICT}, -{ "bvb", 0xc0004000, 0xffe04000, "?bnx,w", pa10, 0}, -{ "clrbts", 0xe8004005, 0xffffffff, "", pa20, FLAG_STRICT}, -{ "popbts", 0xe8004005, 0xfffff007, "$", pa20, FLAG_STRICT}, -{ "pushnom", 0xe8004001, 0xffffffff, "", pa20, FLAG_STRICT}, -{ "pushbts", 0xe8004001, 0xffe0ffff, "x", pa20, FLAG_STRICT}, - -/* Computation Instructions. 
*/ - -{ "cmpclr", 0x080008a0, 0xfc000fe0, "?Sx,b,t", pa20, FLAG_STRICT}, -{ "cmpclr", 0x08000880, 0xfc000fe0, "?sx,b,t", pa10, FLAG_STRICT}, -{ "comclr", 0x08000880, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "or", 0x08000260, 0xfc000fe0, "?Lx,b,t", pa20, FLAG_STRICT}, -{ "or", 0x08000240, 0xfc000fe0, "?lx,b,t", pa10, 0}, -{ "xor", 0x080002a0, 0xfc000fe0, "?Lx,b,t", pa20, FLAG_STRICT}, -{ "xor", 0x08000280, 0xfc000fe0, "?lx,b,t", pa10, 0}, -{ "and", 0x08000220, 0xfc000fe0, "?Lx,b,t", pa20, FLAG_STRICT}, -{ "and", 0x08000200, 0xfc000fe0, "?lx,b,t", pa10, 0}, -{ "andcm", 0x08000020, 0xfc000fe0, "?Lx,b,t", pa20, FLAG_STRICT}, -{ "andcm", 0x08000000, 0xfc000fe0, "?lx,b,t", pa10, 0}, -{ "uxor", 0x080003a0, 0xfc000fe0, "?Ux,b,t", pa20, FLAG_STRICT}, -{ "uxor", 0x08000380, 0xfc000fe0, "?ux,b,t", pa10, 0}, -{ "uaddcm", 0x080009a0, 0xfc000fa0, "cT?Ux,b,t", pa20, FLAG_STRICT}, -{ "uaddcm", 0x08000980, 0xfc000fa0, "cT?ux,b,t", pa10, FLAG_STRICT}, -{ "uaddcm", 0x08000980, 0xfc000fe0, "?ux,b,t", pa10, 0}, -{ "uaddcmt", 0x080009c0, 0xfc000fe0, "?ux,b,t", pa10, 0}, -{ "dcor", 0x08000ba0, 0xfc1f0fa0, "ci?Ub,t", pa20, FLAG_STRICT}, -{ "dcor", 0x08000b80, 0xfc1f0fa0, "ci?ub,t", pa10, FLAG_STRICT}, -{ "dcor", 0x08000b80, 0xfc1f0fe0, "?ub,t", pa10, 0}, -{ "idcor", 0x08000bc0, 0xfc1f0fe0, "?ub,t", pa10, 0}, -{ "addi", 0xb0000000, 0xfc000000, "ct?ai,b,x", pa10, FLAG_STRICT}, -{ "addi", 0xb4000000, 0xfc000000, "cv?ai,b,x", pa10, FLAG_STRICT}, -{ "addi", 0xb4000000, 0xfc000800, "?ai,b,x", pa10, 0}, -{ "addio", 0xb4000800, 0xfc000800, "?ai,b,x", pa10, 0}, -{ "addit", 0xb0000000, 0xfc000800, "?ai,b,x", pa10, 0}, -{ "addito", 0xb0000800, 0xfc000800, "?ai,b,x", pa10, 0}, -{ "add", 0x08000720, 0xfc0007e0, "cY?Ax,b,t", pa20, FLAG_STRICT}, -{ "add", 0x08000700, 0xfc0007e0, "cy?ax,b,t", pa10, FLAG_STRICT}, -{ "add", 0x08000220, 0xfc0003e0, "ca?Ax,b,t", pa20, FLAG_STRICT}, -{ "add", 0x08000200, 0xfc0003e0, "ca?ax,b,t", pa10, FLAG_STRICT}, -{ "add", 0x08000600, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "addl", 0x08000a00, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "addo", 0x08000e00, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "addc", 0x08000700, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "addco", 0x08000f00, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sub", 0x080004e0, 0xfc0007e0, "ct?Sx,b,t", pa20, FLAG_STRICT}, -{ "sub", 0x080004c0, 0xfc0007e0, "ct?sx,b,t", pa10, FLAG_STRICT}, -{ "sub", 0x08000520, 0xfc0007e0, "cB?Sx,b,t", pa20, FLAG_STRICT}, -{ "sub", 0x08000500, 0xfc0007e0, "cb?sx,b,t", pa10, FLAG_STRICT}, -{ "sub", 0x08000420, 0xfc0007e0, "cv?Sx,b,t", pa20, FLAG_STRICT}, -{ "sub", 0x08000400, 0xfc0007e0, "cv?sx,b,t", pa10, FLAG_STRICT}, -{ "sub", 0x08000400, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "subo", 0x08000c00, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "subb", 0x08000500, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "subbo", 0x08000d00, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "subt", 0x080004c0, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "subto", 0x08000cc0, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "ds", 0x08000440, 0xfc000fe0, "?sx,b,t", pa10, 0}, -{ "subi", 0x94000000, 0xfc000000, "cv?si,b,x", pa10, FLAG_STRICT}, -{ "subi", 0x94000000, 0xfc000800, "?si,b,x", pa10, 0}, -{ "subio", 0x94000800, 0xfc000800, "?si,b,x", pa10, 0}, -{ "cmpiclr", 0x90000800, 0xfc000800, "?Si,b,x", pa20, FLAG_STRICT}, -{ "cmpiclr", 0x90000000, 0xfc000800, "?si,b,x", pa10, FLAG_STRICT}, -{ "comiclr", 0x90000000, 0xfc000800, "?si,b,x", pa10, 0}, -{ "shladd", 0x08000220, 0xfc000320, "ca?Ax,.,b,t", pa20, FLAG_STRICT}, -{ "shladd", 0x08000200, 0xfc000320, "ca?ax,.,b,t", pa10, FLAG_STRICT}, -{ "sh1add", 0x08000640, 
0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh1addl", 0x08000a40, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh1addo", 0x08000e40, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh2add", 0x08000680, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh2addl", 0x08000a80, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh2addo", 0x08000e80, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh3add", 0x080006c0, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh3addl", 0x08000ac0, 0xfc000fe0, "?ax,b,t", pa10, 0}, -{ "sh3addo", 0x08000ec0, 0xfc000fe0, "?ax,b,t", pa10, 0}, - -/* Subword Operation Instructions. */ - -{ "hadd", 0x08000300, 0xfc00ff20, "cHx,b,t", pa20, FLAG_STRICT}, -{ "havg", 0x080002c0, 0xfc00ffe0, "x,b,t", pa20, FLAG_STRICT}, -{ "hshl", 0xf8008800, 0xffe0fc20, "x,*,t", pa20, FLAG_STRICT}, -{ "hshladd", 0x08000700, 0xfc00ff20, "x,.,b,t", pa20, FLAG_STRICT}, -{ "hshr", 0xf800c800, 0xfc1ff820, "cSb,*,t", pa20, FLAG_STRICT}, -{ "hshradd", 0x08000500, 0xfc00ff20, "x,.,b,t", pa20, FLAG_STRICT}, -{ "hsub", 0x08000100, 0xfc00ff20, "cHx,b,t", pa20, FLAG_STRICT}, -{ "mixh", 0xf8008400, 0xfc009fe0, "chx,b,t", pa20, FLAG_STRICT}, -{ "mixw", 0xf8008000, 0xfc009fe0, "chx,b,t", pa20, FLAG_STRICT}, -{ "permh", 0xf8000000, 0xfc009020, "c*a,t", pa20, FLAG_STRICT}, - - -/* Extract and Deposit Instructions. */ - -{ "shrpd", 0xd0000200, 0xfc001fe0, "?Xx,b,!,t", pa20, FLAG_STRICT}, -{ "shrpd", 0xd0000400, 0xfc001400, "?Xx,b,~,t", pa20, FLAG_STRICT}, -{ "shrpw", 0xd0000000, 0xfc001fe0, "?xx,b,!,t", pa10, FLAG_STRICT}, -{ "shrpw", 0xd0000800, 0xfc001c00, "?xx,b,p,t", pa10, FLAG_STRICT}, -{ "vshd", 0xd0000000, 0xfc001fe0, "?xx,b,t", pa10, 0}, -{ "shd", 0xd0000800, 0xfc001c00, "?xx,b,p,t", pa10, 0}, -{ "extrd", 0xd0001200, 0xfc001ae0, "cS?Xb,!,%,x", pa20, FLAG_STRICT}, -{ "extrd", 0xd8000000, 0xfc000000, "cS?Xb,q,|,x", pa20, FLAG_STRICT}, -{ "extrw", 0xd0001000, 0xfc001be0, "cS?xb,!,T,x", pa10, FLAG_STRICT}, -{ "extrw", 0xd0001800, 0xfc001800, "cS?xb,P,T,x", pa10, FLAG_STRICT}, -{ "vextru", 0xd0001000, 0xfc001fe0, "?xb,T,x", pa10, 0}, -{ "vextrs", 0xd0001400, 0xfc001fe0, "?xb,T,x", pa10, 0}, -{ "extru", 0xd0001800, 0xfc001c00, "?xb,P,T,x", pa10, 0}, -{ "extrs", 0xd0001c00, 0xfc001c00, "?xb,P,T,x", pa10, 0}, -{ "depd", 0xd4000200, 0xfc001ae0, "cz?Xx,!,%,b", pa20, FLAG_STRICT}, -{ "depd", 0xf0000000, 0xfc000000, "cz?Xx,~,|,b", pa20, FLAG_STRICT}, -{ "depdi", 0xd4001200, 0xfc001ae0, "cz?X5,!,%,b", pa20, FLAG_STRICT}, -{ "depdi", 0xf4000000, 0xfc000000, "cz?X5,~,|,b", pa20, FLAG_STRICT}, -{ "depw", 0xd4000000, 0xfc001be0, "cz?xx,!,T,b", pa10, FLAG_STRICT}, -{ "depw", 0xd4000800, 0xfc001800, "cz?xx,p,T,b", pa10, FLAG_STRICT}, -{ "depwi", 0xd4001000, 0xfc001be0, "cz?x5,!,T,b", pa10, FLAG_STRICT}, -{ "depwi", 0xd4001800, 0xfc001800, "cz?x5,p,T,b", pa10, FLAG_STRICT}, -{ "zvdep", 0xd4000000, 0xfc001fe0, "?xx,T,b", pa10, 0}, -{ "vdep", 0xd4000400, 0xfc001fe0, "?xx,T,b", pa10, 0}, -{ "zdep", 0xd4000800, 0xfc001c00, "?xx,p,T,b", pa10, 0}, -{ "dep", 0xd4000c00, 0xfc001c00, "?xx,p,T,b", pa10, 0}, -{ "zvdepi", 0xd4001000, 0xfc001fe0, "?x5,T,b", pa10, 0}, -{ "vdepi", 0xd4001400, 0xfc001fe0, "?x5,T,b", pa10, 0}, -{ "zdepi", 0xd4001800, 0xfc001c00, "?x5,p,T,b", pa10, 0}, -{ "depi", 0xd4001c00, 0xfc001c00, "?x5,p,T,b", pa10, 0}, - -/* System Control Instructions. 
*/ - -{ "break", 0x00000000, 0xfc001fe0, "r,A", pa10, 0}, -{ "rfi", 0x00000c00, 0xffffff1f, "cr", pa10, FLAG_STRICT}, -{ "rfi", 0x00000c00, 0xffffffff, "", pa10, 0}, -{ "rfir", 0x00000ca0, 0xffffffff, "", pa11, 0}, -{ "ssm", 0x00000d60, 0xfc00ffe0, "U,t", pa20, FLAG_STRICT}, -{ "ssm", 0x00000d60, 0xffe0ffe0, "R,t", pa10, 0}, -{ "rsm", 0x00000e60, 0xfc00ffe0, "U,t", pa20, FLAG_STRICT}, -{ "rsm", 0x00000e60, 0xffe0ffe0, "R,t", pa10, 0}, -{ "mtsm", 0x00001860, 0xffe0ffff, "x", pa10, 0}, -{ "ldsid", 0x000010a0, 0xfc1fffe0, "(b),t", pa10, 0}, -{ "ldsid", 0x000010a0, 0xfc1f3fe0, "(s,b),t", pa10, 0}, -{ "mtsp", 0x00001820, 0xffe01fff, "x,S", pa10, 0}, -{ "mtctl", 0x00001840, 0xfc00ffff, "x,^", pa10, 0}, -{ "mtsarcm", 0x016018C0, 0xffe0ffff, "x", pa20, FLAG_STRICT}, -{ "mfia", 0x000014A0, 0xffffffe0, "t", pa20, FLAG_STRICT}, -{ "mfsp", 0x000004a0, 0xffff1fe0, "S,t", pa10, 0}, -{ "mfctl", 0x016048a0, 0xffffffe0, "cW!,t", pa20, FLAG_STRICT}, -{ "mfctl", 0x000008a0, 0xfc1fffe0, "^,t", pa10, 0}, -{ "sync", 0x00000400, 0xffffffff, "", pa10, 0}, -{ "syncdma", 0x00100400, 0xffffffff, "", pa10, 0}, -{ "probe", 0x04001180, 0xfc00ffa0, "cw(b),x,t", pa10, FLAG_STRICT}, -{ "probe", 0x04001180, 0xfc003fa0, "cw(s,b),x,t", pa10, FLAG_STRICT}, -{ "probei", 0x04003180, 0xfc00ffa0, "cw(b),R,t", pa10, FLAG_STRICT}, -{ "probei", 0x04003180, 0xfc003fa0, "cw(s,b),R,t", pa10, FLAG_STRICT}, -{ "prober", 0x04001180, 0xfc00ffe0, "(b),x,t", pa10, 0}, -{ "prober", 0x04001180, 0xfc003fe0, "(s,b),x,t", pa10, 0}, -{ "proberi", 0x04003180, 0xfc00ffe0, "(b),R,t", pa10, 0}, -{ "proberi", 0x04003180, 0xfc003fe0, "(s,b),R,t", pa10, 0}, -{ "probew", 0x040011c0, 0xfc00ffe0, "(b),x,t", pa10, 0}, -{ "probew", 0x040011c0, 0xfc003fe0, "(s,b),x,t", pa10, 0}, -{ "probewi", 0x040031c0, 0xfc00ffe0, "(b),R,t", pa10, 0}, -{ "probewi", 0x040031c0, 0xfc003fe0, "(s,b),R,t", pa10, 0}, -{ "lpa", 0x04001340, 0xfc00ffc0, "cZx(b),t", pa10, 0}, -{ "lpa", 0x04001340, 0xfc003fc0, "cZx(s,b),t", pa10, 0}, -{ "lci", 0x04001300, 0xfc00ffe0, "x(b),t", pa11, 0}, -{ "lci", 0x04001300, 0xfc003fe0, "x(s,b),t", pa11, 0}, -{ "pdtlb", 0x04001600, 0xfc00ffdf, "cLcZx(b)", pa20, FLAG_STRICT}, -{ "pdtlb", 0x04001600, 0xfc003fdf, "cLcZx(s,b)", pa20, FLAG_STRICT}, -{ "pdtlb", 0x04001600, 0xfc1fffdf, "cLcZ@(b)", pa20, FLAG_STRICT}, -{ "pdtlb", 0x04001600, 0xfc1f3fdf, "cLcZ@(s,b)", pa20, FLAG_STRICT}, -{ "pdtlb", 0x04001200, 0xfc00ffdf, "cZx(b)", pa10, 0}, -{ "pdtlb", 0x04001200, 0xfc003fdf, "cZx(s,b)", pa10, 0}, -{ "pitlb", 0x04000600, 0xfc001fdf, "cLcZx(S,b)", pa20, FLAG_STRICT}, -{ "pitlb", 0x04000600, 0xfc1f1fdf, "cLcZ@(S,b)", pa20, FLAG_STRICT}, -{ "pitlb", 0x04000200, 0xfc001fdf, "cZx(S,b)", pa10, 0}, -{ "pdtlbe", 0x04001240, 0xfc00ffdf, "cZx(b)", pa10, 0}, -{ "pdtlbe", 0x04001240, 0xfc003fdf, "cZx(s,b)", pa10, 0}, -{ "pitlbe", 0x04000240, 0xfc001fdf, "cZx(S,b)", pa10, 0}, -{ "idtlba", 0x04001040, 0xfc00ffff, "x,(b)", pa10, 0}, -{ "idtlba", 0x04001040, 0xfc003fff, "x,(s,b)", pa10, 0}, -{ "iitlba", 0x04000040, 0xfc001fff, "x,(S,b)", pa10, 0}, -{ "idtlbp", 0x04001000, 0xfc00ffff, "x,(b)", pa10, 0}, -{ "idtlbp", 0x04001000, 0xfc003fff, "x,(s,b)", pa10, 0}, -{ "iitlbp", 0x04000000, 0xfc001fff, "x,(S,b)", pa10, 0}, -{ "pdc", 0x04001380, 0xfc00ffdf, "cZx(b)", pa10, 0}, -{ "pdc", 0x04001380, 0xfc003fdf, "cZx(s,b)", pa10, 0}, -{ "fdc", 0x04001280, 0xfc00ffdf, "cZx(b)", pa10, FLAG_STRICT}, -{ "fdc", 0x04001280, 0xfc003fdf, "cZx(s,b)", pa10, FLAG_STRICT}, -{ "fdc", 0x04003280, 0xfc00ffff, "5(b)", pa20, FLAG_STRICT}, -{ "fdc", 0x04003280, 0xfc003fff, "5(s,b)", pa20, FLAG_STRICT}, 
-{ "fdc", 0x04001280, 0xfc00ffdf, "cZx(b)", pa10, 0}, -{ "fdc", 0x04001280, 0xfc003fdf, "cZx(s,b)", pa10, 0}, -{ "fic", 0x040013c0, 0xfc00dfdf, "cZx(b)", pa20, FLAG_STRICT}, -{ "fic", 0x04000280, 0xfc001fdf, "cZx(S,b)", pa10, 0}, -{ "fdce", 0x040012c0, 0xfc00ffdf, "cZx(b)", pa10, 0}, -{ "fdce", 0x040012c0, 0xfc003fdf, "cZx(s,b)", pa10, 0}, -{ "fice", 0x040002c0, 0xfc001fdf, "cZx(S,b)", pa10, 0}, -{ "diag", 0x14000000, 0xfc000000, "D", pa10, 0}, -{ "idtlbt", 0x04001800, 0xfc00ffff, "x,b", pa20, FLAG_STRICT}, -{ "iitlbt", 0x04000800, 0xfc00ffff, "x,b", pa20, FLAG_STRICT}, - -/* These may be specific to certain versions of the PA. Joel claimed - they were 72000 (7200?) specific. However, I'm almost certain the - mtcpu/mfcpu were undocumented, but available in the older 700 machines. */ -{ "mtcpu", 0x14001600, 0xfc00ffff, "x,^", pa10, 0}, -{ "mfcpu", 0x14001A00, 0xfc00ffff, "^,x", pa10, 0}, -{ "tocen", 0x14403600, 0xffffffff, "", pa10, 0}, -{ "tocdis", 0x14401620, 0xffffffff, "", pa10, 0}, -{ "shdwgr", 0x14402600, 0xffffffff, "", pa10, 0}, -{ "grshdw", 0x14400620, 0xffffffff, "", pa10, 0}, - -/* gfw and gfr are not in the HP PA 1.1 manual, but they are in either - the Timex FPU or the Mustang ERS (not sure which) manual. */ -{ "gfw", 0x04001680, 0xfc00ffdf, "cZx(b)", pa11, 0}, -{ "gfw", 0x04001680, 0xfc003fdf, "cZx(s,b)", pa11, 0}, -{ "gfr", 0x04001a80, 0xfc00ffdf, "cZx(b)", pa11, 0}, -{ "gfr", 0x04001a80, 0xfc003fdf, "cZx(s,b)", pa11, 0}, - -/* Floating Point Coprocessor Instructions. */ - -{ "fldw", 0x24000000, 0xfc00df80, "cXx(b),fT", pa10, FLAG_STRICT}, -{ "fldw", 0x24000000, 0xfc001f80, "cXx(s,b),fT", pa10, FLAG_STRICT}, -{ "fldw", 0x24000000, 0xfc00d380, "cxccx(b),fT", pa11, FLAG_STRICT}, -{ "fldw", 0x24000000, 0xfc001380, "cxccx(s,b),fT", pa11, FLAG_STRICT}, -{ "fldw", 0x24001020, 0xfc1ff3a0, "cocc@(b),fT", pa20, FLAG_STRICT}, -{ "fldw", 0x24001020, 0xfc1f33a0, "cocc@(s,b),fT", pa20, FLAG_STRICT}, -{ "fldw", 0x24001000, 0xfc00df80, "cM5(b),fT", pa10, FLAG_STRICT}, -{ "fldw", 0x24001000, 0xfc001f80, "cM5(s,b),fT", pa10, FLAG_STRICT}, -{ "fldw", 0x24001000, 0xfc00d380, "cmcc5(b),fT", pa11, FLAG_STRICT}, -{ "fldw", 0x24001000, 0xfc001380, "cmcc5(s,b),fT", pa11, FLAG_STRICT}, -{ "fldw", 0x5c000000, 0xfc000004, "y(b),fe", pa20w, FLAG_STRICT}, -{ "fldw", 0x58000000, 0xfc000000, "cJy(b),fe", pa20w, FLAG_STRICT}, -{ "fldw", 0x5c000000, 0xfc00c004, "d(b),fe", pa20, FLAG_STRICT}, -{ "fldw", 0x5c000000, 0xfc000004, "d(s,b),fe", pa20, FLAG_STRICT}, -{ "fldw", 0x58000000, 0xfc00c000, "cJd(b),fe", pa20, FLAG_STRICT}, -{ "fldw", 0x58000000, 0xfc000000, "cJd(s,b),fe", pa20, FLAG_STRICT}, -{ "fldd", 0x2c000000, 0xfc00dfc0, "cXx(b),ft", pa10, FLAG_STRICT}, -{ "fldd", 0x2c000000, 0xfc001fc0, "cXx(s,b),ft", pa10, FLAG_STRICT}, -{ "fldd", 0x2c000000, 0xfc00d3c0, "cxccx(b),ft", pa11, FLAG_STRICT}, -{ "fldd", 0x2c000000, 0xfc0013c0, "cxccx(s,b),ft", pa11, FLAG_STRICT}, -{ "fldd", 0x2c001020, 0xfc1ff3e0, "cocc@(b),ft", pa20, FLAG_STRICT}, -{ "fldd", 0x2c001020, 0xfc1f33e0, "cocc@(s,b),ft", pa20, FLAG_STRICT}, -{ "fldd", 0x2c001000, 0xfc00dfc0, "cM5(b),ft", pa10, FLAG_STRICT}, -{ "fldd", 0x2c001000, 0xfc001fc0, "cM5(s,b),ft", pa10, FLAG_STRICT}, -{ "fldd", 0x2c001000, 0xfc00d3c0, "cmcc5(b),ft", pa11, FLAG_STRICT}, -{ "fldd", 0x2c001000, 0xfc0013c0, "cmcc5(s,b),ft", pa11, FLAG_STRICT}, -{ "fldd", 0x50000002, 0xfc000002, "cq&(b),fx", pa20w, FLAG_STRICT}, -{ "fldd", 0x50000002, 0xfc00c002, "cq#(b),fx", pa20, FLAG_STRICT}, -{ "fldd", 0x50000002, 0xfc000002, "cq#(s,b),fx", pa20, FLAG_STRICT}, -{ "fstw", 0x24000200, 
0xfc00df80, "cXfT,x(b)", pa10, FLAG_STRICT}, -{ "fstw", 0x24000200, 0xfc001f80, "cXfT,x(s,b)", pa10, FLAG_STRICT}, -{ "fstw", 0x24000200, 0xfc00d380, "cxcCfT,x(b)", pa11, FLAG_STRICT}, -{ "fstw", 0x24000200, 0xfc001380, "cxcCfT,x(s,b)", pa11, FLAG_STRICT}, -{ "fstw", 0x24001220, 0xfc1ff3a0, "cocCfT,@(b)", pa20, FLAG_STRICT}, -{ "fstw", 0x24001220, 0xfc1f33a0, "cocCfT,@(s,b)", pa20, FLAG_STRICT}, -{ "fstw", 0x24001200, 0xfc00df80, "cMfT,5(b)", pa10, FLAG_STRICT}, -{ "fstw", 0x24001200, 0xfc001f80, "cMfT,5(s,b)", pa10, FLAG_STRICT}, -{ "fstw", 0x24001200, 0xfc00df80, "cMfT,5(b)", pa10, FLAG_STRICT}, -{ "fstw", 0x24001200, 0xfc001f80, "cMfT,5(s,b)", pa10, FLAG_STRICT}, -{ "fstw", 0x7c000000, 0xfc000004, "fE,y(b)", pa20w, FLAG_STRICT}, -{ "fstw", 0x78000000, 0xfc000000, "cJfE,y(b)", pa20w, FLAG_STRICT}, -{ "fstw", 0x7c000000, 0xfc00c004, "fE,d(b)", pa20, FLAG_STRICT}, -{ "fstw", 0x7c000000, 0xfc000004, "fE,d(s,b)", pa20, FLAG_STRICT}, -{ "fstw", 0x78000000, 0xfc00c000, "cJfE,d(b)", pa20, FLAG_STRICT}, -{ "fstw", 0x78000000, 0xfc000000, "cJfE,d(s,b)", pa20, FLAG_STRICT}, -{ "fstd", 0x2c000200, 0xfc00dfc0, "cXft,x(b)", pa10, FLAG_STRICT}, -{ "fstd", 0x2c000200, 0xfc001fc0, "cXft,x(s,b)", pa10, FLAG_STRICT}, -{ "fstd", 0x2c000200, 0xfc00d3c0, "cxcCft,x(b)", pa11, FLAG_STRICT}, -{ "fstd", 0x2c000200, 0xfc0013c0, "cxcCft,x(s,b)", pa11, FLAG_STRICT}, -{ "fstd", 0x2c001220, 0xfc1ff3e0, "cocCft,@(b)", pa20, FLAG_STRICT}, -{ "fstd", 0x2c001220, 0xfc1f33e0, "cocCft,@(s,b)", pa20, FLAG_STRICT}, -{ "fstd", 0x2c001200, 0xfc00dfc0, "cMft,5(b)", pa10, FLAG_STRICT}, -{ "fstd", 0x2c001200, 0xfc001fc0, "cMft,5(s,b)", pa10, FLAG_STRICT}, -{ "fstd", 0x2c001200, 0xfc00d3c0, "cmcCft,5(b)", pa11, FLAG_STRICT}, -{ "fstd", 0x2c001200, 0xfc0013c0, "cmcCft,5(s,b)", pa11, FLAG_STRICT}, -{ "fstd", 0x70000002, 0xfc000002, "cqfx,&(b)", pa20w, FLAG_STRICT}, -{ "fstd", 0x70000002, 0xfc00c002, "cqfx,#(b)", pa20, FLAG_STRICT}, -{ "fstd", 0x70000002, 0xfc000002, "cqfx,#(s,b)", pa20, FLAG_STRICT}, -{ "fldwx", 0x24000000, 0xfc00df80, "cXx(b),fT", pa10, FLAG_STRICT}, -{ "fldwx", 0x24000000, 0xfc001f80, "cXx(s,b),fT", pa10, FLAG_STRICT}, -{ "fldwx", 0x24000000, 0xfc00d380, "cxccx(b),fT", pa11, FLAG_STRICT}, -{ "fldwx", 0x24000000, 0xfc001380, "cxccx(s,b),fT", pa11, FLAG_STRICT}, -{ "fldwx", 0x24000000, 0xfc00df80, "cXx(b),fT", pa10, 0}, -{ "fldwx", 0x24000000, 0xfc001f80, "cXx(s,b),fT", pa10, 0}, -{ "flddx", 0x2c000000, 0xfc00dfc0, "cXx(b),ft", pa10, FLAG_STRICT}, -{ "flddx", 0x2c000000, 0xfc001fc0, "cXx(s,b),ft", pa10, FLAG_STRICT}, -{ "flddx", 0x2c000000, 0xfc00d3c0, "cxccx(b),ft", pa11, FLAG_STRICT}, -{ "flddx", 0x2c000000, 0xfc0013c0, "cxccx(s,b),ft", pa11, FLAG_STRICT}, -{ "flddx", 0x2c000000, 0xfc00dfc0, "cXx(b),ft", pa10, 0}, -{ "flddx", 0x2c000000, 0xfc001fc0, "cXx(s,b),ft", pa10, 0}, -{ "fstwx", 0x24000200, 0xfc00df80, "cxfT,x(b)", pa10, FLAG_STRICT}, -{ "fstwx", 0x24000200, 0xfc001f80, "cxfT,x(s,b)", pa10, FLAG_STRICT}, -{ "fstwx", 0x24000200, 0xfc00d380, "cxcCfT,x(b)", pa11, FLAG_STRICT}, -{ "fstwx", 0x24000200, 0xfc001380, "cxcCfT,x(s,b)", pa11, FLAG_STRICT}, -{ "fstwx", 0x24000200, 0xfc00df80, "cxfT,x(b)", pa10, 0}, -{ "fstwx", 0x24000200, 0xfc001f80, "cxfT,x(s,b)", pa10, 0}, -{ "fstdx", 0x2c000200, 0xfc00dfc0, "cxft,x(b)", pa10, FLAG_STRICT}, -{ "fstdx", 0x2c000200, 0xfc001fc0, "cxft,x(s,b)", pa10, FLAG_STRICT}, -{ "fstdx", 0x2c000200, 0xfc00d3c0, "cxcCft,x(b)", pa11, FLAG_STRICT}, -{ "fstdx", 0x2c000200, 0xfc0013c0, "cxcCft,x(s,b)", pa11, FLAG_STRICT}, -{ "fstdx", 0x2c000200, 0xfc00dfc0, "cxft,x(b)", pa10, 0}, -{ 
"fstdx", 0x2c000200, 0xfc001fc0, "cxft,x(s,b)", pa10, 0}, -{ "fstqx", 0x3c000200, 0xfc00dfc0, "cxft,x(b)", pa10, 0}, -{ "fstqx", 0x3c000200, 0xfc001fc0, "cxft,x(s,b)", pa10, 0}, -{ "fldws", 0x24001000, 0xfc00df80, "cm5(b),fT", pa10, FLAG_STRICT}, -{ "fldws", 0x24001000, 0xfc001f80, "cm5(s,b),fT", pa10, FLAG_STRICT}, -{ "fldws", 0x24001000, 0xfc00d380, "cmcc5(b),fT", pa11, FLAG_STRICT}, -{ "fldws", 0x24001000, 0xfc001380, "cmcc5(s,b),fT", pa11, FLAG_STRICT}, -{ "fldws", 0x24001000, 0xfc00df80, "cm5(b),fT", pa10, 0}, -{ "fldws", 0x24001000, 0xfc001f80, "cm5(s,b),fT", pa10, 0}, -{ "fldds", 0x2c001000, 0xfc00dfc0, "cm5(b),ft", pa10, FLAG_STRICT}, -{ "fldds", 0x2c001000, 0xfc001fc0, "cm5(s,b),ft", pa10, FLAG_STRICT}, -{ "fldds", 0x2c001000, 0xfc00d3c0, "cmcc5(b),ft", pa11, FLAG_STRICT}, -{ "fldds", 0x2c001000, 0xfc0013c0, "cmcc5(s,b),ft", pa11, FLAG_STRICT}, -{ "fldds", 0x2c001000, 0xfc00dfc0, "cm5(b),ft", pa10, 0}, -{ "fldds", 0x2c001000, 0xfc001fc0, "cm5(s,b),ft", pa10, 0}, -{ "fstws", 0x24001200, 0xfc00df80, "cmfT,5(b)", pa10, FLAG_STRICT}, -{ "fstws", 0x24001200, 0xfc001f80, "cmfT,5(s,b)", pa10, FLAG_STRICT}, -{ "fstws", 0x24001200, 0xfc00d380, "cmcCfT,5(b)", pa11, FLAG_STRICT}, -{ "fstws", 0x24001200, 0xfc001380, "cmcCfT,5(s,b)", pa11, FLAG_STRICT}, -{ "fstws", 0x24001200, 0xfc00df80, "cmfT,5(b)", pa10, 0}, -{ "fstws", 0x24001200, 0xfc001f80, "cmfT,5(s,b)", pa10, 0}, -{ "fstds", 0x2c001200, 0xfc00dfc0, "cmft,5(b)", pa10, FLAG_STRICT}, -{ "fstds", 0x2c001200, 0xfc001fc0, "cmft,5(s,b)", pa10, FLAG_STRICT}, -{ "fstds", 0x2c001200, 0xfc00d3c0, "cmcCft,5(b)", pa11, FLAG_STRICT}, -{ "fstds", 0x2c001200, 0xfc0013c0, "cmcCft,5(s,b)", pa11, FLAG_STRICT}, -{ "fstds", 0x2c001200, 0xfc00dfc0, "cmft,5(b)", pa10, 0}, -{ "fstds", 0x2c001200, 0xfc001fc0, "cmft,5(s,b)", pa10, 0}, -{ "fstqs", 0x3c001200, 0xfc00dfc0, "cmft,5(b)", pa10, 0}, -{ "fstqs", 0x3c001200, 0xfc001fc0, "cmft,5(s,b)", pa10, 0}, -{ "fadd", 0x30000600, 0xfc00e7e0, "Ffa,fb,fT", pa10, 0}, -{ "fadd", 0x38000600, 0xfc00e720, "IfA,fB,fT", pa10, 0}, -{ "fsub", 0x30002600, 0xfc00e7e0, "Ffa,fb,fT", pa10, 0}, -{ "fsub", 0x38002600, 0xfc00e720, "IfA,fB,fT", pa10, 0}, -{ "fmpy", 0x30004600, 0xfc00e7e0, "Ffa,fb,fT", pa10, 0}, -{ "fmpy", 0x38004600, 0xfc00e720, "IfA,fB,fT", pa10, 0}, -{ "fdiv", 0x30006600, 0xfc00e7e0, "Ffa,fb,fT", pa10, 0}, -{ "fdiv", 0x38006600, 0xfc00e720, "IfA,fB,fT", pa10, 0}, -{ "fsqrt", 0x30008000, 0xfc1fe7e0, "Ffa,fT", pa10, 0}, -{ "fsqrt", 0x38008000, 0xfc1fe720, "FfA,fT", pa10, 0}, -{ "fabs", 0x30006000, 0xfc1fe7e0, "Ffa,fT", pa10, 0}, -{ "fabs", 0x38006000, 0xfc1fe720, "FfA,fT", pa10, 0}, -{ "frem", 0x30008600, 0xfc00e7e0, "Ffa,fb,fT", pa10, 0}, -{ "frem", 0x38008600, 0xfc00e720, "FfA,fB,fT", pa10, 0}, -{ "frnd", 0x3000a000, 0xfc1fe7e0, "Ffa,fT", pa10, 0}, -{ "frnd", 0x3800a000, 0xfc1fe720, "FfA,fT", pa10, 0}, -{ "fcpy", 0x30004000, 0xfc1fe7e0, "Ffa,fT", pa10, 0}, -{ "fcpy", 0x38004000, 0xfc1fe720, "FfA,fT", pa10, 0}, -{ "fcnvff", 0x30000200, 0xfc1f87e0, "FGfa,fT", pa10, 0}, -{ "fcnvff", 0x38000200, 0xfc1f8720, "FGfA,fT", pa10, 0}, -{ "fcnvxf", 0x30008200, 0xfc1f87e0, "FGfa,fT", pa10, 0}, -{ "fcnvxf", 0x38008200, 0xfc1f8720, "FGfA,fT", pa10, 0}, -{ "fcnvfx", 0x30010200, 0xfc1f87e0, "FGfa,fT", pa10, 0}, -{ "fcnvfx", 0x38010200, 0xfc1f8720, "FGfA,fT", pa10, 0}, -{ "fcnvfxt", 0x30018200, 0xfc1f87e0, "FGfa,fT", pa10, 0}, -{ "fcnvfxt", 0x38018200, 0xfc1f8720, "FGfA,fT", pa10, 0}, -{ "fmpyfadd", 0xb8000000, 0xfc000020, "IfA,fB,fC,fT", pa20, FLAG_STRICT}, -{ "fmpynfadd", 0xb8000020, 0xfc000020, "IfA,fB,fC,fT", pa20, FLAG_STRICT}, 
-{ "fneg", 0x3000c000, 0xfc1fe7e0, "Ffa,fT", pa20, FLAG_STRICT}, -{ "fneg", 0x3800c000, 0xfc1fe720, "IfA,fT", pa20, FLAG_STRICT}, -{ "fnegabs", 0x3000e000, 0xfc1fe7e0, "Ffa,fT", pa20, FLAG_STRICT}, -{ "fnegabs", 0x3800e000, 0xfc1fe720, "IfA,fT", pa20, FLAG_STRICT}, -{ "fcnv", 0x30000200, 0xfc1c0720, "{_fa,fT", pa20, FLAG_STRICT}, -{ "fcnv", 0x38000200, 0xfc1c0720, "FGfA,fT", pa20, FLAG_STRICT}, -{ "fcmp", 0x30000400, 0xfc00e7e0, "F?ffa,fb", pa10, FLAG_STRICT}, -{ "fcmp", 0x38000400, 0xfc00e720, "I?ffA,fB", pa10, FLAG_STRICT}, -{ "fcmp", 0x30000400, 0xfc0007e0, "F?ffa,fb,h", pa20, FLAG_STRICT}, -{ "fcmp", 0x38000400, 0xfc000720, "I?ffA,fB,h", pa20, FLAG_STRICT}, -{ "fcmp", 0x30000400, 0xfc00e7e0, "F?ffa,fb", pa10, 0}, -{ "fcmp", 0x38000400, 0xfc00e720, "I?ffA,fB", pa10, 0}, -{ "xmpyu", 0x38004700, 0xfc00e720, "fX,fB,fT", pa11, 0}, -{ "fmpyadd", 0x18000000, 0xfc000000, "Hfi,fj,fk,fl,fm", pa11, 0}, -{ "fmpysub", 0x98000000, 0xfc000000, "Hfi,fj,fk,fl,fm", pa11, 0}, -{ "ftest", 0x30002420, 0xffffffff, "", pa10, FLAG_STRICT}, -{ "ftest", 0x30002420, 0xffffffe0, ",=", pa20, FLAG_STRICT}, -{ "ftest", 0x30000420, 0xffff1fff, "m", pa20, FLAG_STRICT}, -{ "fid", 0x30000000, 0xffffffff, "", pa11, 0}, - -/* Performance Monitor Instructions. */ - -{ "pmdis", 0x30000280, 0xffffffdf, "N", pa20, FLAG_STRICT}, -{ "pmenb", 0x30000680, 0xffffffff, "", pa20, FLAG_STRICT}, - -/* Assist Instructions. */ - -{ "spop0", 0x10000000, 0xfc000600, "v,ON", pa10, 0}, -{ "spop1", 0x10000200, 0xfc000600, "v,oNt", pa10, 0}, -{ "spop2", 0x10000400, 0xfc000600, "v,1Nb", pa10, 0}, -{ "spop3", 0x10000600, 0xfc000600, "v,0Nx,b", pa10, 0}, -{ "copr", 0x30000000, 0xfc000000, "u,2N", pa10, 0}, -{ "cldw", 0x24000000, 0xfc00de00, "ucXx(b),t", pa10, FLAG_STRICT}, -{ "cldw", 0x24000000, 0xfc001e00, "ucXx(s,b),t", pa10, FLAG_STRICT}, -{ "cldw", 0x24000000, 0xfc00d200, "ucxccx(b),t", pa11, FLAG_STRICT}, -{ "cldw", 0x24000000, 0xfc001200, "ucxccx(s,b),t", pa11, FLAG_STRICT}, -{ "cldw", 0x24001000, 0xfc00d200, "ucocc@(b),t", pa20, FLAG_STRICT}, -{ "cldw", 0x24001000, 0xfc001200, "ucocc@(s,b),t", pa20, FLAG_STRICT}, -{ "cldw", 0x24001000, 0xfc00de00, "ucM5(b),t", pa10, FLAG_STRICT}, -{ "cldw", 0x24001000, 0xfc001e00, "ucM5(s,b),t", pa10, FLAG_STRICT}, -{ "cldw", 0x24001000, 0xfc00d200, "ucmcc5(b),t", pa11, FLAG_STRICT}, -{ "cldw", 0x24001000, 0xfc001200, "ucmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "cldd", 0x2c000000, 0xfc00de00, "ucXx(b),t", pa10, FLAG_STRICT}, -{ "cldd", 0x2c000000, 0xfc001e00, "ucXx(s,b),t", pa10, FLAG_STRICT}, -{ "cldd", 0x2c000000, 0xfc00d200, "ucxccx(b),t", pa11, FLAG_STRICT}, -{ "cldd", 0x2c000000, 0xfc001200, "ucxccx(s,b),t", pa11, FLAG_STRICT}, -{ "cldd", 0x2c001000, 0xfc00d200, "ucocc@(b),t", pa20, FLAG_STRICT}, -{ "cldd", 0x2c001000, 0xfc001200, "ucocc@(s,b),t", pa20, FLAG_STRICT}, -{ "cldd", 0x2c001000, 0xfc00de00, "ucM5(b),t", pa10, FLAG_STRICT}, -{ "cldd", 0x2c001000, 0xfc001e00, "ucM5(s,b),t", pa10, FLAG_STRICT}, -{ "cldd", 0x2c001000, 0xfc00d200, "ucmcc5(b),t", pa11, FLAG_STRICT}, -{ "cldd", 0x2c001000, 0xfc001200, "ucmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "cstw", 0x24000200, 0xfc00de00, "ucXt,x(b)", pa10, FLAG_STRICT}, -{ "cstw", 0x24000200, 0xfc001e00, "ucXt,x(s,b)", pa10, FLAG_STRICT}, -{ "cstw", 0x24000200, 0xfc00d200, "ucxcCt,x(b)", pa11, FLAG_STRICT}, -{ "cstw", 0x24000200, 0xfc001200, "ucxcCt,x(s,b)", pa11, FLAG_STRICT}, -{ "cstw", 0x24001200, 0xfc00d200, "ucocCt,@(b)", pa20, FLAG_STRICT}, -{ "cstw", 0x24001200, 0xfc001200, "ucocCt,@(s,b)", pa20, FLAG_STRICT}, -{ "cstw", 0x24001200, 0xfc00de00, "ucMt,5(b)", 
pa10, FLAG_STRICT}, -{ "cstw", 0x24001200, 0xfc001e00, "ucMt,5(s,b)", pa10, FLAG_STRICT}, -{ "cstw", 0x24001200, 0xfc00d200, "ucmcCt,5(b)", pa11, FLAG_STRICT}, -{ "cstw", 0x24001200, 0xfc001200, "ucmcCt,5(s,b)", pa11, FLAG_STRICT}, -{ "cstd", 0x2c000200, 0xfc00de00, "ucXt,x(b)", pa10, FLAG_STRICT}, -{ "cstd", 0x2c000200, 0xfc001e00, "ucXt,x(s,b)", pa10, FLAG_STRICT}, -{ "cstd", 0x2c000200, 0xfc00d200, "ucxcCt,x(b)", pa11, FLAG_STRICT}, -{ "cstd", 0x2c000200, 0xfc001200, "ucxcCt,x(s,b)", pa11, FLAG_STRICT}, -{ "cstd", 0x2c001200, 0xfc00d200, "ucocCt,@(b)", pa20, FLAG_STRICT}, -{ "cstd", 0x2c001200, 0xfc001200, "ucocCt,@(s,b)", pa20, FLAG_STRICT}, -{ "cstd", 0x2c001200, 0xfc00de00, "ucMt,5(b)", pa10, FLAG_STRICT}, -{ "cstd", 0x2c001200, 0xfc001e00, "ucMt,5(s,b)", pa10, FLAG_STRICT}, -{ "cstd", 0x2c001200, 0xfc00d200, "ucmcCt,5(b)", pa11, FLAG_STRICT}, -{ "cstd", 0x2c001200, 0xfc001200, "ucmcCt,5(s,b)", pa11, FLAG_STRICT}, -{ "cldwx", 0x24000000, 0xfc00de00, "ucXx(b),t", pa10, FLAG_STRICT}, -{ "cldwx", 0x24000000, 0xfc001e00, "ucXx(s,b),t", pa10, FLAG_STRICT}, -{ "cldwx", 0x24000000, 0xfc00d200, "ucxccx(b),t", pa11, FLAG_STRICT}, -{ "cldwx", 0x24000000, 0xfc001200, "ucxccx(s,b),t", pa11, FLAG_STRICT}, -{ "cldwx", 0x24000000, 0xfc00de00, "ucXx(b),t", pa10, 0}, -{ "cldwx", 0x24000000, 0xfc001e00, "ucXx(s,b),t", pa10, 0}, -{ "clddx", 0x2c000000, 0xfc00de00, "ucXx(b),t", pa10, FLAG_STRICT}, -{ "clddx", 0x2c000000, 0xfc001e00, "ucXx(s,b),t", pa10, FLAG_STRICT}, -{ "clddx", 0x2c000000, 0xfc00d200, "ucxccx(b),t", pa11, FLAG_STRICT}, -{ "clddx", 0x2c000000, 0xfc001200, "ucxccx(s,b),t", pa11, FLAG_STRICT}, -{ "clddx", 0x2c000000, 0xfc00de00, "ucXx(b),t", pa10, 0}, -{ "clddx", 0x2c000000, 0xfc001e00, "ucXx(s,b),t", pa10, 0}, -{ "cstwx", 0x24000200, 0xfc00de00, "ucXt,x(b)", pa10, FLAG_STRICT}, -{ "cstwx", 0x24000200, 0xfc001e00, "ucXt,x(s,b)", pa10, FLAG_STRICT}, -{ "cstwx", 0x24000200, 0xfc00d200, "ucxcCt,x(b)", pa11, FLAG_STRICT}, -{ "cstwx", 0x24000200, 0xfc001200, "ucxcCt,x(s,b)", pa11, FLAG_STRICT}, -{ "cstwx", 0x24000200, 0xfc00de00, "ucXt,x(b)", pa10, 0}, -{ "cstwx", 0x24000200, 0xfc001e00, "ucXt,x(s,b)", pa10, 0}, -{ "cstdx", 0x2c000200, 0xfc00de00, "ucXt,x(b)", pa10, FLAG_STRICT}, -{ "cstdx", 0x2c000200, 0xfc001e00, "ucXt,x(s,b)", pa10, FLAG_STRICT}, -{ "cstdx", 0x2c000200, 0xfc00d200, "ucxcCt,x(b)", pa11, FLAG_STRICT}, -{ "cstdx", 0x2c000200, 0xfc001200, "ucxcCt,x(s,b)", pa11, FLAG_STRICT}, -{ "cstdx", 0x2c000200, 0xfc00de00, "ucXt,x(b)", pa10, 0}, -{ "cstdx", 0x2c000200, 0xfc001e00, "ucXt,x(s,b)", pa10, 0}, -{ "cldws", 0x24001000, 0xfc00de00, "ucM5(b),t", pa10, FLAG_STRICT}, -{ "cldws", 0x24001000, 0xfc001e00, "ucM5(s,b),t", pa10, FLAG_STRICT}, -{ "cldws", 0x24001000, 0xfc00d200, "ucmcc5(b),t", pa11, FLAG_STRICT}, -{ "cldws", 0x24001000, 0xfc001200, "ucmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "cldws", 0x24001000, 0xfc00de00, "ucM5(b),t", pa10, 0}, -{ "cldws", 0x24001000, 0xfc001e00, "ucM5(s,b),t", pa10, 0}, -{ "cldds", 0x2c001000, 0xfc00de00, "ucM5(b),t", pa10, FLAG_STRICT}, -{ "cldds", 0x2c001000, 0xfc001e00, "ucM5(s,b),t", pa10, FLAG_STRICT}, -{ "cldds", 0x2c001000, 0xfc00d200, "ucmcc5(b),t", pa11, FLAG_STRICT}, -{ "cldds", 0x2c001000, 0xfc001200, "ucmcc5(s,b),t", pa11, FLAG_STRICT}, -{ "cldds", 0x2c001000, 0xfc00de00, "ucM5(b),t", pa10, 0}, -{ "cldds", 0x2c001000, 0xfc001e00, "ucM5(s,b),t", pa10, 0}, -{ "cstws", 0x24001200, 0xfc00de00, "ucMt,5(b)", pa10, FLAG_STRICT}, -{ "cstws", 0x24001200, 0xfc001e00, "ucMt,5(s,b)", pa10, FLAG_STRICT}, -{ "cstws", 0x24001200, 0xfc00d200, "ucmcCt,5(b)", pa11, 
FLAG_STRICT}, -{ "cstws", 0x24001200, 0xfc001200, "ucmcCt,5(s,b)", pa11, FLAG_STRICT}, -{ "cstws", 0x24001200, 0xfc00de00, "ucMt,5(b)", pa10, 0}, -{ "cstws", 0x24001200, 0xfc001e00, "ucMt,5(s,b)", pa10, 0}, -{ "cstds", 0x2c001200, 0xfc00de00, "ucMt,5(b)", pa10, FLAG_STRICT}, -{ "cstds", 0x2c001200, 0xfc001e00, "ucMt,5(s,b)", pa10, FLAG_STRICT}, -{ "cstds", 0x2c001200, 0xfc00d200, "ucmcCt,5(b)", pa11, FLAG_STRICT}, -{ "cstds", 0x2c001200, 0xfc001200, "ucmcCt,5(s,b)", pa11, FLAG_STRICT}, -{ "cstds", 0x2c001200, 0xfc00de00, "ucMt,5(b)", pa10, 0}, -{ "cstds", 0x2c001200, 0xfc001e00, "ucMt,5(s,b)", pa10, 0}, - -/* More pseudo instructions which must follow the main table. */ -{ "call", 0xe800f000, 0xfc1ffffd, "n(b)", pa20, FLAG_STRICT}, -{ "call", 0xe800a000, 0xffe0e000, "nW", pa10, FLAG_STRICT}, -{ "ret", 0xe840d000, 0xfffffffd, "n", pa20, FLAG_STRICT}, - -}; - -#define NUMOPCODES ((sizeof pa_opcodes)/(sizeof pa_opcodes[0])) - -/* SKV 12/18/92. Added some denotations for various operands. */ - -#define PA_IMM11_AT_31 'i' -#define PA_IMM14_AT_31 'j' -#define PA_IMM21_AT_31 'k' -#define PA_DISP12 'w' -#define PA_DISP17 'W' - -#define N_HPPA_OPERAND_FORMATS 5 - -/* Integer register names, indexed by the numbers which appear in the - opcodes. */ -static const char *const reg_names[] = -{ - "flags", "r1", "rp", "r3", "r4", "r5", "r6", "r7", "r8", "r9", - "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", - "r20", "r21", "r22", "r23", "r24", "r25", "r26", "dp", "ret0", "ret1", - "sp", "r31" -}; - -/* Floating point register names, indexed by the numbers which appear in the - opcodes. */ -static const char *const fp_reg_names[] = -{ - "fpsr", "fpe2", "fpe4", "fpe6", - "fr4", "fr5", "fr6", "fr7", "fr8", - "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15", - "fr16", "fr17", "fr18", "fr19", "fr20", "fr21", "fr22", "fr23", - "fr24", "fr25", "fr26", "fr27", "fr28", "fr29", "fr30", "fr31" -}; - -typedef unsigned int CORE_ADDR; - -/* Get at various relevant fields of an instruction word. */ - -#define MASK_5 0x1f -#define MASK_10 0x3ff -#define MASK_11 0x7ff -#define MASK_14 0x3fff -#define MASK_16 0xffff -#define MASK_21 0x1fffff - -/* These macros get bit fields using HP's numbering (MSB = 0). */ - -#define GET_FIELD(X, FROM, TO) \ - ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1)) - -#define GET_BIT(X, WHICH) \ - GET_FIELD (X, WHICH, WHICH) - -/* Some of these have been converted to 2-d arrays because they - consume less storage this way. If the maintenance becomes a - problem, convert them back to const 1-d pointer arrays. 
*/ -static const char *const control_reg[] = -{ - "rctr", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", - "pidr1", "pidr2", "ccr", "sar", "pidr3", "pidr4", - "iva", "eiem", "itmr", "pcsq", "pcoq", "iir", "isr", - "ior", "ipsw", "eirr", "tr0", "tr1", "tr2", "tr3", - "tr4", "tr5", "tr6", "tr7" -}; - -static const char *const compare_cond_names[] = -{ - "", ",=", ",<", ",<=", ",<<", ",<<=", ",sv", ",od", - ",tr", ",<>", ",>=", ",>", ",>>=", ",>>", ",nsv", ",ev" -}; -static const char *const compare_cond_64_names[] = -{ - "", ",*=", ",*<", ",*<=", ",*<<", ",*<<=", ",*sv", ",*od", - ",*tr", ",*<>", ",*>=", ",*>", ",*>>=", ",*>>", ",*nsv", ",*ev" -}; -static const char *const cmpib_cond_64_names[] = -{ - ",*<<", ",*=", ",*<", ",*<=", ",*>>=", ",*<>", ",*>=", ",*>" -}; -static const char *const add_cond_names[] = -{ - "", ",=", ",<", ",<=", ",nuv", ",znv", ",sv", ",od", - ",tr", ",<>", ",>=", ",>", ",uv", ",vnz", ",nsv", ",ev" -}; -static const char *const add_cond_64_names[] = -{ - "", ",*=", ",*<", ",*<=", ",*nuv", ",*znv", ",*sv", ",*od", - ",*tr", ",*<>", ",*>=", ",*>", ",*uv", ",*vnz", ",*nsv", ",*ev" -}; -static const char *const wide_add_cond_names[] = -{ - "", ",=", ",<", ",<=", ",nuv", ",*=", ",*<", ",*<=", - ",tr", ",<>", ",>=", ",>", ",uv", ",*<>", ",*>=", ",*>" -}; -static const char *const logical_cond_names[] = -{ - "", ",=", ",<", ",<=", 0, 0, 0, ",od", - ",tr", ",<>", ",>=", ",>", 0, 0, 0, ",ev"}; -static const char *const logical_cond_64_names[] = -{ - "", ",*=", ",*<", ",*<=", 0, 0, 0, ",*od", - ",*tr", ",*<>", ",*>=", ",*>", 0, 0, 0, ",*ev"}; -static const char *const unit_cond_names[] = -{ - "", ",swz", ",sbz", ",shz", ",sdc", ",swc", ",sbc", ",shc", - ",tr", ",nwz", ",nbz", ",nhz", ",ndc", ",nwc", ",nbc", ",nhc" -}; -static const char *const unit_cond_64_names[] = -{ - "", ",*swz", ",*sbz", ",*shz", ",*sdc", ",*swc", ",*sbc", ",*shc", - ",*tr", ",*nwz", ",*nbz", ",*nhz", ",*ndc", ",*nwc", ",*nbc", ",*nhc" -}; -static const char *const shift_cond_names[] = -{ - "", ",=", ",<", ",od", ",tr", ",<>", ",>=", ",ev" -}; -static const char *const shift_cond_64_names[] = -{ - "", ",*=", ",*<", ",*od", ",*tr", ",*<>", ",*>=", ",*ev" -}; -static const char *const bb_cond_64_names[] = -{ - ",*<", ",*>=" -}; -static const char *const index_compl_names[] = {"", ",m", ",s", ",sm"}; -static const char *const short_ldst_compl_names[] = {"", ",ma", "", ",mb"}; -static const char *const short_bytes_compl_names[] = -{ - "", ",b,m", ",e", ",e,m" -}; -static const char *const float_format_names[] = {",sgl", ",dbl", "", ",quad"}; -static const char *const fcnv_fixed_names[] = {",w", ",dw", "", ",qw"}; -static const char *const fcnv_ufixed_names[] = {",uw", ",udw", "", ",uqw"}; -static const char *const float_comp_names[] = -{ - ",false?", ",false", ",?", ",!<=>", ",=", ",=t", ",?=", ",!<>", - ",!?>=", ",<", ",?<", ",!>=", ",!?>", ",<=", ",?<=", ",!>", - ",!?<=", ",>", ",?>", ",!<=", ",!?<", ",>=", ",?>=", ",!<", - ",!?=", ",<>", ",!=", ",!=t", ",!?", ",<=>", ",true?", ",true" -}; -static const char *const signed_unsigned_names[] = {",u", ",s"}; -static const char *const mix_half_names[] = {",l", ",r"}; -static const char *const saturation_names[] = {",us", ",ss", 0, ""}; -static const char *const read_write_names[] = {",r", ",w"}; -static const char *const add_compl_names[] = { 0, "", ",l", ",tsv" }; - -/* For a bunch of different instructions form an index into a - completer name table. 
*/ -#define GET_COMPL(insn) (GET_FIELD (insn, 26, 26) | \ - GET_FIELD (insn, 18, 18) << 1) - -#define GET_COND(insn) (GET_FIELD ((insn), 16, 18) + \ - (GET_FIELD ((insn), 19, 19) ? 8 : 0)) - -/* Utility function to print registers. Put these first, so gcc's function - inlining can do its stuff. */ - -#define fputs_filtered(STR,F) (*info->fprintf_func) (info->stream, "%s", STR) - -static void -fput_reg (unsigned reg, disassemble_info *info) -{ - (*info->fprintf_func) (info->stream, "%s", reg ? reg_names[reg] : "r0"); -} - -static void -fput_fp_reg (unsigned reg, disassemble_info *info) -{ - (*info->fprintf_func) (info->stream, "%s", reg ? fp_reg_names[reg] : "fr0"); -} - -static void -fput_fp_reg_r (unsigned reg, disassemble_info *info) -{ - /* Special case floating point exception registers. */ - if (reg < 4) - (*info->fprintf_func) (info->stream, "fpe%d", reg * 2 + 1); - else - (*info->fprintf_func) (info->stream, "%sR", - reg ? fp_reg_names[reg] : "fr0"); -} - -static void -fput_creg (unsigned reg, disassemble_info *info) -{ - (*info->fprintf_func) (info->stream, "%s", control_reg[reg]); -} - -/* Print constants with sign. */ - -static void -fput_const (unsigned num, disassemble_info *info) -{ - if ((int) num < 0) - (*info->fprintf_func) (info->stream, "-%x", - (int) num); - else - (*info->fprintf_func) (info->stream, "%x", num); -} - -/* Routines to extract various sized constants out of hppa - instructions. */ - -/* Extract a 3-bit space register number from a be, ble, mtsp or mfsp. */ -static int -extract_3 (unsigned word) -{ - return GET_FIELD (word, 18, 18) << 2 | GET_FIELD (word, 16, 17); -} - -static int -extract_5_load (unsigned word) -{ - return low_sign_extend (word >> 16 & MASK_5, 5); -} - -/* Extract the immediate field from a st{bhw}s instruction. */ - -static int -extract_5_store (unsigned word) -{ - return low_sign_extend (word & MASK_5, 5); -} - -/* Extract the immediate field from a break instruction. */ - -static unsigned -extract_5r_store (unsigned word) -{ - return (word & MASK_5); -} - -/* Extract the immediate field from a {sr}sm instruction. */ - -static unsigned -extract_5R_store (unsigned word) -{ - return (word >> 16 & MASK_5); -} - -/* Extract the 10 bit immediate field from a {sr}sm instruction. */ - -static unsigned -extract_10U_store (unsigned word) -{ - return (word >> 16 & MASK_10); -} - -/* Extract the immediate field from a bb instruction. */ - -static unsigned -extract_5Q_store (unsigned word) -{ - return (word >> 21 & MASK_5); -} - -/* Extract an 11 bit immediate field. */ - -static int -extract_11 (unsigned word) -{ - return low_sign_extend (word & MASK_11, 11); -} - -/* Extract a 14 bit immediate field. */ - -static int -extract_14 (unsigned word) -{ - return low_sign_extend (word & MASK_14, 14); -} - -/* Extract a 16 bit immediate field (PA2.0 wide only). */ - -static int -extract_16 (unsigned word) -{ - int m15, m0, m1; - - m0 = GET_BIT (word, 16); - m1 = GET_BIT (word, 17); - m15 = GET_BIT (word, 31); - word = (word >> 1) & 0x1fff; - word = word | (m15 << 15) | ((m15 ^ m0) << 14) | ((m15 ^ m1) << 13); - return sign_extend (word, 16); -} - -/* Extract a 21 bit constant. 
*/ - -static int -extract_21 (unsigned word) -{ - int val; - - word &= MASK_21; - word <<= 11; - val = GET_FIELD (word, 20, 20); - val <<= 11; - val |= GET_FIELD (word, 9, 19); - val <<= 2; - val |= GET_FIELD (word, 5, 6); - val <<= 5; - val |= GET_FIELD (word, 0, 4); - val <<= 2; - val |= GET_FIELD (word, 7, 8); - return sign_extend (val, 21) << 11; -} - -/* Extract a 12 bit constant from branch instructions. */ - -static int -extract_12 (unsigned word) -{ - return sign_extend (GET_FIELD (word, 19, 28) - | GET_FIELD (word, 29, 29) << 10 - | (word & 0x1) << 11, 12) << 2; -} - -/* Extract a 17 bit constant from branch instructions, returning the - 19 bit signed value. */ - -static int -extract_17 (unsigned word) -{ - return sign_extend (GET_FIELD (word, 19, 28) - | GET_FIELD (word, 29, 29) << 10 - | GET_FIELD (word, 11, 15) << 11 - | (word & 0x1) << 16, 17) << 2; -} - -static int -extract_22 (unsigned word) -{ - return sign_extend (GET_FIELD (word, 19, 28) - | GET_FIELD (word, 29, 29) << 10 - | GET_FIELD (word, 11, 15) << 11 - | GET_FIELD (word, 6, 10) << 16 - | (word & 0x1) << 21, 22) << 2; -} - -/* Print one instruction. */ - -int -print_insn_hppa (bfd_vma memaddr, disassemble_info *info) -{ - bfd_byte buffer[4]; - unsigned int insn, i; - - { - int status = - (*info->read_memory_func) (memaddr, buffer, sizeof (buffer), info); - if (status != 0) - { - (*info->memory_error_func) (status, memaddr, info); - return -1; - } - } - - insn = bfd_getb32 (buffer); - - for (i = 0; i < NUMOPCODES; ++i) - { - const struct pa_opcode *opcode = &pa_opcodes[i]; - - if ((insn & opcode->mask) == opcode->match) - { - const char *s; -#ifndef BFD64 - if (opcode->arch == pa20w) - continue; -#endif - (*info->fprintf_func) (info->stream, "%s", opcode->name); - - if (!strchr ("cfCY?-+nHNZFIuv{", opcode->args[0])) - (*info->fprintf_func) (info->stream, " "); - for (s = opcode->args; *s != '\0'; ++s) - { - switch (*s) - { - case 'x': - fput_reg (GET_FIELD (insn, 11, 15), info); - break; - case 'a': - case 'b': - fput_reg (GET_FIELD (insn, 6, 10), info); - break; - case '^': - fput_creg (GET_FIELD (insn, 6, 10), info); - break; - case 't': - fput_reg (GET_FIELD (insn, 27, 31), info); - break; - - /* Handle floating point registers. */ - case 'f': - switch (*++s) - { - case 't': - fput_fp_reg (GET_FIELD (insn, 27, 31), info); - break; - case 'T': - if (GET_FIELD (insn, 25, 25)) - fput_fp_reg_r (GET_FIELD (insn, 27, 31), info); - else - fput_fp_reg (GET_FIELD (insn, 27, 31), info); - break; - case 'a': - if (GET_FIELD (insn, 25, 25)) - fput_fp_reg_r (GET_FIELD (insn, 6, 10), info); - else - fput_fp_reg (GET_FIELD (insn, 6, 10), info); - break; - - /* 'fA' will not generate a space before the regsiter - name. Normally that is fine. Except that it - causes problems with xmpyu which has no FP format - completer. 
*/ - case 'X': - fputs_filtered (" ", info); - /* FALLTHRU */ - - case 'A': - if (GET_FIELD (insn, 24, 24)) - fput_fp_reg_r (GET_FIELD (insn, 6, 10), info); - else - fput_fp_reg (GET_FIELD (insn, 6, 10), info); - break; - case 'b': - if (GET_FIELD (insn, 25, 25)) - fput_fp_reg_r (GET_FIELD (insn, 11, 15), info); - else - fput_fp_reg (GET_FIELD (insn, 11, 15), info); - break; - case 'B': - if (GET_FIELD (insn, 19, 19)) - fput_fp_reg_r (GET_FIELD (insn, 11, 15), info); - else - fput_fp_reg (GET_FIELD (insn, 11, 15), info); - break; - case 'C': - { - int reg = GET_FIELD (insn, 21, 22); - reg |= GET_FIELD (insn, 16, 18) << 2; - if (GET_FIELD (insn, 23, 23) != 0) - fput_fp_reg_r (reg, info); - else - fput_fp_reg (reg, info); - break; - } - case 'i': - { - int reg = GET_FIELD (insn, 6, 10); - - reg |= (GET_FIELD (insn, 26, 26) << 4); - fput_fp_reg (reg, info); - break; - } - case 'j': - { - int reg = GET_FIELD (insn, 11, 15); - - reg |= (GET_FIELD (insn, 26, 26) << 4); - fput_fp_reg (reg, info); - break; - } - case 'k': - { - int reg = GET_FIELD (insn, 27, 31); - - reg |= (GET_FIELD (insn, 26, 26) << 4); - fput_fp_reg (reg, info); - break; - } - case 'l': - { - int reg = GET_FIELD (insn, 21, 25); - - reg |= (GET_FIELD (insn, 26, 26) << 4); - fput_fp_reg (reg, info); - break; - } - case 'm': - { - int reg = GET_FIELD (insn, 16, 20); - - reg |= (GET_FIELD (insn, 26, 26) << 4); - fput_fp_reg (reg, info); - break; - } - - /* 'fe' will not generate a space before the register - name. Normally that is fine. Except that it - causes problems with fstw fe,y(b) which has no FP - format completer. */ - case 'E': - fputs_filtered (" ", info); - /* FALLTHRU */ - - case 'e': - if (GET_FIELD (insn, 30, 30)) - fput_fp_reg_r (GET_FIELD (insn, 11, 15), info); - else - fput_fp_reg (GET_FIELD (insn, 11, 15), info); - break; - case 'x': - fput_fp_reg (GET_FIELD (insn, 11, 15), info); - break; - } - break; - - case '5': - fput_const (extract_5_load (insn), info); - break; - case 's': - { - int space = GET_FIELD (insn, 16, 17); - /* Zero means implicit addressing, not use of sr0. */ - if (space != 0) - (*info->fprintf_func) (info->stream, "sr%d", space); - } - break; - - case 'S': - (*info->fprintf_func) (info->stream, "sr%d", - extract_3 (insn)); - break; - - /* Handle completers. 
*/ - case 'c': - switch (*++s) - { - case 'x': - (*info->fprintf_func) - (info->stream, "%s", - index_compl_names[GET_COMPL (insn)]); - break; - case 'X': - (*info->fprintf_func) - (info->stream, "%s ", - index_compl_names[GET_COMPL (insn)]); - break; - case 'm': - (*info->fprintf_func) - (info->stream, "%s", - short_ldst_compl_names[GET_COMPL (insn)]); - break; - case 'M': - (*info->fprintf_func) - (info->stream, "%s ", - short_ldst_compl_names[GET_COMPL (insn)]); - break; - case 'A': - (*info->fprintf_func) - (info->stream, "%s ", - short_bytes_compl_names[GET_COMPL (insn)]); - break; - case 's': - (*info->fprintf_func) - (info->stream, "%s", - short_bytes_compl_names[GET_COMPL (insn)]); - break; - case 'c': - case 'C': - switch (GET_FIELD (insn, 20, 21)) - { - case 1: - (*info->fprintf_func) (info->stream, ",bc "); - break; - case 2: - (*info->fprintf_func) (info->stream, ",sl "); - break; - default: - (*info->fprintf_func) (info->stream, " "); - } - break; - case 'd': - switch (GET_FIELD (insn, 20, 21)) - { - case 1: - (*info->fprintf_func) (info->stream, ",co "); - break; - default: - (*info->fprintf_func) (info->stream, " "); - } - break; - case 'o': - (*info->fprintf_func) (info->stream, ",o"); - break; - case 'g': - (*info->fprintf_func) (info->stream, ",gate"); - break; - case 'p': - (*info->fprintf_func) (info->stream, ",l,push"); - break; - case 'P': - (*info->fprintf_func) (info->stream, ",pop"); - break; - case 'l': - case 'L': - (*info->fprintf_func) (info->stream, ",l"); - break; - case 'w': - (*info->fprintf_func) - (info->stream, "%s ", - read_write_names[GET_FIELD (insn, 25, 25)]); - break; - case 'W': - (*info->fprintf_func) (info->stream, ",w "); - break; - case 'r': - if (GET_FIELD (insn, 23, 26) == 5) - (*info->fprintf_func) (info->stream, ",r"); - break; - case 'Z': - if (GET_FIELD (insn, 26, 26)) - (*info->fprintf_func) (info->stream, ",m "); - else - (*info->fprintf_func) (info->stream, " "); - break; - case 'i': - if (GET_FIELD (insn, 25, 25)) - (*info->fprintf_func) (info->stream, ",i"); - break; - case 'z': - if (!GET_FIELD (insn, 21, 21)) - (*info->fprintf_func) (info->stream, ",z"); - break; - case 'a': - (*info->fprintf_func) - (info->stream, "%s", - add_compl_names[GET_FIELD (insn, 20, 21)]); - break; - case 'Y': - (*info->fprintf_func) - (info->stream, ",dc%s", - add_compl_names[GET_FIELD (insn, 20, 21)]); - break; - case 'y': - (*info->fprintf_func) - (info->stream, ",c%s", - add_compl_names[GET_FIELD (insn, 20, 21)]); - break; - case 'v': - if (GET_FIELD (insn, 20, 20)) - (*info->fprintf_func) (info->stream, ",tsv"); - break; - case 't': - (*info->fprintf_func) (info->stream, ",tc"); - if (GET_FIELD (insn, 20, 20)) - (*info->fprintf_func) (info->stream, ",tsv"); - break; - case 'B': - (*info->fprintf_func) (info->stream, ",db"); - if (GET_FIELD (insn, 20, 20)) - (*info->fprintf_func) (info->stream, ",tsv"); - break; - case 'b': - (*info->fprintf_func) (info->stream, ",b"); - if (GET_FIELD (insn, 20, 20)) - (*info->fprintf_func) (info->stream, ",tsv"); - break; - case 'T': - if (GET_FIELD (insn, 25, 25)) - (*info->fprintf_func) (info->stream, ",tc"); - break; - case 'S': - /* EXTRD/W has a following condition. 
*/ - if (*(s + 1) == '?') - (*info->fprintf_func) - (info->stream, "%s", - signed_unsigned_names[GET_FIELD (insn, 21, 21)]); - else - (*info->fprintf_func) - (info->stream, "%s ", - signed_unsigned_names[GET_FIELD (insn, 21, 21)]); - break; - case 'h': - (*info->fprintf_func) - (info->stream, "%s", - mix_half_names[GET_FIELD (insn, 17, 17)]); - break; - case 'H': - (*info->fprintf_func) - (info->stream, "%s ", - saturation_names[GET_FIELD (insn, 24, 25)]); - break; - case '*': - (*info->fprintf_func) - (info->stream, ",%d%d%d%d ", - GET_FIELD (insn, 17, 18), GET_FIELD (insn, 20, 21), - GET_FIELD (insn, 22, 23), GET_FIELD (insn, 24, 25)); - break; - - case 'q': - { - int m, a; - - m = GET_FIELD (insn, 28, 28); - a = GET_FIELD (insn, 29, 29); - - if (m && !a) - fputs_filtered (",ma ", info); - else if (m && a) - fputs_filtered (",mb ", info); - else - fputs_filtered (" ", info); - break; - } - - case 'J': - { - int opc = GET_FIELD (insn, 0, 5); - - if (opc == 0x16 || opc == 0x1e) - { - if (GET_FIELD (insn, 29, 29) == 0) - fputs_filtered (",ma ", info); - else - fputs_filtered (",mb ", info); - } - else - fputs_filtered (" ", info); - break; - } - - case 'e': - { - int opc = GET_FIELD (insn, 0, 5); - - if (opc == 0x13 || opc == 0x1b) - { - if (GET_FIELD (insn, 18, 18) == 1) - fputs_filtered (",mb ", info); - else - fputs_filtered (",ma ", info); - } - else if (opc == 0x17 || opc == 0x1f) - { - if (GET_FIELD (insn, 31, 31) == 1) - fputs_filtered (",ma ", info); - else - fputs_filtered (",mb ", info); - } - else - fputs_filtered (" ", info); - - break; - } - } - break; - - /* Handle conditions. */ - case '?': - { - s++; - switch (*s) - { - case 'f': - (*info->fprintf_func) - (info->stream, "%s ", - float_comp_names[GET_FIELD (insn, 27, 31)]); - break; - - /* These four conditions are for the set of instructions - which distinguish true/false conditions by opcode - rather than by the 'f' bit (sigh): comb, comib, - addb, addib. 
*/ - case 't': - fputs_filtered - (compare_cond_names[GET_FIELD (insn, 16, 18)], info); - break; - case 'n': - fputs_filtered - (compare_cond_names[GET_FIELD (insn, 16, 18) - + GET_FIELD (insn, 4, 4) * 8], - info); - break; - case 'N': - fputs_filtered - (compare_cond_64_names[GET_FIELD (insn, 16, 18) - + GET_FIELD (insn, 2, 2) * 8], - info); - break; - case 'Q': - fputs_filtered - (cmpib_cond_64_names[GET_FIELD (insn, 16, 18)], - info); - break; - case '@': - fputs_filtered - (add_cond_names[GET_FIELD (insn, 16, 18) - + GET_FIELD (insn, 4, 4) * 8], - info); - break; - case 's': - (*info->fprintf_func) - (info->stream, "%s ", - compare_cond_names[GET_COND (insn)]); - break; - case 'S': - (*info->fprintf_func) - (info->stream, "%s ", - compare_cond_64_names[GET_COND (insn)]); - break; - case 'a': - (*info->fprintf_func) - (info->stream, "%s ", - add_cond_names[GET_COND (insn)]); - break; - case 'A': - (*info->fprintf_func) - (info->stream, "%s ", - add_cond_64_names[GET_COND (insn)]); - break; - case 'd': - (*info->fprintf_func) - (info->stream, "%s", - add_cond_names[GET_FIELD (insn, 16, 18)]); - break; - - case 'W': - (*info->fprintf_func) - (info->stream, "%s", - wide_add_cond_names[GET_FIELD (insn, 16, 18) + - GET_FIELD (insn, 4, 4) * 8]); - break; - - case 'l': - (*info->fprintf_func) - (info->stream, "%s ", - logical_cond_names[GET_COND (insn)]); - break; - case 'L': - (*info->fprintf_func) - (info->stream, "%s ", - logical_cond_64_names[GET_COND (insn)]); - break; - case 'u': - (*info->fprintf_func) - (info->stream, "%s ", - unit_cond_names[GET_COND (insn)]); - break; - case 'U': - (*info->fprintf_func) - (info->stream, "%s ", - unit_cond_64_names[GET_COND (insn)]); - break; - case 'y': - case 'x': - case 'b': - (*info->fprintf_func) - (info->stream, "%s", - shift_cond_names[GET_FIELD (insn, 16, 18)]); - - /* If the next character in args is 'n', it will handle - putting out the space. */ - if (s[1] != 'n') - (*info->fprintf_func) (info->stream, " "); - break; - case 'X': - (*info->fprintf_func) - (info->stream, "%s ", - shift_cond_64_names[GET_FIELD (insn, 16, 18)]); - break; - case 'B': - (*info->fprintf_func) - (info->stream, "%s", - bb_cond_64_names[GET_FIELD (insn, 16, 16)]); - - /* If the next character in args is 'n', it will handle - putting out the space. */ - if (s[1] != 'n') - (*info->fprintf_func) (info->stream, " "); - break; - } - break; - } - - case 'V': - fput_const (extract_5_store (insn), info); - break; - case 'r': - fput_const (extract_5r_store (insn), info); - break; - case 'R': - fput_const (extract_5R_store (insn), info); - break; - case 'U': - fput_const (extract_10U_store (insn), info); - break; - case 'B': - case 'Q': - fput_const (extract_5Q_store (insn), info); - break; - case 'i': - fput_const (extract_11 (insn), info); - break; - case 'j': - fput_const (extract_14 (insn), info); - break; - case 'k': - fputs_filtered ("L%", info); - fput_const (extract_21 (insn), info); - break; - case '<': - case 'l': - /* 16-bit long disp., PA2.0 wide only. 
*/ - fput_const (extract_16 (insn), info); - break; - case 'n': - if (insn & 0x2) - (*info->fprintf_func) (info->stream, ",n "); - else - (*info->fprintf_func) (info->stream, " "); - break; - case 'N': - if ((insn & 0x20) && s[1]) - (*info->fprintf_func) (info->stream, ",n "); - else if (insn & 0x20) - (*info->fprintf_func) (info->stream, ",n"); - else if (s[1]) - (*info->fprintf_func) (info->stream, " "); - break; - case 'w': - (*info->print_address_func) - (memaddr + 8 + extract_12 (insn), info); - break; - case 'W': - /* 17 bit PC-relative branch. */ - (*info->print_address_func) - ((memaddr + 8 + extract_17 (insn)), info); - break; - case 'z': - /* 17 bit displacement. This is an offset from a register - so it gets disasssembled as just a number, not any sort - of address. */ - fput_const (extract_17 (insn), info); - break; - - case 'Z': - /* addil %r1 implicit output. */ - fputs_filtered ("r1", info); - break; - - case 'Y': - /* be,l %sr0,%r31 implicit output. */ - fputs_filtered ("sr0,r31", info); - break; - - case '@': - (*info->fprintf_func) (info->stream, "0"); - break; - - case '.': - (*info->fprintf_func) (info->stream, "%d", - GET_FIELD (insn, 24, 25)); - break; - case '*': - (*info->fprintf_func) (info->stream, "%d", - GET_FIELD (insn, 22, 25)); - break; - case '!': - fputs_filtered ("sar", info); - break; - case 'p': - (*info->fprintf_func) (info->stream, "%d", - 31 - GET_FIELD (insn, 22, 26)); - break; - case '~': - { - int num; - num = GET_FIELD (insn, 20, 20) << 5; - num |= GET_FIELD (insn, 22, 26); - (*info->fprintf_func) (info->stream, "%d", 63 - num); - break; - } - case 'P': - (*info->fprintf_func) (info->stream, "%d", - GET_FIELD (insn, 22, 26)); - break; - case 'q': - { - int num; - num = GET_FIELD (insn, 20, 20) << 5; - num |= GET_FIELD (insn, 22, 26); - (*info->fprintf_func) (info->stream, "%d", num); - break; - } - case 'T': - (*info->fprintf_func) (info->stream, "%d", - 32 - GET_FIELD (insn, 27, 31)); - break; - case '%': - { - int num; - num = (GET_FIELD (insn, 23, 23) + 1) * 32; - num -= GET_FIELD (insn, 27, 31); - (*info->fprintf_func) (info->stream, "%d", num); - break; - } - case '|': - { - int num; - num = (GET_FIELD (insn, 19, 19) + 1) * 32; - num -= GET_FIELD (insn, 27, 31); - (*info->fprintf_func) (info->stream, "%d", num); - break; - } - case '$': - fput_const (GET_FIELD (insn, 20, 28), info); - break; - case 'A': - fput_const (GET_FIELD (insn, 6, 18), info); - break; - case 'D': - fput_const (GET_FIELD (insn, 6, 31), info); - break; - case 'v': - (*info->fprintf_func) (info->stream, ",%d", - GET_FIELD (insn, 23, 25)); - break; - case 'O': - fput_const ((GET_FIELD (insn, 6,20) << 5 | - GET_FIELD (insn, 27, 31)), info); - break; - case 'o': - fput_const (GET_FIELD (insn, 6, 20), info); - break; - case '2': - fput_const ((GET_FIELD (insn, 6, 22) << 5 | - GET_FIELD (insn, 27, 31)), info); - break; - case '1': - fput_const ((GET_FIELD (insn, 11, 20) << 5 | - GET_FIELD (insn, 27, 31)), info); - break; - case '0': - fput_const ((GET_FIELD (insn, 16, 20) << 5 | - GET_FIELD (insn, 27, 31)), info); - break; - case 'u': - (*info->fprintf_func) (info->stream, ",%d", - GET_FIELD (insn, 23, 25)); - break; - case 'F': - /* If no destination completer and not before a completer - for fcmp, need a space here. 
*/ - if (s[1] == 'G' || s[1] == '?') - fputs_filtered - (float_format_names[GET_FIELD (insn, 19, 20)], info); - else - (*info->fprintf_func) - (info->stream, "%s ", - float_format_names[GET_FIELD (insn, 19, 20)]); - break; - case 'G': - (*info->fprintf_func) - (info->stream, "%s ", - float_format_names[GET_FIELD (insn, 17, 18)]); - break; - case 'H': - if (GET_FIELD (insn, 26, 26) == 1) - (*info->fprintf_func) (info->stream, "%s ", - float_format_names[0]); - else - (*info->fprintf_func) (info->stream, "%s ", - float_format_names[1]); - break; - case 'I': - /* If no destination completer and not before a completer - for fcmp, need a space here. */ - if (s[1] == '?') - fputs_filtered - (float_format_names[GET_FIELD (insn, 20, 20)], info); - else - (*info->fprintf_func) - (info->stream, "%s ", - float_format_names[GET_FIELD (insn, 20, 20)]); - break; - - case 'J': - fput_const (extract_14 (insn), info); - break; - - case '#': - { - int sign = GET_FIELD (insn, 31, 31); - int imm10 = GET_FIELD (insn, 18, 27); - int disp; - - if (sign) - disp = (-1 << 10) | imm10; - else - disp = imm10; - - disp <<= 3; - fput_const (disp, info); - break; - } - case 'K': - case 'd': - { - int sign = GET_FIELD (insn, 31, 31); - int imm11 = GET_FIELD (insn, 18, 28); - int disp; - - if (sign) - disp = (-1 << 11) | imm11; - else - disp = imm11; - - disp <<= 2; - fput_const (disp, info); - break; - } - - case '>': - case 'y': - { - /* 16-bit long disp., PA2.0 wide only. */ - int disp = extract_16 (insn); - disp &= ~3; - fput_const (disp, info); - break; - } - - case '&': - { - /* 16-bit long disp., PA2.0 wide only. */ - int disp = extract_16 (insn); - disp &= ~7; - fput_const (disp, info); - break; - } - - case '_': - break; /* Dealt with by '{' */ - - case '{': - { - int sub = GET_FIELD (insn, 14, 16); - int df = GET_FIELD (insn, 17, 18); - int sf = GET_FIELD (insn, 19, 20); - const char * const * source = float_format_names; - const char * const * dest = float_format_names; - const char *t = ""; - - if (sub == 4) - { - fputs_filtered (",UND ", info); - break; - } - if ((sub & 3) == 3) - t = ",t"; - if ((sub & 3) == 1) - source = sub & 4 ? fcnv_ufixed_names : fcnv_fixed_names; - if (sub & 2) - dest = sub & 4 ? 
fcnv_ufixed_names : fcnv_fixed_names; - - (*info->fprintf_func) (info->stream, "%s%s%s ", - t, source[sf], dest[df]); - break; - } - - case 'm': - { - int y = GET_FIELD (insn, 16, 18); - - if (y != 1) - fput_const ((y ^ 1) - 1, info); - } - break; - - case 'h': - { - int cbit; - - cbit = GET_FIELD (insn, 16, 18); - - if (cbit > 0) - (*info->fprintf_func) (info->stream, ",%d", cbit - 1); - break; - } - - case '=': - { - int cond = GET_FIELD (insn, 27, 31); - - switch (cond) - { - case 0: fputs_filtered (" ", info); break; - case 1: fputs_filtered ("acc ", info); break; - case 2: fputs_filtered ("rej ", info); break; - case 5: fputs_filtered ("acc8 ", info); break; - case 6: fputs_filtered ("rej8 ", info); break; - case 9: fputs_filtered ("acc6 ", info); break; - case 13: fputs_filtered ("acc4 ", info); break; - case 17: fputs_filtered ("acc2 ", info); break; - default: break; - } - break; - } - - case 'X': - (*info->print_address_func) - (memaddr + 8 + extract_22 (insn), info); - break; - case 'L': - fputs_filtered (",rp", info); - break; - default: - (*info->fprintf_func) (info->stream, "%c", *s); - break; - } - } - return sizeof (insn); - } - } - (*info->fprintf_func) (info->stream, "#%8x", insn); - return sizeof (insn); -} diff --git a/disas/ppc.c b/disas/ppc.c index 052cebe851..bd05623a79 100644 --- a/disas/ppc.c +++ b/disas/ppc.c @@ -2286,6 +2286,10 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "vrlh", VX(4, 68), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vrlw", VX(4, 132), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vrsqrtefp", VX(4, 330), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrldmi", VX(4, 197), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrldnm", VX(4, 453), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrlwmi", VX(4, 133), VX_MASK, PPCVEC, { VD, VA, VB} }, +{ "vrlwnm", VX(4, 389), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vsel", VXA(4, 42), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, { "vsl", VX(4, 452), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vslb", VX(4, 260), VX_MASK, PPCVEC, { VD, VA, VB } }, diff --git a/disas/sh4.c b/disas/sh4.c index 8b0415dfe9..6b66176bed 100644 --- a/disas/sh4.c +++ b/disas/sh4.c @@ -264,12 +264,6 @@ sh_dsp_reg_nums; be some confusion between DSP and FPU etc. */ #define SH_ARCH_UNKNOWN_ARCH 0xffffffff -/* These are defined in bfd/cpu-sh.c . */ -unsigned int sh_get_arch_from_bfd_mach (unsigned long mach); -unsigned int sh_get_arch_up_from_bfd_mach (unsigned long mach); -unsigned long sh_get_bfd_mach_from_arch_set (unsigned int arch_set); -/* bfd_boolean sh_merge_bfd_arch (bfd *ibfd, bfd *obfd); */ - /* Below are the 'architecture sets'. 
They describe the following inheritance graph: diff --git a/dma-helpers.c b/dma-helpers.c index 9defc101b7..6f9d47ca50 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -73,6 +73,7 @@ typedef struct { AioContext *ctx; BlockAIOCB *acb; QEMUSGList *sg; + uint32_t align; uint64_t offset; DMADirection dir; int sg_cur_index; @@ -160,8 +161,9 @@ static void dma_blk_cb(void *opaque, int ret) return; } - if (dbs->iov.size & ~BDRV_SECTOR_MASK) { - qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK); + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { + qemu_iovec_discard_back(&dbs->iov, + QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align)); } dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, @@ -199,7 +201,7 @@ static const AIOCBInfo dma_aiocb_info = { }; BlockAIOCB *dma_blk_io(AioContext *ctx, - QEMUSGList *sg, uint64_t offset, + QEMUSGList *sg, uint64_t offset, uint32_t align, DMAIOFunc *io_func, void *io_func_opaque, BlockCompletionFunc *cb, void *opaque, DMADirection dir) @@ -212,6 +214,7 @@ BlockAIOCB *dma_blk_io(AioContext *ctx, dbs->sg = sg; dbs->ctx = ctx; dbs->offset = offset; + dbs->align = align; dbs->sg_cur_index = 0; dbs->sg_cur_byte = 0; dbs->dir = dir; @@ -234,11 +237,11 @@ BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov, } BlockAIOCB *dma_blk_read(BlockBackend *blk, - QEMUSGList *sg, uint64_t offset, + QEMUSGList *sg, uint64_t offset, uint32_t align, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_blk_io(blk_get_aio_context(blk), - sg, offset, dma_blk_read_io_func, blk, cb, opaque, + return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + dma_blk_read_io_func, blk, cb, opaque, DMA_DIRECTION_FROM_DEVICE); } @@ -252,11 +255,11 @@ BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov, } BlockAIOCB *dma_blk_write(BlockBackend *blk, - QEMUSGList *sg, uint64_t offset, + QEMUSGList *sg, uint64_t offset, uint32_t align, void (*cb)(void *opaque, int ret), void *opaque) { - return dma_blk_io(blk_get_aio_context(blk), - sg, offset, dma_blk_write_io_func, blk, cb, opaque, + return dma_blk_io(blk_get_aio_context(blk), sg, offset, align, + dma_blk_write_io_func, blk, cb, opaque, DMA_DIRECTION_TO_DEVICE); } diff --git a/docs/COLO-FT.txt b/docs/COLO-FT.txt new file mode 100644 index 0000000000..e289be2f41 --- /dev/null +++ b/docs/COLO-FT.txt @@ -0,0 +1,191 @@ +COarse-grained LOck-stepping Virtual Machines for Non-stop Service +---------------------------------------- +Copyright (c) 2016 Intel Corporation +Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. +Copyright (c) 2016 Fujitsu, Corp. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + +This document gives an overview of COLO's design and how to use it. + +== Background == +Virtual machine (VM) replication is a well known technique for providing +application-agnostic software-implemented hardware fault tolerance, +also known as "non-stop service". + +COLO (COarse-grained LOck-stepping) is a high availability solution. +Both primary VM (PVM) and secondary VM (SVM) run in parallel. They receive the +same request from client, and generate response in parallel too. +If the response packets from PVM and SVM are identical, they are released +immediately. Otherwise, a VM checkpoint (on demand) is conducted. + +== Architecture == + +The architecture of COLO is shown in the diagram below. 
+It consists of a pair of networked physical nodes: +The primary node running the PVM, and the secondary node running the SVM +to maintain a valid replica of the PVM. +PVM and SVM execute in parallel and generate output of response packets for +client requests according to the application semantics. + +The incoming packets from the client or external network are received by the +primary node, and then forwarded to the secondary node, so that both the PVM +and the SVM are stimulated with the same requests. + +COLO receives the outbound packets from both the PVM and SVM and compares them +before allowing the output to be sent to clients. + +The SVM is qualified as a valid replica of the PVM, as long as it generates +identical responses to all client requests. Once the differences in the outputs +are detected between the PVM and SVM, COLO withholds transmission of the +outbound packets until it has successfully synchronized the PVM state to the SVM. + + Primary Node Secondary Node ++------------+ +-----------------------+ +------------------------+ +------------+ +| | | HeartBeat +<----->+ HeartBeat | | | +| Primary VM | +-----------+-----------+ +-----------+------------+ |Secondary VM| +| | | | | | +| | +-----------|-----------+ +-----------|------------+ | | +| | |QEMU +---v----+ | |QEMU +----v---+ | | | +| | | |Failover| | | |Failover| | | | +| | | +--------+ | | +--------+ | | | +| | | +---------------+ | | +---------------+ | | | +| | | | VM Checkpoint +-------------->+ VM Checkpoint | | | | +| | | +---------------+ | | +---------------+ | | | +|Requests<--------------------------\ /-----------------\ /--------------------->Requests| +| | | ^ ^ | | | | | | | +|Responses+---------------------\ /-|-|------------\ /-------------------------+Responses| +| | | | | | | | | | | | | | | | +| | | +-----------+ | | | | | | | | | | +----------+ | | | +| | | | COLO disk | | | | | | | | | | | | COLO disk| | | | +| | | | Manager +---------------------------->| Manager | | | | +| | | ++----------+ v v | | | | | v v | +---------++ | | | +| | | |+-----------+-+-+-++| | ++-+--+-+---------+ | | | | +| | | || COLO Proxy || | | COLO Proxy | | | | | +| | | || (compare packet || | |(adjust sequence | | | | | +| | | ||and mirror packet)|| | | and ACK) | | | | | +| | | |+------------+---+-+| | +-----------------+ | | | | ++------------+ +-----------------------+ +------------------------+ +------------+ ++------------+ | | | | +------------+ +| VM Monitor | | | | | | VM Monitor | ++------------+ | | | | +------------+ ++---------------------------------------+ +----------------------------------------+ +| Kernel | | | | | Kernel | | ++---------------------------------------+ +----------------------------------------+ + | | | | + +--------------v+ +---------v---+--+ +------------------+ +v-------------+ + | Storage | |External Network| | External Network | | Storage | + +---------------+ +----------------+ +------------------+ +--------------+ + + +== Components introduction == + +You can see there are several components in COLO's diagram of architecture. +Their functions are described below. + +HeartBeat: +Runs on both the primary and secondary nodes, to periodically check platform +availability. When the primary node suffers a hardware fail-stop failure, +the heartbeat stops responding, the secondary node will trigger a failover +as soon as it determines the absence. 
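+
+As a rough sketch, the secondary-side logic implied by the paragraph above
+could look like the following pseudo-code (illustrative only; none of these
+functions exist in QEMU, and today the failover step is triggered manually
+with the 'x-colo-lost-heartbeat' command described below):
+
+    /* Hypothetical monitor loop on the secondary node, not actual QEMU code. */
+    while (colo_is_running()) {
+        if (!heartbeat_seen_within(HEARTBEAT_TIMEOUT)) {
+            /* Primary is presumed dead: promote the SVM. */
+            trigger_failover();
+            break;
+        }
+    }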
+
+COLO disk Manager:
+When the primary VM writes data into its image, the COLO disk manager
+captures this data and sends it to the secondary VM, which makes sure the
+content of the secondary VM's image is consistent with the content of the
+primary VM's image.
+For more details, please refer to docs/block-replication.txt.
+
+Checkpoint/Failover Controller:
+Modifies the save/restore flow to realize continuous migration, to make
+sure the state of the VM on the Secondary side is always consistent with
+the VM on the Primary side.
+
+COLO Proxy:
+Delivers packets to the Primary and the Secondary, and then compares the
+responses from both sides. It then decides whether to start a checkpoint
+according to some rules.
+Please refer to docs/colo-proxy.txt for more information.
+
+Note:
+HeartBeat has not been implemented yet, so you need to trigger the
+failover process manually with the 'x-colo-lost-heartbeat' command.
+
+== Test procedure ==
+1. Start up qemu
+Primary:
+# qemu-kvm -enable-kvm -m 2048 -smp 2 -qmp stdio -vnc :7 -name primary \
+  -device piix3-usb-uhci \
+  -device usb-tablet -netdev tap,id=hn0,vhost=off \
+  -device virtio-net-pci,id=net-pci0,netdev=hn0 \
+  -drive if=virtio,id=primary-disk0,driver=quorum,read-pattern=fifo,vote-threshold=1,\
+         children.0.file.filename=1.raw,\
+         children.0.driver=raw -S
+Secondary:
+# qemu-kvm -enable-kvm -m 2048 -smp 2 -qmp stdio -vnc :7 -name secondary \
+  -device piix3-usb-uhci \
+  -device usb-tablet -netdev tap,id=hn0,vhost=off \
+  -device virtio-net-pci,id=net-pci0,netdev=hn0 \
+  -drive if=none,id=secondary-disk0,file.filename=1.raw,driver=raw,node-name=node0 \
+  -drive if=virtio,id=active-disk0,driver=replication,mode=secondary,\
+         file.driver=qcow2,top-id=active-disk0,\
+         file.file.filename=/mnt/ramfs/active_disk.img,\
+         file.backing.driver=qcow2,\
+         file.backing.file.filename=/mnt/ramfs/hidden_disk.img,\
+         file.backing.backing=secondary-disk0 \
+  -incoming tcp:0:8888
+
+2. On Secondary VM's QEMU monitor, issue the command:
+{'execute':'qmp_capabilities'}
+{ 'execute': 'nbd-server-start',
+  'arguments': {'addr': {'type': 'inet', 'data': {'host': 'xx.xx.xx.xx', 'port': '8889'} } }
+}
+{'execute': 'nbd-server-add', 'arguments': {'device': 'secondary-disk0', 'writable': true } }
+
+Note:
+  a. The qmp commands nbd-server-start and nbd-server-add must be run
+     before running the qmp command migrate on the primary QEMU.
+  b. Active disk, hidden disk and nbd target's length should be the
+     same.
+  c. It is better to put the active disk and hidden disk in a ramdisk.
+
+3. On Primary VM's QEMU monitor, issue the command:
+{'execute':'qmp_capabilities'}
+{ 'execute': 'human-monitor-command',
+  'arguments': {'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xx.xx.xx.xx,file.port=8889,file.export=secondary-disk0,node-name=nbd_client0'}}
+{ 'execute':'x-blockdev-change', 'arguments':{'parent': 'primary-disk0', 'node': 'nbd_client0' } }
+{ 'execute': 'migrate-set-capabilities',
+  'arguments': {'capabilities': [ {'capability': 'x-colo', 'state': true } ] } }
+{ 'execute': 'migrate', 'arguments': {'uri': 'tcp:xx.xx.xx.xx:8888' } }
+
+  Note:
+  a. There should be only one NBD Client for each primary disk.
+  b. xx.xx.xx.xx is the secondary physical machine's hostname or IP.
+  c. The qmp command line must be run after running the qmp command line in
+     the secondary qemu.
+
+4. After the above steps, you will see that, whenever you make changes to
+the PVM, the SVM will be synced.
+You can issue the command
+'{ "execute": "migrate-set-parameters" , "arguments":{ "x-checkpoint-delay": 2000 } }'
+to change the checkpoint period.
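+
+If you want to confirm that the periodic checkpoints are actually happening,
+one simple check (the exact output depends on the QEMU version) is to query
+the migration state on the Primary VM's QEMU monitor:
+
+{ 'execute': 'query-migrate' }
+
+Once COLO has taken over from the initial migration, the reported status
+should be 'colo'.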
+
+5. Failover test
+You can kill the Primary VM and run 'x_colo_lost_heartbeat' in the Secondary
+VM's monitor at the same time; the SVM will then take over and the client
+will not notice the change.
+
+Before issuing the '{ "execute": "x-colo-lost-heartbeat" }' command, we have
+to issue block-related commands to stop block replication.
+Primary:
+  Remove the nbd child from the quorum:
+  { 'execute': 'x-blockdev-change', 'arguments': {'parent': 'colo-disk0', 'child': 'children.1'}}
+  { 'execute': 'human-monitor-command','arguments': {'command-line': 'drive_del blk-buddy0'}}
+  Note: there is currently no qmp command to remove the blockdev.
+
+Secondary:
+  The primary host is down, so we should do the following:
+  { 'execute': 'nbd-server-stop' }
+
+== TODO ==
+1. Support continuous VM replication.
+2. Support shared storage.
+3. Develop the heartbeat part.
+4. Reduce the VM's downtime while doing a checkpoint.
diff --git a/docs/atomics.txt b/docs/atomics.txt
index c95950b6c5..3ef5d85b1b 100644
--- a/docs/atomics.txt
+++ b/docs/atomics.txt
@@ -15,7 +15,8 @@ Macros defined by qemu/atomic.h fall in three camps:
 - compiler barriers: barrier();
 
 - weak atomic access and manual memory barriers: atomic_read(),
-  atomic_set(), smp_rmb(), smp_wmb(), smp_mb(), smp_read_barrier_depends();
+  atomic_set(), smp_rmb(), smp_wmb(), smp_mb(), smp_mb_acquire(),
+  smp_mb_release(), smp_read_barrier_depends();
 
 - sequentially consistent atomic access: everything else.
 
@@ -111,8 +112,8 @@ consistent primitives.
 
 When using this model, variables are accessed with atomic_read() and
 atomic_set(), and restrictions to the ordering of accesses is enforced
-using the smp_rmb(), smp_wmb(), smp_mb() and smp_read_barrier_depends()
-memory barriers.
+using the memory barrier macros: smp_rmb(), smp_wmb(), smp_mb(),
+smp_mb_acquire(), smp_mb_release(), smp_read_barrier_depends().
 
 atomic_read() and atomic_set() prevents the compiler from using
 optimizations that might otherwise optimize accesses out of existence
@@ -124,7 +125,7 @@ other threads, and which are local to the current thread or
 protected by other, more mundane means.
 
 Memory barriers control the order of references to shared memory.
-They come in four kinds:
+They come in six kinds:
 
 - smp_rmb() guarantees that all the LOAD operations specified before
   the barrier will appear to happen before all the LOAD operations
@@ -142,6 +143,16 @@ They come in four kinds:
   In other words, smp_wmb() puts a partial ordering on stores, but is not
   required to have any effect on loads.
 
+- smp_mb_acquire() guarantees that all the LOAD operations specified before
+  the barrier will appear to happen before all the LOAD or STORE operations
+  specified after the barrier with respect to the other components of
+  the system.
+
+- smp_mb_release() guarantees that all the STORE operations specified *after*
+  the barrier will appear to happen after all the LOAD or STORE operations
+  specified *before* the barrier with respect to the other components of
+  the system.
+
 - smp_mb() guarantees that all the LOAD and STORE operations specified
   before the barrier will appear to happen before all the LOAD and STORE
   operations specified after the barrier with respect to the other
@@ -149,8 +160,9 @@ They come in four kinds:
 
   smp_mb() puts a partial ordering on both loads and stores.
It is stronger than both a read and a write memory barrier; it implies both - smp_rmb() and smp_wmb(), but it also prevents STOREs coming before the - barrier from overtaking LOADs coming after the barrier and vice versa. + smp_mb_acquire() and smp_mb_release(), but it also prevents STOREs + coming before the barrier from overtaking LOADs coming after the + barrier and vice versa. - smp_read_barrier_depends() is a weaker kind of read barrier. On most processors, whenever two loads are performed such that the @@ -173,24 +185,21 @@ They come in four kinds: This is the set of barriers that is required *between* two atomic_read() and atomic_set() operations to achieve sequential consistency: - | 2nd operation | - |-----------------------------------------| - 1st operation | (after last) | atomic_read | atomic_set | - ---------------+--------------+-------------+------------| - (before first) | | none | smp_wmb() | - ---------------+--------------+-------------+------------| - atomic_read | smp_rmb() | smp_rmb()* | ** | - ---------------+--------------+-------------+------------| - atomic_set | none | smp_mb()*** | smp_wmb() | - ---------------+--------------+-------------+------------| + | 2nd operation | + |-----------------------------------------------| + 1st operation | (after last) | atomic_read | atomic_set | + ---------------+----------------+-------------+----------------| + (before first) | | none | smp_mb_release | + ---------------+----------------+-------------+----------------| + atomic_read | smp_mb_acquire | smp_rmb | ** | + ---------------+----------------+-------------+----------------| + atomic_set | none | smp_mb()*** | smp_wmb() | + ---------------+----------------+-------------+----------------| * Or smp_read_barrier_depends(). - ** This requires a load-store barrier. How to achieve this varies - depending on the machine, but in practice smp_rmb()+smp_wmb() - should have the desired effect. For example, on PowerPC the - lwsync instruction is a combined load-load, load-store and - store-store barrier. + ** This requires a load-store barrier. This is achieved by + either smp_mb_acquire() or smp_mb_release(). *** This requires a store-load barrier. On most machines, the only way to achieve this is a full barrier. @@ -199,11 +208,11 @@ and atomic_set() operations to achieve sequential consistency: You can see that the two possible definitions of atomic_mb_read() and atomic_mb_set() are the following: - 1) atomic_mb_read(p) = atomic_read(p); smp_rmb() - atomic_mb_set(p, v) = smp_wmb(); atomic_set(p, v); smp_mb() + 1) atomic_mb_read(p) = atomic_read(p); smp_mb_acquire() + atomic_mb_set(p, v) = smp_mb_release(); atomic_set(p, v); smp_mb() - 2) atomic_mb_read(p) = smp_mb() atomic_read(p); smp_rmb() - atomic_mb_set(p, v) = smp_wmb(); atomic_set(p, v); + 2) atomic_mb_read(p) = smp_mb() atomic_read(p); smp_mb_acquire() + atomic_mb_set(p, v) = smp_mb_release(); atomic_set(p, v); Usually the former is used, because smp_mb() is expensive and a program normally has more reads than writes. 
Therefore it makes more sense to @@ -222,7 +231,7 @@ place barriers instead: thread 1 thread 1 ------------------------- ------------------------ (other writes) - smp_wmb() + smp_mb_release() atomic_mb_set(&a, x) atomic_set(&a, x) smp_wmb() atomic_mb_set(&b, y) atomic_set(&b, y) @@ -233,7 +242,13 @@ place barriers instead: y = atomic_mb_read(&b) y = atomic_read(&b) smp_rmb() x = atomic_mb_read(&a) x = atomic_read(&a) - smp_rmb() + smp_mb_acquire() + + Note that the barrier between the stores in thread 1, and between + the loads in thread 2, has been optimized here to a write or a + read memory barrier respectively. On some architectures, notably + ARMv7, smp_mb_acquire and smp_mb_release are just as expensive as + smp_mb, but smp_rmb and/or smp_wmb are more efficient. - sometimes, a thread is accessing many variables that are otherwise unrelated to each other (for example because, apart from the current @@ -246,12 +261,12 @@ place barriers instead: n = 0; n = 0; for (i = 0; i < 10; i++) => for (i = 0; i < 10; i++) n += atomic_mb_read(&a[i]); n += atomic_read(&a[i]); - smp_rmb(); + smp_mb_acquire(); Similarly, atomic_mb_set() can be transformed as follows: smp_mb(): - smp_wmb(); + smp_mb_release(); for (i = 0; i < 10; i++) => for (i = 0; i < 10; i++) atomic_mb_set(&a[i], false); atomic_set(&a[i], false); smp_mb(); @@ -261,7 +276,7 @@ The two tricks can be combined. In this case, splitting a loop in two lets you hoist the barriers out of the loops _and_ eliminate the expensive smp_mb(): - smp_wmb(); + smp_mb_release(); for (i = 0; i < 10; i++) { => for (i = 0; i < 10; i++) atomic_mb_set(&a[i], false); atomic_set(&a[i], false); atomic_mb_set(&b[i], false); smb_wmb(); @@ -312,8 +327,8 @@ access and for data dependency barriers: smp_read_barrier_depends(); z = b[y]; -smp_wmb() also pairs with atomic_mb_read(), and smp_rmb() also pairs -with atomic_mb_set(). +smp_wmb() also pairs with atomic_mb_read() and smp_mb_acquire(). +and smp_rmb() also pairs with atomic_mb_set() and smp_mb_release(). COMPARISON WITH LINUX KERNEL MEMORY BARRIERS @@ -359,8 +374,9 @@ and memory barriers, and the equivalents in QEMU: note that smp_store_mb() is a little weaker than atomic_mb_set(). atomic_mb_read() compiles to the same instructions as Linux's smp_load_acquire(), but this should be treated as an implementation - detail. If required, QEMU might later add atomic_load_acquire() and - atomic_store_release() macros. + detail. QEMU does have atomic_load_acquire() and atomic_store_release() + macros, but for now they are only used within atomic.h. This may + change in the future. SOURCES diff --git a/docs/block-replication.txt b/docs/block-replication.txt new file mode 100644 index 0000000000..6bde6737fb --- /dev/null +++ b/docs/block-replication.txt @@ -0,0 +1,239 @@ +Block replication +---------------------------------------- +Copyright Fujitsu, Corp. 2016 +Copyright (c) 2016 Intel Corporation +Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + +Block replication is used for continuous checkpoints. It is designed +for COLO (COarse-grain LOck-stepping) where the Secondary VM is running. +It can also be applied for FT/HA (Fault-tolerance/High Assurance) scenario, +where the Secondary VM is not running. + +This document gives an overview of block replication's design. + +== Background == +High availability solutions such as micro checkpoint and COLO will do +consecutive checkpoints. 
The VM state of the Primary and Secondary VM is +identical right after a VM checkpoint, but becomes different as the VM +executes till the next checkpoint. To support disk contents checkpoint, +the modified disk contents in the Secondary VM must be buffered, and are +only dropped at next checkpoint time. To reduce the network transportation +effort during a vmstate checkpoint, the disk modification operations of +the Primary disk are asynchronously forwarded to the Secondary node. + +== Workflow == +The following is the image of block replication workflow: + + +----------------------+ +------------------------+ + |Primary Write Requests| |Secondary Write Requests| + +----------------------+ +------------------------+ + | | + | (4) + | V + | /-------------\ + | Copy and Forward | | + |---------(1)----------+ | Disk Buffer | + | | | | + | (3) \-------------/ + | speculative ^ + | write through (2) + | | | + V V | + +--------------+ +----------------+ + | Primary Disk | | Secondary Disk | + +--------------+ +----------------+ + + 1) Primary write requests will be copied and forwarded to Secondary + QEMU. + 2) Before Primary write requests are written to Secondary disk, the + original sector content will be read from Secondary disk and + buffered in the Disk buffer, but it will not overwrite the existing + sector content (it could be from either "Secondary Write Requests" or + previous COW of "Primary Write Requests") in the Disk buffer. + 3) Primary write requests will be written to Secondary disk. + 4) Secondary write requests will be buffered in the Disk buffer and it + will overwrite the existing sector content in the buffer. + +== Architecture == +We are going to implement block replication from many basic +blocks that are already in QEMU. + + virtio-blk || + ^ || .---------- + | || | Secondary + 1 Quorum || '---------- + / \ || + / \ || + Primary 2 filter + disk ^ virtio-blk + | ^ + 3 NBD -------> 3 NBD | + client || server 2 filter + || ^ ^ +--------. || | | +Primary | || Secondary disk <--------- hidden-disk 5 <--------- active-disk 4 +--------' || | backing ^ backing + || | | + || | | + || '-------------------------' + || drive-backup sync=none 6 + +1) The disk on the primary is represented by a block device with two +children, providing replication between a primary disk and the host that +runs the secondary VM. The read pattern (fifo) for quorum can be extended +to make the primary always read from the local disk instead of going through +NBD. + +2) The new block filter (the name is replication) will control the block +replication. + +3) The secondary disk receives writes from the primary VM through QEMU's +embedded NBD server (speculative write-through). + +4) The disk on the secondary is represented by a custom block device +(called active-disk). It should start as an empty disk, and the format +should support bdrv_make_empty() and backing file. + +5) The hidden-disk is created automatically. It buffers the original content +that is modified by the primary VM. It should also start as an empty disk, +and the driver supports bdrv_make_empty() and backing file. + +6) The drive-backup job (sync=none) is run to allow hidden-disk to buffer +any state that would otherwise be lost by the speculative write-through +of the NBD server into the secondary disk. So before block replication, +the primary disk and secondary disk should contain the same data. + +== Failure Handling == +There are 7 internal errors when block replication is running: +1. I/O error on primary disk +2. 
Forwarding primary write requests failed +3. Backup failed +4. I/O error on secondary disk +5. I/O error on active disk +6. Making active disk or hidden disk empty failed +7. Doing failover failed +In case 1 and 5, we just report the error to the disk layer. In case 2, 3, +4 and 6, we just report block replication's error to FT/HA manager (which +decides when to do a new checkpoint, when to do failover). +In case 7, if active commit failed, we use replication failover failed state +in Secondary's write operation (what decides which target to write). + +== New block driver interface == +We add four block driver interfaces to control block replication: +a. replication_start_all() + Start block replication, called in migration/checkpoint thread. + We must call block_replication_start_all() in secondary QEMU before + calling block_replication_start_all() in primary QEMU. The caller + must hold the I/O mutex lock if it is in migration/checkpoint + thread. +b. replication_do_checkpoint_all() + This interface is called after all VM state is transferred to + Secondary QEMU. The Disk buffer will be dropped in this interface. + The caller must hold the I/O mutex lock if it is in migration/checkpoint + thread. +c. replication_get_error_all() + This interface is called to check if error happened in replication. + The caller must hold the I/O mutex lock if it is in migration/checkpoint + thread. +d. replication_stop_all() + It is called on failover. We will flush the Disk buffer into + Secondary Disk and stop block replication. The vm should be stopped + before calling it if you use this API to shutdown the guest, or other + things except failover. The caller must hold the I/O mutex lock if it is + in migration/checkpoint thread. + +== Usage == +Primary: + -drive if=xxx,driver=quorum,read-pattern=fifo,id=colo1,vote-threshold=1,\ + children.0.file.filename=1.raw,\ + children.0.driver=raw + + Run qmp command in primary qemu: + { 'execute': 'human-monitor-command', + 'arguments': { + 'command-line': 'drive_add -n buddy driver=replication,mode=primary,file.driver=nbd,file.host=xxxx,file.port=xxxx,file.export=colo1,node-name=nbd_client1' + } + } + { 'execute': 'x-blockdev-change', + 'arguments': { + 'parent': 'colo1', + 'node': 'nbd_client1' + } + } + Note: + 1. There should be only one NBD Client for each primary disk. + 2. host is the secondary physical machine's hostname or IP + 3. Each disk must have its own export name. + 4. It is all a single argument to -drive and you should ignore the + leading whitespace. + 5. The qmp command line must be run after running qmp command line in + secondary qemu. + 6. After failover we need remove children.1 (replication driver). + +Secondary: + -drive if=none,driver=raw,file.filename=1.raw,id=colo1 \ + -drive if=xxx,id=topxxx,driver=replication,mode=secondary,top-id=topxxx\ + file.file.filename=active_disk.qcow2,\ + file.driver=qcow2,\ + file.backing.file.filename=hidden_disk.qcow2,\ + file.backing.driver=qcow2,\ + file.backing.backing=colo1 + + Then run qmp command in secondary qemu: + { 'execute': 'nbd-server-start', + 'arguments': { + 'addr': { + 'type': 'inet', + 'data': { + 'host': 'xxx', + 'port': 'xxx' + } + } + } + } + { 'execute': 'nbd-server-add', + 'arguments': { + 'device': 'colo1', + 'writable': true + } + } + + Note: + 1. The export name in secondary QEMU command line is the secondary + disk's id. + 2. The export name for the same disk must be the same + 3. 
The qmp command nbd-server-start and nbd-server-add must be run + before running the qmp command migrate on primary QEMU + 4. Active disk, hidden disk and nbd target's length should be the + same. + 5. It is better to put active disk and hidden disk in ramdisk. + 6. It is all a single argument to -drive, and you should ignore + the leading whitespace. + +After Failover: +Primary: + The secondary host is down, so we should run the following qmp command + to remove the nbd child from the quorum: + { 'execute': 'x-blockdev-change', + 'arguments': { + 'parent': 'colo1', + 'child': 'children.1' + } + } + { 'execute': 'human-monitor-command', + 'arguments': { + 'command-line': 'drive_del xxxx' + } + } + Note: there is no qmp command to remove the blockdev now + +Secondary: + The primary host is down, so we should do the following thing: + { 'execute': 'nbd-server-stop' } + +TODO: +1. Continuous block replication +2. Shared disk diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt new file mode 100644 index 0000000000..76767cb34f --- /dev/null +++ b/docs/colo-proxy.txt @@ -0,0 +1,188 @@ +COLO-proxy +---------- +Copyright (c) 2016 Intel Corporation +Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. +Copyright (c) 2016 Fujitsu, Corp. + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + +This document gives an overview of COLO proxy's design. + +== Background == +COLO-proxy is a part of COLO project. It is used +to compare the network package to help COLO decide +whether to do checkpoint. With COLO-proxy's help, +COLO greatly improves the performance. + +The filter-redirector, filter-mirror, colo-compare +and filter-rewriter compose the COLO-proxy. + +== Architecture == + +COLO-Proxy is based on qemu netfilter and it's a plugin for qemu netfilter +(except colo-compare). It keep Secondary VM connect normally to +client and compare packets sent by PVM with sent by SVM. +If the packet difference, notify COLO-frame to do checkpoint and send +all primary packet has queued. Otherwise just send the queued primary +packet and drop the queued secondary packet. 
+ +Below is a COLO proxy ascii figure: + + Primary qemu Secondary qemu ++--------------------------------------------------------------+ +----------------------------------------------------------------+ +| +----------------------------------------------------------+ | | +-----------------------------------------------------------+ | +| | | | | | | | +| | guest | | | | guest | | +| | | | | | | | +| +-------^--------------------------+-----------------------+ | | +---------------------+--------+----------------------------+ | +| | | | | ^ | | +| | | | | | | | +| | +------------------------------------------------------+ | | | | +|netfilter| | | | | | netfilter | | | +| +----------+ +----------------------------+ | | | +-----------------------------------------------------------+ | +| | | | | | out | | | | | | filter excute order | | +| | | | +-----------------------------+ | | | | | | +-------------------> | | +| | | | | | | | | | | | | | TCP | | +| | +-----+--+-+ +-----v----+ +-----v----+ |pri +----+----+sec| | | | +------------+ +---+----+---v+rewriter++ +------------+ | | +| | | | | | | | |in | |in | | | | | | | | | | | | | +| | | filter | | filter | | filter +------> colo <------+ +--------> filter +--> adjust | adjust +--> filter | | | +| | | mirror | |redirector| |redirector| | | compare | | | | | | redirector | | ack | seq | | redirector | | | +| | | | | | | | | | | | | | | | | | | | | | | | +| | +----^-----+ +----+-----+ +----------+ | +---------+ | | | | +------------+ +--------+--------------+ +---+--------+ | | +| | | tx | rx rx | | | | | tx all | rx | | +| | | | | | | | +-----------------------------------------------------------+ | +| | | +--------------+ | | | | | | +| | | filter excute order | | | | | | | +| | | +----------------> | | | +--------------------------------------------------------+ | +| +-----------------------------------------+ | | | +| | | | | | ++--------------------------------------------------------------+ +----------------------------------------------------------------+ + |guest receive | guest send + | | ++--------+----------------------------v------------------------+ +| | NOTE: filter direction is rx/tx/all +| tap | rx:receive packets sent to the netdev +| | tx:receive packets sent by the netdev ++--------------------------------------------------------------+ + +1.Guest receive packet route: + +Primary: + +Tap --> Mirror Client Filter +Mirror client will send packet to guest,at the +same time, copy and forward packet to secondary +mirror server. + +Secondary: + +Mirror Server Filter --> TCP Rewriter +If receive packet is TCP packet,we will adjust ack +and update TCP checksum, then send to secondary +guest. Otherwise directly send to guest. + +2.Guest send packet route: + +Primary: + +Guest --> Redirect Server Filter +Redirect server filter receive primary guest packet +but do nothing, just pass to next filter. + +Redirect Server Filter --> COLO-Compare +COLO-compare receive primary guest packet then +waiting scondary redirect packet to compare it. +If packet same,send queued primary packet and clear +queued secondary packet, Otherwise send primary packet +and do checkpoint. + +COLO-Compare --> Another Redirector Filter +The redirector get packet from colo-compare by use +chardev socket. + +Redirector Filter --> Tap +Send the packet. + +Secondary: + +Guest --> TCP Rewriter Filter +If the packet is TCP packet,we will adjust seq +and update TCP checksum. Then send it to +redirect client filter. Otherwise directly send to +redirect client filter. 
+ +Redirect Client Filter --> Redirect Server Filter +Forward packet to primary. + +== Components introduction == + +Filter-mirror is a netfilter plugin. +It gives qemu the ability to mirror +packets to a chardev. + +Filter-redirector is a netfilter plugin. +It gives qemu the ability to redirect net packet. +Redirector can redirect filter's net packet to outdev, +and redirect indev's packet to filter. + + filter + + + redirector | + +--------------+ + | | | + | | | + | | | + indev +---------+ +----------> outdev + | | | + | | | + | | | + +--------------+ + | + v + filter + +COLO-compare, we do packet comparing job. +Packets coming from the primary char indev will be sent to outdev. +Packets coming from the secondary char dev will be dropped after comparing. +COLO-comapre need two input chardev and one output chardev: +primary_in=chardev1-id (source: primary send packet) +secondary_in=chardev2-id (source: secondary send packet) +outdev=chardev3-id + +Filter-rewriter will rewrite some of secondary packet to make +secondary guest's tcp connection established successfully. +In this module we will rewrite tcp packet's ack to the secondary +from primary,and rewrite tcp packet's seq to the primary from +secondary. + +== Usage == + +Here, we use demo ip and port discribe more clearly. +Primary(ip:3.3.3.3): +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown +-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 +-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait +-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait +-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait +-chardev socket,id=compare0-0,host=3.3.3.3,port=9001 +-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait +-chardev socket,id=compare_out0,host=3.3.3.3,port=9005 +-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 +-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out +-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 +-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 + +Secondary(ip:3.3.3.8): +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown +-device e1000,netdev=hn0,mac=52:a4:00:12:78:66 +-chardev socket,id=red0,host=3.3.3.3,port=9003 +-chardev socket,id=red1,host=3.3.3.3,port=9004 +-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 +-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 + +Note: + a.COLO-proxy must work with COLO-frame and Block-replication. + b.Primary COLO must be started firstly, because COLO-proxy needs + chardev socket server running before secondary started. + c.Filter-rewriter only rewrite tcp packet. diff --git a/docs/generic-loader.txt b/docs/generic-loader.txt new file mode 100644 index 0000000000..31bbcd42f6 --- /dev/null +++ b/docs/generic-loader.txt @@ -0,0 +1,92 @@ +Copyright (c) 2016 Xilinx Inc. + +This work is licensed under the terms of the GNU GPL, version 2 or later. See +the COPYING file in the top-level directory. + + +The 'loader' device allows the user to load multiple images or values into +QEMU at startup. + +Loading Data into Memory Values +------------------------------- +The loader device allows memory values to be set from the command line. This +can be done by following the syntax below: + + -device loader,addr=,data=,data-len= + [,data-be=][,cpu-num=] + + - The address to store the data in. + - The value to be written to the address. 
The maximum size of + the data is 8 bytes. + - The length of the data in bytes. This argument must be + included if the data argument is. + - Set to true if the data to be stored on the guest should be + written as big endian data. The default is to write little + endian data. + - The number of the CPU's address space where the data should + be loaded. If not specified the address space of the first + CPU is used. + +All values are parsed using the standard QemuOps parsing. This allows the user +to specify any values in any format supported. By default the values +will be parsed as decimal. To use hex values the user should prefix the number +with a '0x'. + +An example of loading value 0x8000000e to address 0xfd1a0104 is: + -device loader,addr=0xfd1a0104,data=0x8000000e,data-len=4 + +Setting a CPU's Program Counter +------------------------------- +The loader device allows the CPU's PC to be set from the command line. This +can be done by following the syntax below: + + -device loader,addr=,cpu-num= + + - The value to use as the CPU's PC. + - The number of the CPU whose PC should be set to the + specified value. + +All values are parsed using the standard QemuOps parsing. This allows the user +to specify any values in any format supported. By default the values +will be parsed as decimal. To use hex values the user should prefix the number +with a '0x'. + +An example of setting CPU 0's PC to 0x8000 is: + -device loader,addr=0x8000,cpu-num=0 + +Loading Files +------------- +The loader device also allows files to be loaded into memory. It can load raw +files and ELF executable files. Raw files are loaded verbatim. ELF executable +files are loaded by an ELF loader. The syntax is shown below: + + -device loader,file=[,addr=][,cpu-num=][,force-raw=] + + - A file to be loaded into memory + - The addr in memory that the file should be loaded. This is + ignored if you are using an ELF (unless force-raw is true). + This is required if you aren't loading an ELF. + - This specifies the CPU that should be used. This is an + optional argument and will cause the CPU's PC to be set to + where the image is stored or in the case of an ELF file to + the value in the header. This option should only be used + for the boot image. + This will also cause the image to be written to the specified + CPU's address space. If not specified, the default is CPU 0. + - Setting force-raw=on forces the file to be treated as a raw + image. This can be used to load ELF files as if they were raw. + +All values are parsed using the standard QemuOps parsing. This allows the user +to specify any values in any format supported. By default the values +will be parsed as decimal. To use hex values the user should prefix the number +with a '0x'. + +An example of loading an ELF file which CPU0 will boot is shown below: + -device loader,file=./images/boot.elf,cpu-num=0 + +Restrictions and ToDos +---------------------- + - At the moment it is just assumed that if you specify a cpu-num then you + want to set the PC as well. This might not always be the case. In future + the internal state 'set_pc' (which exists in the generic loader now) should + be exposed to the user so that they can choose if the PC is set or not. diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt index a257087401..2211d14428 100644 --- a/docs/live-block-ops.txt +++ b/docs/live-block-ops.txt @@ -4,15 +4,20 @@ LIVE BLOCK OPERATIONS High level description of live block operations. Note these are not supported for use with the raw format at the moment. 
+Note also that this document is incomplete and it currently only +covers the 'stream' operation. Other operations supported by QEMU such +as 'commit', 'mirror' and 'backup' are not described here yet. Please +refer to the qapi/block-core.json file for an overview of those. + Snapshot live merge =================== Given a snapshot chain, described in this document in the following format: -[A] -> [B] -> [C] -> [D] +[A] <- [B] <- [C] <- [D] <- [E] -Where the rightmost object ([D] in the example) described is the current +Where the rightmost object ([E] in the example) described is the current image which the guest OS has write access to. To the left of it is its base image, and so on accordingly until the leftmost image, which has no base. @@ -21,11 +26,14 @@ The snapshot live merge operation transforms such a chain into a smaller one with fewer elements, such as this transformation relative to the first example: -[A] -> [D] +[A] <- [E] + +Data is copied in the right direction with destination being the +rightmost image, but any other intermediate image can be specified +instead. In this example data is copied from [C] into [D], so [D] can +be backed by [B]: -Currently only forward merge with target being the active image is -supported, that is, data copy is performed in the right direction with -destination being the rightmost image. +[A] <- [B] <- [D] <- [E] The operation is implemented in QEMU through image streaming facilities. @@ -35,14 +43,20 @@ streaming operation completes it raises a QMP event. 'block_stream' copies data from the backing file(s) into the active image. When finished, it adjusts the backing file pointer. -The 'base' parameter specifies an image which data need not be streamed from. -This image will be used as the backing file for the active image when the -operation is finished. +The 'base' parameter specifies an image which data need not be +streamed from. This image will be used as the backing file for the +destination image when the operation is finished. + +In the first example above, the command would be: + +(qemu) block_stream virtio0 file-A.img -In the example above, the command would be: +In order to specify a destination image different from the active +(rightmost) one we can use its node name instead. -(qemu) block_stream virtio0 A +In the second example above, the command would be: +(qemu) block_stream node-D file-B.img Live block copy =============== diff --git a/docs/multiple-iothreads.txt b/docs/multiple-iothreads.txt index 40b8419916..0e7cdb2c28 100644 --- a/docs/multiple-iothreads.txt +++ b/docs/multiple-iothreads.txt @@ -105,13 +105,10 @@ a BH in the target AioContext beforehand and then call qemu_bh_schedule(). No acquire/release or locking is needed for the qemu_bh_schedule() call. But be sure to acquire the AioContext for aio_bh_new() if necessary. -The relationship between AioContext and the block layer -------------------------------------------------------- -The AioContext originates from the QEMU block layer because it provides a -scoped way of running event loop iterations until all work is done. This -feature is used to complete all in-flight block I/O requests (see -bdrv_drain_all()). Nowadays AioContext is a generic event loop that can be -used by any QEMU subsystem. +AioContext and the block layer +------------------------------ +The AioContext originates from the QEMU block layer, even though nowadays +AioContext is a generic event loop that can be used by any QEMU subsystem. The block layer has support for AioContext integrated. 
Each BlockDriverState is associated with an AioContext using bdrv_set_aio_context() and @@ -122,13 +119,22 @@ Block layer code must therefore expect to run in an IOThread and avoid using old APIs that implicitly use the main loop. See the "How to program for IOThreads" above for information on how to do that. -If main loop code such as a QMP function wishes to access a BlockDriverState it -must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure the -IOThread does not run in parallel. - -Long-running jobs (usually in the form of coroutines) are best scheduled in the -BlockDriverState's AioContext to avoid the need to acquire/release around each -bdrv_*() call. Be aware that there is currently no mechanism to get notified -when bdrv_set_aio_context() moves this BlockDriverState to a different -AioContext (see bdrv_detach_aio_context()/bdrv_attach_aio_context()), so you -may need to add this if you want to support long-running jobs. +If main loop code such as a QMP function wishes to access a BlockDriverState +it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure +that callbacks in the IOThread do not run in parallel. + +Code running in the monitor typically needs to ensure that past +requests from the guest are completed. When a block device is running +in an IOThread, the IOThread can also process requests from the guest +(via ioeventfd). To achieve both objects, wrap the code between +bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained +section". The functions must be called between aio_context_acquire() +and aio_context_release(). You can freely release and re-acquire the +AioContext within a drained section. + +Long-running jobs (usually in the form of coroutines) are best scheduled in +the BlockDriverState's AioContext to avoid the need to acquire/release around +each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, +or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, +can be used to get a notification whenever bdrv_set_aio_context() moves a +BlockDriverState to a different AioContext. diff --git a/docs/pcie.txt b/docs/pcie.txt new file mode 100644 index 0000000000..9fb20aaed9 --- /dev/null +++ b/docs/pcie.txt @@ -0,0 +1,310 @@ +PCI EXPRESS GUIDELINES +====================== + +1. Introduction +================ +The doc proposes best practices on how to use PCI Express/PCI device +in PCI Express based machines and explains the reasoning behind them. + +The following presentations accompany this document: + (1) Q35 overview. + http://wiki.qemu.org/images/4/4e/Q35.pdf + (2) A comparison between PCI and PCI Express technologies. + http://wiki.qemu.org/images/f/f6/PCIvsPCIe.pdf + +Note: The usage examples are not intended to replace the full +documentation, please use QEMU help to retrieve all options. + +2. Device placement strategy +============================ +QEMU does not have a clear socket-device matching mechanism +and allows any PCI/PCI Express device to be plugged into any +PCI/PCI Express slot. +Plugging a PCI device into a PCI Express slot might not always work and +is weird anyway since it cannot be done for "bare metal". +Plugging a PCI Express device into a PCI slot will hide the Extended +Configuration Space thus is also not recommended. + +The recommendation is to separate the PCI Express and PCI hierarchies. +PCI Express devices should be plugged only into PCI Express Root Ports and +PCI Express Downstream ports. 
+ +2.1 Root Bus (pcie.0) +===================== +Place only the following kinds of devices directly on the Root Complex: + (1) PCI Devices (e.g. network card, graphics card, IDE controller), + not controllers. Place only legacy PCI devices on + the Root Complex. These will be considered Integrated Endpoints. + Note: Integrated Endpoints are not hot-pluggable. + + Although the PCI Express spec does not forbid PCI Express devices as + Integrated Endpoints, existing hardware mostly integrates legacy PCI + devices with the Root Complex. Guest OSes are suspected to behave + strangely when PCI Express devices are integrated + with the Root Complex. + + (2) PCI Express Root Ports (ioh3420), for starting exclusively PCI Express + hierarchies. + + (3) DMI-PCI Bridges (i82801b11-bridge), for starting legacy PCI + hierarchies. + + (4) Extra Root Complexes (pxb-pcie), if multiple PCI Express Root Buses + are needed. + + pcie.0 bus + ---------------------------------------------------------------------------- + | | | | + ----------- ------------------ ------------------ -------------- + | PCI Dev | | PCIe Root Port | | DMI-PCI Bridge | | pxb-pcie | + ----------- ------------------ ------------------ -------------- + +2.1.1 To plug a device into pcie.0 as a Root Complex Integrated Endpoint use: + -device [,bus=pcie.0] +2.1.2 To expose a new PCI Express Root Bus use: + -device pxb-pcie,id=pcie.1,bus_nr=x[,numa_node=y][,addr=z] + Only PCI Express Root Ports and DMI-PCI bridges can be connected + to the pcie.1 bus: + -device ioh3420,id=root_port1[,bus=pcie.1][,chassis=x][,slot=y][,addr=z] \ + -device i82801b11-bridge,id=dmi_pci_bridge1,bus=pcie.1 + + +2.2 PCI Express only hierarchy +============================== +Always use PCI Express Root Ports to start PCI Express hierarchies. + +A PCI Express Root bus supports up to 32 devices. Since each +PCI Express Root Port is a function and a multi-function +device may support up to 8 functions, the maximum possible +number of PCI Express Root Ports per PCI Express Root Bus is 256. + +Prefer grouping PCI Express Root Ports into multi-function devices +to keep a simple flat hierarchy that is enough for most scenarios. +Only use PCI Express Switches (x3130-upstream, xio3130-downstream) +if there is no more room for PCI Express Root Ports. +Please see section 4. for further justifications. + +Plug only PCI Express devices into PCI Express Ports. 
+ + + pcie.0 bus + ---------------------------------------------------------------------------------- + | | | + ------------- ------------- ------------- + | Root Port | | Root Port | | Root Port | + ------------ ------------- ------------- + | -------------------------|------------------------ + ------------ | ----------------- | + | PCIe Dev | | PCI Express | Upstream Port | | + ------------ | Switch ----------------- | + | | | | + | ------------------- ------------------- | + | | Downstream Port | | Downstream Port | | + | ------------------- ------------------- | + -------------|-----------------------|------------ + ------------ + | PCIe Dev | + ------------ + +2.2.1 Plugging a PCI Express device into a PCI Express Root Port: + -device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z] \ + -device ,bus=root_port1 +2.2.2 Using multi-function PCI Express Root Ports: + -device ioh3420,id=root_port1,multifunction=on,chassis=x,slot=y[,bus=pcie.0][,addr=z.0] \ + -device ioh3420,id=root_port2,chassis=x1,slot=y1[,bus=pcie.0][,addr=z.1] \ + -device ioh3420,id=root_port3,chassis=x2,slot=y2[,bus=pcie.0][,addr=z.2] \ +2.2.2 Plugging a PCI Express device into a Switch: + -device ioh3420,id=root_port1,chassis=x,slot=y[,bus=pcie.0][,addr=z] \ + -device x3130-upstream,id=upstream_port1,bus=root_port1[,addr=x] \ + -device xio3130-downstream,id=downstream_port1,bus=upstream_port1,chassis=x1,slot=y1[,addr=z1]] \ + -device ,bus=downstream_port1 + +Notes: + - (slot, chassis) pair is mandatory and must be + unique for each PCI Express Root Port. + - 'addr' parameter can be 0 for all the examples above. + + +2.3 PCI only hierarchy +====================== +Legacy PCI devices can be plugged into pcie.0 as Integrated Endpoints, +but, as mentioned in section 5, doing so means the legacy PCI +device in question will be incapable of hot-unplugging. +Besides that use DMI-PCI Bridges (i82801b11-bridge) in combination +with PCI-PCI Bridges (pci-bridge) to start PCI hierarchies. + +Prefer flat hierarchies. For most scenarios a single DMI-PCI Bridge +(having 32 slots) and several PCI-PCI Bridges attached to it +(each supporting also 32 slots) will support hundreds of legacy devices. +The recommendation is to populate one PCI-PCI Bridge under the DMI-PCI Bridge +until is full and then plug a new PCI-PCI Bridge... + + pcie.0 bus + ---------------------------------------------- + | | + ----------- ------------------ + | PCI Dev | | DMI-PCI BRIDGE | + ---------- ------------------ + | | + ------------------ ------------------ + | PCI-PCI Bridge | | PCI-PCI Bridge | ... + ------------------ ------------------ + | | + ----------- ----------- + | PCI Dev | | PCI Dev | + ----------- ----------- + +2.3.1 To plug a PCI device into pcie.0 as an Integrated Endpoint use: + -device [,bus=pcie.0] +2.3.2 Plugging a PCI device into a PCI-PCI Bridge: + -device i82801b11-bridge,id=dmi_pci_bridge1[,bus=pcie.0] \ + -device pci-bridge,id=pci_bridge1,bus=dmi_pci_bridge1[,chassis_nr=x][,addr=y] \ + -device ,bus=pci_bridge1[,addr=x] + Note that 'addr' cannot be 0 unless shpc=off parameter is passed to + the PCI Bridge. + +3. IO space issues +=================== +The PCI Express Root Ports and PCI Express Downstream ports are seen by +Firmware/Guest OS as PCI-PCI Bridges. As required by the PCI spec, each +such Port should be reserved a 4K IO range for, even though only one +(multifunction) device can be plugged into each Port. This results in +poor IO space utilization. 
+ +The firmware used by QEMU (SeaBIOS/OVMF) may try further optimizations +by not allocating IO space for each PCI Express Root / PCI Express +Downstream port if: + (1) the port is empty, or + (2) the device behind the port has no IO BARs. + +The IO space is very limited, to 65536 byte-wide IO ports, and may even be +fragmented by fixed IO ports owned by platform devices resulting in at most +10 PCI Express Root Ports or PCI Express Downstream Ports per system +if devices with IO BARs are used in the PCI Express hierarchy. Using the +proposed device placing strategy solves this issue by using only +PCI Express devices within PCI Express hierarchy. + +The PCI Express spec requires that PCI Express devices work properly +without using IO ports. The PCI hierarchy has no such limitations. + + +4. Bus numbers issues +====================== +Each PCI domain can have up to only 256 buses and the QEMU PCI Express +machines do not support multiple PCI domains even if extra Root +Complexes (pxb-pcie) are used. + +Each element of the PCI Express hierarchy (Root Complexes, +PCI Express Root Ports, PCI Express Downstream/Upstream ports) +uses one bus number. Since only one (multifunction) device +can be attached to a PCI Express Root Port or PCI Express Downstream +Port it is advised to plan in advance for the expected number of +devices to prevent bus number starvation. + +Avoiding PCI Express Switches (and thereby striving for a 'flatter' PCI +Express hierarchy) enables the hierarchy to not spend bus numbers on +Upstream Ports. + +The bus_nr properties of the pxb-pcie devices partition the 0..255 bus +number space. All bus numbers assigned to the buses recursively behind a +given pxb-pcie device's root bus must fit between the bus_nr property of +that pxb-pcie device, and the lowest of the higher bus_nr properties +that the command line sets for other pxb-pcie devices. + + +5. Hot-plug +============ +The PCI Express root buses (pcie.0 and the buses exposed by pxb-pcie devices) +do not support hot-plug, so any devices plugged into Root Complexes +cannot be hot-plugged/hot-unplugged: + (1) PCI Express Integrated Endpoints + (2) PCI Express Root Ports + (3) DMI-PCI Bridges + (4) pxb-pcie + +Be aware that PCI Express Downstream Ports can't be hot-plugged into +an existing PCI Express Upstream Port. + +PCI devices can be hot-plugged into PCI-PCI Bridges. The PCI hot-plug is ACPI +based and can work side by side with the PCI Express native hot-plug. + +PCI Express devices can be natively hot-plugged/hot-unplugged into/from +PCI Express Root Ports (and PCI Express Downstream Ports). + +5.1 Planning for hot-plug: + (1) PCI hierarchy + Leave enough PCI-PCI Bridge slots empty or add one + or more empty PCI-PCI Bridges to the DMI-PCI Bridge. + + For each such PCI-PCI Bridge the Guest Firmware is expected to reserve + 4K IO space and 2M MMIO range to be used for all devices behind it. + + Because of the hard IO limit of around 10 PCI Bridges (~ 40K space) + per system don't use more than 9 PCI-PCI Bridges, leaving 4K for the + Integrated Endpoints. (The PCI Express Hierarchy needs no IO space). + + (2) PCI Express hierarchy: + Leave enough PCI Express Root Ports empty. Use multifunction + PCI Express Root Ports (up to 8 ports per pcie.0 slot) + on the Root Complex(es), for keeping the + hierarchy as flat as possible, thereby saving PCI bus numbers. 
+ Don't use PCI Express Switches if you don't have + to, each one of those uses an extra PCI bus (for its Upstream Port) + that could be put to better use with another Root Port or Downstream + Port, which may come handy for hot-plugging another device. + + +5.3 Hot-plug example: +Using HMP: (add -monitor stdio to QEMU command line) + device_add ,id=,bus= + + +6. Device assignment +==================== +Host devices are mostly PCI Express and should be plugged only into +PCI Express Root Ports or PCI Express Downstream Ports. +PCI-PCI Bridge slots can be used for legacy PCI host devices. + +6.1 How to detect if a device is PCI Express: + > lspci -s 03:00.0 -v (as root) + + 03:00.0 Network controller: Intel Corporation Wireless 7260 (rev 83) + Subsystem: Intel Corporation Dual Band Wireless-AC 7260 + Flags: bus master, fast devsel, latency 0, IRQ 50 + Memory at f0400000 (64-bit, non-prefetchable) [size=8K] + Capabilities: [c8] Power Management version 3 + Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+ + Capabilities: [40] Express Endpoint, MSI 00 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Capabilities: [100] Advanced Error Reporting + Capabilities: [140] Device Serial Number 7c-7a-91-ff-ff-90-db-20 + Capabilities: [14c] Latency Tolerance Reporting + Capabilities: [154] Vendor Specific Information: ID=cafe Rev=1 Len=014 + +If you can see the "Express Endpoint" capability in the +output, then the device is indeed PCI Express. + + +7. Virtio devices +================= +Virtio devices plugged into the PCI hierarchy or as Integrated Endpoints +will remain PCI and have transitional behaviour as default. +Transitional virtio devices work in both IO and MMIO modes depending on +the guest support. The Guest firmware will assign both IO and MMIO resources +to transitional virtio devices. + +Virtio devices plugged into PCI Express ports are PCI Express devices and +have "1.0" behavior by default without IO support. +In both cases disable-legacy and disable-modern properties can be used +to override the behaviour. + +Note that setting disable-legacy=off will enable legacy mode (enabling +legacy behavior) for PCI Express virtio devices causing them to +require IO space, which, given the limited available IO space, may quickly +lead to resource exhaustion, and is therefore strongly discouraged. + + +8. Conclusion +============== +The proposal offers a usage model that is easy to understand and follow +and at the same time overcomes the PCI Express architecture limitations. diff --git a/docs/qapi-code-gen.txt b/docs/qapi-code-gen.txt index de298dcaec..2841c5144a 100644 --- a/docs/qapi-code-gen.txt +++ b/docs/qapi-code-gen.txt @@ -964,9 +964,9 @@ Example: Used to generate the marshaling/dispatch functions for the commands defined in the schema. The generated code implements -qmp_marshal_COMMAND() (mentioned in qmp-commands.hx, and registered -automatically), and declares qmp_COMMAND() that the user must -implement. The following files are generated: +qmp_marshal_COMMAND() (registered automatically), and declares +qmp_COMMAND() that the user must implement. The following files are +generated: $(prefix)qmp-marshal.c: command marshal/dispatch functions for each QMP command defined in the schema. 
Functions @@ -1005,7 +1005,7 @@ Example: Error *err = NULL; Visitor *v; - v = qmp_output_visitor_new(ret_out); + v = qobject_output_visitor_new(ret_out); visit_type_UserDefOne(v, "unused", &ret_in, &err); if (!err) { visit_complete(v, ret_out); @@ -1024,7 +1024,7 @@ Example: Visitor *v; UserDefOneList *arg1 = NULL; - v = qmp_input_visitor_new(QOBJECT(args), true); + v = qobject_input_visitor_new(QOBJECT(args), true); visit_start_struct(v, NULL, NULL, 0, &err); if (err) { goto out; diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 5bb06072d3..1fdd6f9ce7 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -160,5 +160,6 @@ If unset, the default value for this parameter is 0 and it disables this feature. Note that this functionality currently relies on the MADV_DONTNEED -argument for madvise() to actually free the memory, so it is not -useful in systems that don't follow that behavior. +argument for madvise() to actually free the memory. This is a +Linux-specific feature, so cache-clean-interval is not supported in +other systems. diff --git a/qmp-commands.hx b/docs/qmp-commands.txt similarity index 79% rename from qmp-commands.hx rename to docs/qmp-commands.txt index 6866264e64..abf210a596 100644 --- a/qmp-commands.hx +++ b/docs/qmp-commands.txt @@ -1,8 +1,3 @@ -HXCOMM QMP dispatch table and documentation -HXCOMM Text between SQMP and EQMP is copied to the QMP documentation file and -HXCOMM does not show up in the other formats. - -SQMP QMP Supported Commands ---------------------- @@ -25,7 +20,7 @@ Also, the following notation is used to denote data flow: -> data issued by the Client <- Server data response -Please, refer to the QMP specification (QMP/qmp-spec.txt) for detailed +Please, refer to the QMP specification (docs/qmp-spec.txt) for detailed information on the Server command and response formats. NOTE: This document is temporary and will be replaced soon. @@ -58,15 +53,6 @@ If you're planning to adopt QMP, please observe the following: Server's responses in the examples below are always a success response, please refer to the QMP specification for more details on error responses. -EQMP - - { - .name = "quit", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_quit, - }, - -SQMP quit ---- @@ -79,41 +65,25 @@ Example: -> { "execute": "quit" } <- { "return": {} } -EQMP - - { - .name = "eject", - .args_type = "force:-f,device:B", - .mhandler.cmd_new = qmp_marshal_eject, - }, - -SQMP eject ----- Eject a removable medium. -Arguments: +Arguments: -- force: force ejection (json-bool, optional) -- device: device name (json-string) +- "force": force ejection (json-bool, optional) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) Example: --> { "execute": "eject", "arguments": { "device": "ide1-cd0" } } +-> { "execute": "eject", "arguments": { "id": "ide0-1-0" } } <- { "return": {} } Note: The "force" argument defaults to false. 
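+
+For instance, the documented arguments can be combined to force ejection of
+a medium the guest has locked (shown purely as an illustration):
+
+-> { "execute": "eject", "arguments": { "id": "ide0-1-0", "force": true } }
+<- { "return": {} }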
-EQMP - - { - .name = "change", - .args_type = "device:B,target:F,arg:s?", - .mhandler.cmd_new = qmp_marshal_change, - }, - -SQMP change ------ @@ -141,15 +111,6 @@ Examples: "arg": "foobar1" } } <- { "return": {} } -EQMP - - { - .name = "screendump", - .args_type = "filename:F", - .mhandler.cmd_new = qmp_marshal_screendump, - }, - -SQMP screendump ---------- @@ -164,15 +125,6 @@ Example: -> { "execute": "screendump", "arguments": { "filename": "/tmp/image" } } <- { "return": {} } -EQMP - - { - .name = "stop", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_stop, - }, - -SQMP stop ---- @@ -185,15 +137,6 @@ Example: -> { "execute": "stop" } <- { "return": {} } -EQMP - - { - .name = "cont", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_cont, - }, - -SQMP cont ---- @@ -206,15 +149,6 @@ Example: -> { "execute": "cont" } <- { "return": {} } -EQMP - - { - .name = "system_wakeup", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_system_wakeup, - }, - -SQMP system_wakeup ------------- @@ -227,15 +161,6 @@ Example: -> { "execute": "system_wakeup" } <- { "return": {} } -EQMP - - { - .name = "system_reset", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_system_reset, - }, - -SQMP system_reset ------------ @@ -248,15 +173,6 @@ Example: -> { "execute": "system_reset" } <- { "return": {} } -EQMP - - { - .name = "system_powerdown", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_system_powerdown, - }, - -SQMP system_powerdown ---------------- @@ -269,17 +185,6 @@ Example: -> { "execute": "system_powerdown" } <- { "return": {} } -EQMP - - { - .name = "device_add", - .args_type = "device:O", - .params = "driver[,prop=value][,...]", - .help = "add device, like -device on the command line", - .mhandler.cmd_new = qmp_device_add, - }, - -SQMP device_add ---------- @@ -305,15 +210,6 @@ Notes: (2) It's possible to list device properties by running QEMU with the "-device DEVICE,\?" command-line argument, where DEVICE is the device's name -EQMP - - { - .name = "device_del", - .args_type = "id:s", - .mhandler.cmd_new = qmp_marshal_device_del, - }, - -SQMP device_del ---------- @@ -333,15 +229,6 @@ Example: -> { "execute": "device_del", "arguments": { "id": "/machine/peripheral-anon/device[0]" } } <- { "return": {} } -EQMP - - { - .name = "send-key", - .args_type = "keys:q,hold-time:i?", - .mhandler.cmd_new = qmp_marshal_send_key, - }, - -SQMP send-key ---------- @@ -364,15 +251,6 @@ Example: { "type": "qcode", "data": "delete" } ] } } <- { "return": {} } -EQMP - - { - .name = "cpu", - .args_type = "index:i", - .mhandler.cmd_new = qmp_marshal_cpu, - }, - -SQMP cpu --- @@ -389,15 +267,6 @@ Example: Note: CPUs' indexes are obtained with the 'query-cpus' command. 
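+
+For instance (the query-cpus output below is abbreviated and purely
+illustrative):
+
+-> { "execute": "query-cpus" }
+<- { "return": [ { "CPU": 0, "current": true, "halted": false, "thread_id": 3134 },
+                 { "CPU": 1, "current": false, "halted": true, "thread_id": 3135 } ] }
+-> { "execute": "cpu", "arguments": { "index": 1 } }
+<- { "return": {} }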
-EQMP - - { - .name = "cpu-add", - .args_type = "id:i", - .mhandler.cmd_new = qmp_marshal_cpu_add, - }, - -SQMP cpu-add ------- @@ -412,15 +281,6 @@ Example: -> { "execute": "cpu-add", "arguments": { "id": 2 } } <- { "return": {} } -EQMP - - { - .name = "memsave", - .args_type = "val:l,size:i,filename:s,cpu:i?", - .mhandler.cmd_new = qmp_marshal_memsave, - }, - -SQMP memsave ------- @@ -441,15 +301,6 @@ Example: "filename": "/tmp/virtual-mem-dump" } } <- { "return": {} } -EQMP - - { - .name = "pmemsave", - .args_type = "val:l,size:i,filename:s", - .mhandler.cmd_new = qmp_marshal_pmemsave, - }, - -SQMP pmemsave -------- @@ -469,15 +320,6 @@ Example: "filename": "/tmp/physical-mem-dump" } } <- { "return": {} } -EQMP - - { - .name = "inject-nmi", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_inject_nmi, - }, - -SQMP inject-nmi ---------- @@ -492,15 +334,6 @@ Example: Note: inject-nmi fails when the guest doesn't support injecting. -EQMP - - { - .name = "ringbuf-write", - .args_type = "device:s,data:s,format:s?", - .mhandler.cmd_new = qmp_marshal_ringbuf_write, - }, - -SQMP ringbuf-write ------------- @@ -521,15 +354,6 @@ Example: "format": "utf8" } } <- { "return": {} } -EQMP - - { - .name = "ringbuf-read", - .args_type = "device:s,size:i,format:s?", - .mhandler.cmd_new = qmp_marshal_ringbuf_read, - }, - -SQMP ringbuf-read ------------- @@ -557,15 +381,6 @@ Example: "format": "utf8" } } <- {"return": "abcdefgh"} -EQMP - - { - .name = "xen-save-devices-state", - .args_type = "filename:F", - .mhandler.cmd_new = qmp_marshal_xen_save_devices_state, - }, - -SQMP xen-save-devices-state ------- @@ -584,15 +399,6 @@ Example: "arguments": { "filename": "/tmp/save" } } <- { "return": {} } -EQMP - - { - .name = "xen-load-devices-state", - .args_type = "filename:F", - .mhandler.cmd_new = qmp_marshal_xen_load_devices_state, - }, - -SQMP xen-load-devices-state ---------------------- @@ -611,15 +417,6 @@ Example: "arguments": { "filename": "/tmp/resume" } } <- { "return": {} } -EQMP - - { - .name = "xen-set-global-dirty-log", - .args_type = "enable:b", - .mhandler.cmd_new = qmp_marshal_xen_set_global_dirty_log, - }, - -SQMP xen-set-global-dirty-log ------- @@ -635,15 +432,6 @@ Example: "arguments": { "enable": true } } <- { "return": {} } -EQMP - - { - .name = "migrate", - .args_type = "detach:-d,blk:-b,inc:-i,uri:s", - .mhandler.cmd_new = qmp_marshal_migrate, - }, - -SQMP migrate ------- @@ -668,15 +456,6 @@ Notes: (3) The user Monitor's "detach" argument is invalid in QMP and should not be used -EQMP - - { - .name = "migrate_cancel", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_migrate_cancel, - }, - -SQMP migrate_cancel -------------- @@ -689,15 +468,6 @@ Example: -> { "execute": "migrate_cancel" } <- { "return": {} } -EQMP - - { - .name = "migrate-incoming", - .args_type = "uri:s", - .mhandler.cmd_new = qmp_marshal_migrate_incoming, - }, - -SQMP migrate-incoming ---------------- @@ -718,14 +488,6 @@ Notes: be used (2) The uri format is the same as for -incoming -EQMP - { - .name = "migrate-set-cache-size", - .args_type = "value:o", - .mhandler.cmd_new = qmp_marshal_migrate_set_cache_size, - }, - -SQMP migrate-set-cache-size ---------------------- @@ -741,14 +503,6 @@ Example: -> { "execute": "migrate-set-cache-size", "arguments": { "value": 536870912 } } <- { "return": {} } -EQMP - { - .name = "migrate-start-postcopy", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_migrate_start_postcopy, - }, - -SQMP migrate-start-postcopy ---------------------- @@ -759,15 +513,6 @@ Example: -> { 
"execute": "migrate-start-postcopy" } <- { "return": {} } -EQMP - - { - .name = "query-migrate-cache-size", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_migrate_cache_size, - }, - -SQMP query-migrate-cache-size ------------------------ @@ -781,15 +526,6 @@ Example: -> { "execute": "query-migrate-cache-size" } <- { "return": 67108864 } -EQMP - - { - .name = "migrate_set_speed", - .args_type = "value:o", - .mhandler.cmd_new = qmp_marshal_migrate_set_speed, - }, - -SQMP migrate_set_speed ----------------- @@ -804,15 +540,6 @@ Example: -> { "execute": "migrate_set_speed", "arguments": { "value": 1024 } } <- { "return": {} } -EQMP - - { - .name = "migrate_set_downtime", - .args_type = "value:T", - .mhandler.cmd_new = qmp_marshal_migrate_set_downtime, - }, - -SQMP migrate_set_downtime -------------------- @@ -827,17 +554,16 @@ Example: -> { "execute": "migrate_set_downtime", "arguments": { "value": 0.1 } } <- { "return": {} } -EQMP +x-colo-lost-heartbeat +-------------------- - { - .name = "client_migrate_info", - .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", - .params = "protocol hostname port tls-port cert-subject", - .help = "set migration information for remote display", - .mhandler.cmd_new = qmp_marshal_client_migrate_info, - }, +Tell COLO that heartbeat is lost, a failover or takeover is needed. + +Example: + +-> { "execute": "x-colo-lost-heartbeat" } +<- { "return": {} } -SQMP client_migrate_info ------------------- @@ -861,17 +587,6 @@ Example: "port": 1234 } } <- { "return": {} } -EQMP - - { - .name = "dump-guest-memory", - .args_type = "paging:b,protocol:s,detach:b?,begin:i?,end:i?,format:s?", - .params = "-p protocol [-d] [begin] [length] [format]", - .help = "dump guest memory to file", - .mhandler.cmd_new = qmp_marshal_dump_guest_memory, - }, - -SQMP dump @@ -902,15 +617,6 @@ Notes: (1) All boolean arguments default to false -EQMP - - { - .name = "query-dump-guest-memory-capability", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_dump_guest_memory_capability, - }, - -SQMP query-dump-guest-memory-capability ---------- @@ -922,17 +628,6 @@ Example: <- { "return": { "formats": ["elf", "kdump-zlib", "kdump-lzo", "kdump-snappy"] } -EQMP - - { - .name = "query-dump", - .args_type = "", - .params = "", - .help = "query background dump status", - .mhandler.cmd_new = qmp_marshal_query_dump, - }, - -SQMP query-dump ---------- @@ -946,17 +641,6 @@ Example: <- { "return": { "status": "active", "completed": 1024000, "total": 2048000 } } -EQMP - -#if defined TARGET_S390X - { - .name = "dump-skeys", - .args_type = "filename:F", - .mhandler.cmd_new = qmp_marshal_dump_skeys, - }, -#endif - -SQMP dump-skeys ---------- @@ -971,15 +655,6 @@ Example: -> { "execute": "dump-skeys", "arguments": { "filename": "/tmp/skeys" } } <- { "return": {} } -EQMP - - { - .name = "netdev_add", - .args_type = "netdev:O", - .mhandler.cmd_new = qmp_netdev_add, - }, - -SQMP netdev_add ---------- @@ -1002,15 +677,6 @@ Note: The supported device options are the same ones supported by the '-netdev' command-line argument, which are listed in the '-help' output or QEMU's manual -EQMP - - { - .name = "netdev_del", - .args_type = "id:s", - .mhandler.cmd_new = qmp_marshal_netdev_del, - }, - -SQMP netdev_del ---------- @@ -1026,15 +692,6 @@ Example: <- { "return": {} } -EQMP - - { - .name = "object-add", - .args_type = "qom-type:s,id:s,props:q?", - .mhandler.cmd_new = qmp_marshal_object_add, - }, - -SQMP object-add ---------- @@ -1052,15 +709,6 @@ Example: "props": { "filename": 
"/dev/hwrng" } } } <- { "return": {} } -EQMP - - { - .name = "object-del", - .args_type = "id:s", - .mhandler.cmd_new = qmp_marshal_object_del, - }, - -SQMP object-del ---------- @@ -1076,16 +724,6 @@ Example: <- { "return": {} } -EQMP - - - { - .name = "block_resize", - .args_type = "device:s?,node-name:s?,size:o", - .mhandler.cmd_new = qmp_marshal_block_resize, - }, - -SQMP block_resize ------------ @@ -1102,15 +740,6 @@ Example: -> { "execute": "block_resize", "arguments": { "device": "scratch", "size": 1073741824 } } <- { "return": {} } -EQMP - - { - .name = "block-stream", - .args_type = "job-id:s?,device:B,base:s?,speed:o?,backing-file:s?,on-error:s?", - .mhandler.cmd_new = qmp_marshal_block_stream, - }, - -SQMP block-stream ------------ @@ -1120,9 +749,12 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. (json-string, optional) -- "device": The device's ID, must be unique (json-string) -- "base": The file name of the backing image above which copying starts - (json-string, optional) +- "device": The device name or node-name of a root node (json-string) +- "base": The file name of the backing image above which copying starts. + It cannot be set if 'base-node' is also set (json-string, optional) +- "base-node": the node name of the backing image above which copying starts. + It cannot be set if 'base' is also set. + (json-string, optional) (Since 2.8) - "backing-file": The backing file string to write into the active layer. This filename is not validated. @@ -1147,15 +779,6 @@ Example: "base": "/tmp/master.qcow2" } } <- { "return": {} } -EQMP - - { - .name = "block-commit", - .args_type = "job-id:s?,device:B,base:s?,top:s?,backing-file:s?,speed:o?", - .mhandler.cmd_new = qmp_marshal_block_commit, - }, - -SQMP block-commit ------------ @@ -1166,7 +789,7 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. (json-string, optional) -- "device": The device's ID, must be unique (json-string) +- "device": The device name or node-name of a root node (json-string) - "base": The file name of the backing image to write data into. If not specified, this is the deepest backing image (json-string, optional) @@ -1212,16 +835,6 @@ Example: "top": "/tmp/snap1.qcow2" } } <- { "return": {} } -EQMP - - { - .name = "drive-backup", - .args_type = "job-id:s?,sync:s,device:B,target:s,speed:i?,mode:s?," - "format:s?,bitmap:s?,on-source-error:s?,on-target-error:s?", - .mhandler.cmd_new = qmp_marshal_drive_backup, - }, - -SQMP drive-backup ------------ @@ -1235,7 +848,7 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. (json-string, optional) -- "device": the name of the device which should be copied. +- "device": the device name or node-name of a root node which should be copied. (json-string) - "target": the target of the new image. If the file exists, or if it is a device, the existing file/device will be used as the new @@ -1253,6 +866,8 @@ Arguments: - "mode": whether and how QEMU should create a new image (NewImageMode, optional, default 'absolute-paths') - "speed": the maximum speed, in bytes per second (json-int, optional) +- "compress": true to compress data, if the target format supports it. + (json-bool, optional, default false) - "on-source-error": the action to take on an error on the source, default 'report'. 'stop' and 'enospc' can only be used if the block device supports io-status. 
@@ -1268,16 +883,6 @@ Example: "target": "backup.img" } } <- { "return": {} } -EQMP - - { - .name = "blockdev-backup", - .args_type = "job-id:s?,sync:s,device:B,target:B,speed:i?," - "on-source-error:s?,on-target-error:s?", - .mhandler.cmd_new = qmp_marshal_blockdev_backup, - }, - -SQMP blockdev-backup --------------- @@ -1288,7 +893,7 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. (json-string, optional) -- "device": the name of the device which should be copied. +- "device": the device name or node-name of a root node which should be copied. (json-string) - "target": the name of the backup target device. (json-string) - "sync": what parts of the disk image should be copied to the destination; @@ -1296,6 +901,8 @@ Arguments: sectors allocated in the topmost image, or "none" to only replicate new I/O (MirrorSyncMode). - "speed": the maximum speed, in bytes per second (json-int, optional) +- "compress": true to compress data, if the target format supports it. + (json-bool, optional, default false) - "on-source-error": the action to take on an error on the source, default 'report'. 'stop' and 'enospc' can only be used if the block device supports io-status. @@ -1311,41 +918,6 @@ Example: "target": "tgt-id" } } <- { "return": {} } -EQMP - - { - .name = "block-job-set-speed", - .args_type = "device:B,speed:o", - .mhandler.cmd_new = qmp_marshal_block_job_set_speed, - }, - - { - .name = "block-job-cancel", - .args_type = "device:B,force:b?", - .mhandler.cmd_new = qmp_marshal_block_job_cancel, - }, - { - .name = "block-job-pause", - .args_type = "device:B", - .mhandler.cmd_new = qmp_marshal_block_job_pause, - }, - { - .name = "block-job-resume", - .args_type = "device:B", - .mhandler.cmd_new = qmp_marshal_block_job_resume, - }, - { - .name = "block-job-complete", - .args_type = "device:B", - .mhandler.cmd_new = qmp_marshal_block_job_complete, - }, - { - .name = "transaction", - .args_type = "actions:q,properties:q?", - .mhandler.cmd_new = qmp_marshal_transaction, - }, - -SQMP transaction ----------- @@ -1407,7 +979,8 @@ actions array: - "mode": whether and how QEMU should create the snapshot file (NewImageMode, optional, default "absolute-paths") When "type" is "blockdev-snapshot-internal-sync": - - "device": device name to snapshot (json-string) + - "device": the device name or node-name of a root node to snapshot + (json-string) - "name": name of the new snapshot (json-string) Example: @@ -1431,16 +1004,6 @@ Example: "name": "snapshot0" } } ] } } <- { "return": {} } -EQMP - - { - .name = "block-dirty-bitmap-add", - .args_type = "node:B,name:s,granularity:i?", - .mhandler.cmd_new = qmp_marshal_block_dirty_bitmap_add, - }, - -SQMP - block-dirty-bitmap-add ---------------------- Since 2.4 @@ -1459,16 +1022,6 @@ Example: "name": "bitmap0" } } <- { "return": {} } -EQMP - - { - .name = "block-dirty-bitmap-remove", - .args_type = "node:B,name:s", - .mhandler.cmd_new = qmp_marshal_block_dirty_bitmap_remove, - }, - -SQMP - block-dirty-bitmap-remove ------------------------- Since 2.4 @@ -1487,16 +1040,6 @@ Example: "name": "bitmap0" } } <- { "return": {} } -EQMP - - { - .name = "block-dirty-bitmap-clear", - .args_type = "node:B,name:s", - .mhandler.cmd_new = qmp_marshal_block_dirty_bitmap_clear, - }, - -SQMP - block-dirty-bitmap-clear ------------------------ Since 2.4 @@ -1516,15 +1059,6 @@ Example: "name": "bitmap0" } } <- { "return": {} } -EQMP - - { - .name = "blockdev-snapshot-sync", - .args_type = 
"device:s?,node-name:s?,snapshot-file:s,snapshot-node-name:s?,format:s?,mode:s?", - .mhandler.cmd_new = qmp_marshal_blockdev_snapshot_sync, - }, - -SQMP blockdev-snapshot-sync ---------------------- @@ -1552,15 +1086,6 @@ Example: "format": "qcow2" } } <- { "return": {} } -EQMP - - { - .name = "blockdev-snapshot", - .args_type = "node:s,overlay:s", - .mhandler.cmd_new = qmp_marshal_blockdev_snapshot, - }, - -SQMP blockdev-snapshot ----------------- Since 2.5 @@ -1578,11 +1103,11 @@ Arguments: Example: -> { "execute": "blockdev-add", - "arguments": { "options": { "driver": "qcow2", - "node-name": "node1534", - "file": { "driver": "file", - "filename": "hd1.qcow2" }, - "backing": "" } } } + "arguments": { "driver": "qcow2", + "node-name": "node1534", + "file": { "driver": "file", + "filename": "hd1.qcow2" }, + "backing": "" } } <- { "return": {} } @@ -1590,15 +1115,6 @@ Example: "overlay": "node1534" } } <- { "return": {} } -EQMP - - { - .name = "blockdev-snapshot-internal-sync", - .args_type = "device:B,name:s", - .mhandler.cmd_new = qmp_marshal_blockdev_snapshot_internal_sync, - }, - -SQMP blockdev-snapshot-internal-sync ------------------------------- @@ -1608,7 +1124,8 @@ name already exists, the operation will fail. Arguments: -- "device": device name to snapshot (json-string) +- "device": the device name or node-name of a root node to snapshot + (json-string) - "name": name of the new snapshot (json-string) Example: @@ -1619,16 +1136,6 @@ Example: } <- { "return": {} } -EQMP - - { - .name = "blockdev-snapshot-delete-internal-sync", - .args_type = "device:B,id:s?,name:s?", - .mhandler.cmd_new = - qmp_marshal_blockdev_snapshot_delete_internal_sync, - }, - -SQMP blockdev-snapshot-delete-internal-sync -------------------------------------- @@ -1639,7 +1146,7 @@ fail. Arguments: -- "device": device name (json-string) +- "device": the device name or node-name of a root node (json-string) - "id": ID of the snapshot (json-string, optional) - "name": name of the snapshot (json-string, optional) @@ -1660,19 +1167,6 @@ Example: } } -EQMP - - { - .name = "drive-mirror", - .args_type = "job-id:s?,sync:s,device:B,target:s,speed:i?,mode:s?," - "format:s?,node-name:s?,replaces:s?," - "on-source-error:s?,on-target-error:s?," - "unmap:b?," - "granularity:i?,buf-size:i?", - .mhandler.cmd_new = qmp_marshal_drive_mirror, - }, - -SQMP drive-mirror ------------ @@ -1687,7 +1181,8 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. (json-string, optional) -- "device": device name to operate on (json-string) +- "device": the device name or node-name of a root node whose writes should be + mirrored. (json-string) - "target": name of new image file (json-string) - "format": format of new image (json-string, optional) - "node-name": the name of the new block driver state in the node graph @@ -1726,17 +1221,6 @@ Example: "format": "qcow2" } } <- { "return": {} } -EQMP - - { - .name = "blockdev-mirror", - .args_type = "job-id:s?,sync:s,device:B,target:B,replaces:s?,speed:i?," - "on-source-error:s?,on-target-error:s?," - "granularity:i?,buf-size:i?", - .mhandler.cmd_new = qmp_marshal_blockdev_mirror, - }, - -SQMP blockdev-mirror ------------ @@ -1747,7 +1231,8 @@ Arguments: - "job-id": Identifier for the newly-created block job. If omitted, the device name will be used. 
(json-string, optional) -- "device": device name to operate on (json-string) +- "device": The device name or node-name of a root node whose writes should be + mirrored (json-string) - "target": device name to mirror to (json-string) - "replaces": the block driver node name to replace when finished (json-string, optional) @@ -1777,14 +1262,6 @@ Example: "sync": "full" } } <- { "return": {} } -EQMP - { - .name = "change-backing-file", - .args_type = "device:s,image-node-name:s,backing-file:s", - .mhandler.cmd_new = qmp_marshal_change_backing_file, - }, - -SQMP change-backing-file ------------------- Since: 2.1 @@ -1803,7 +1280,8 @@ Arguments: "device". (json-string, optional) -- "device": The name of the device. +- "device": The device name or node-name of the root node that owns + image-node-name. (json-string) - "backing-file": The string to write as the backing file. This string is @@ -1815,15 +1293,6 @@ Arguments: Returns: Nothing on success If "device" does not exist or cannot be determined, DeviceNotFound -EQMP - - { - .name = "balloon", - .args_type = "value:M", - .mhandler.cmd_new = qmp_marshal_balloon, - }, - -SQMP balloon ------- @@ -1838,15 +1307,6 @@ Example: -> { "execute": "balloon", "arguments": { "value": 536870912 } } <- { "return": {} } -EQMP - - { - .name = "set_link", - .args_type = "name:s,up:b", - .mhandler.cmd_new = qmp_marshal_set_link, - }, - -SQMP set_link -------- @@ -1862,17 +1322,6 @@ Example: -> { "execute": "set_link", "arguments": { "name": "e1000.0", "up": false } } <- { "return": {} } -EQMP - - { - .name = "getfd", - .args_type = "fdname:s", - .params = "getfd name", - .help = "receive a file descriptor via SCM rights and assign it a name", - .mhandler.cmd_new = qmp_marshal_getfd, - }, - -SQMP getfd ----- @@ -1895,17 +1344,6 @@ Notes: (2) The 'closefd' command can be used to explicitly close the file descriptor when it is no longer needed. -EQMP - - { - .name = "closefd", - .args_type = "fdname:s", - .params = "closefd name", - .help = "close a file descriptor previously passed via SCM rights", - .mhandler.cmd_new = qmp_marshal_closefd, - }, - -SQMP closefd ------- @@ -1920,17 +1358,6 @@ Example: -> { "execute": "closefd", "arguments": { "fdname": "fd1" } } <- { "return": {} } -EQMP - - { - .name = "add-fd", - .args_type = "fdset-id:i?,opaque:s?", - .params = "add-fd fdset-id opaque", - .help = "Add a file descriptor, that was passed via SCM rights, to an fd set", - .mhandler.cmd_new = qmp_marshal_add_fd, - }, - -SQMP add-fd ------- @@ -1959,17 +1386,6 @@ Notes: (1) The list of fd sets is shared by all monitor connections. (2) If "fdset-id" is not specified, a new fd set will be created. -EQMP - - { - .name = "remove-fd", - .args_type = "fdset-id:i,fd:i?", - .params = "remove-fd fdset-id fd", - .help = "Remove a file descriptor from an fd set", - .mhandler.cmd_new = qmp_marshal_remove_fd, - }, - -SQMP remove-fd --------- @@ -1992,16 +1408,6 @@ Notes: (2) If "fd" is not specified, all file descriptors in "fdset-id" will be removed. -EQMP - - { - .name = "query-fdsets", - .args_type = "", - .help = "Return information describing all fd sets", - .mhandler.cmd_new = qmp_marshal_query_fdsets, - }, - -SQMP query-fdsets ------------- @@ -2042,15 +1448,6 @@ Example: Note: The list of fd sets is shared by all monitor connections. 
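A file descriptor can be handed to QEMU together with an add-fd (or getfd) command by attaching it as SCM_RIGHTS ancillary data on the monitor socket. A minimal Python sketch follows, assuming a QMP monitor on a UNIX socket (the path is illustrative) and ignoring interleaved asynchronous events:

    import array, json, socket

    def qmp_add_fd(monitor_path, fd):
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.connect(monitor_path)
        chan = sock.makefile("rw")
        json.loads(chan.readline())                          # server greeting
        chan.write(json.dumps({"execute": "qmp_capabilities"}) + "\n")
        chan.flush()
        json.loads(chan.readline())
        # Send the add-fd command with the descriptor attached as
        # SCM_RIGHTS ancillary data; QEMU adds it to an fd set.
        msg = json.dumps({"execute": "add-fd"}).encode() + b"\n"
        sock.sendmsg([msg], [(socket.SOL_SOCKET, socket.SCM_RIGHTS,
                              array.array("i", [fd]))])
        return json.loads(chan.readline())       # reply describes the new fd set

The same ancillary-data mechanism applies to getfd, which assigns a name to the passed descriptor instead of adding it to an fd set.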
-EQMP - - { - .name = "block_passwd", - .args_type = "device:s?,node-name:s?,password:s", - .mhandler.cmd_new = qmp_marshal_block_passwd, - }, - -SQMP block_passwd ------------ @@ -2068,15 +1465,6 @@ Example: "password": "12345" } } <- { "return": {} } -EQMP - - { - .name = "block_set_io_throttle", - .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,bps_max_length:l?,bps_rd_max_length:l?,bps_wr_max_length:l?,iops_max_length:l?,iops_rd_max_length:l?,iops_wr_max_length:l?,iops_size:l?,group:s?", - .mhandler.cmd_new = qmp_marshal_block_set_io_throttle, - }, - -SQMP block_set_io_throttle ------------ @@ -2084,7 +1472,9 @@ Change I/O throttle limits for a block drive. Arguments: -- "device": device name (json-string) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) - "bps": total throughput limit in bytes per second (json-int) - "bps_rd": read throughput limit in bytes per second (json-int) - "bps_wr": write throughput limit in bytes per second (json-int) @@ -2108,7 +1498,7 @@ Arguments: Example: --> { "execute": "block_set_io_throttle", "arguments": { "device": "virtio0", +-> { "execute": "block_set_io_throttle", "arguments": { "id": "ide0-1-0", "bps": 1000000, "bps_rd": 0, "bps_wr": 0, @@ -2125,15 +1515,6 @@ Example: "iops_size": 0 } } <- { "return": {} } -EQMP - - { - .name = "set_password", - .args_type = "protocol:s,password:s,connected:s?", - .mhandler.cmd_new = qmp_marshal_set_password, - }, - -SQMP set_password ------------ @@ -2151,15 +1532,6 @@ Example: "password": "secret" } } <- { "return": {} } -EQMP - - { - .name = "expire_password", - .args_type = "protocol:s,time:s", - .mhandler.cmd_new = qmp_marshal_expire_password, - }, - -SQMP expire_password --------------- @@ -2176,15 +1548,6 @@ Example: "time": "+60" } } <- { "return": {} } -EQMP - - { - .name = "add_client", - .args_type = "protocol:s,fdname:s,skipauth:b?,tls:b?", - .mhandler.cmd_new = qmp_marshal_add_client, - }, - -SQMP add_client ---------- @@ -2203,16 +1566,6 @@ Example: "fdname": "myclient" } } <- { "return": {} } -EQMP - { - .name = "qmp_capabilities", - .args_type = "", - .params = "", - .help = "enable QMP capabilities", - .mhandler.cmd_new = qmp_capabilities, - }, - -SQMP qmp_capabilities ---------------- @@ -2227,21 +1580,12 @@ Example: Note: This command must be issued before issuing any other command. -EQMP - - { - .name = "human-monitor-command", - .args_type = "command-line:s,cpu-index:i?", - .mhandler.cmd_new = qmp_marshal_human_monitor_command, - }, - -SQMP human-monitor-command --------------------- Execute a Human Monitor command. -Arguments: +Arguments: - command-line: the command name and its arguments, just like the Human Monitor's shell (json-string) @@ -2272,13 +1616,7 @@ Notes: 3. Query Commands ================= -HXCOMM Each query command below is inside a SQMP/EQMP section, do NOT change -HXCOMM this! We will possibly move query commands definitions inside those -HXCOMM sections, just like regular commands. - -EQMP -SQMP query-version ------------- @@ -2306,15 +1644,6 @@ Example: } } -EQMP - - { - .name = "query-version", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_version, - }, - -SQMP query-commands -------------- @@ -2343,15 +1672,6 @@ Example: Note: This example has been shortened as the real response is too long. 
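Putting the pieces above together, a QMP session consists of reading the greeting, issuing qmp_capabilities, and only then sending further commands. A minimal Python client sketch (UNIX socket path illustrative; error handling and asynchronous events omitted for brevity):

    import json, socket

    def qmp_connect(path):
        # Open a QMP session and return a function that executes commands
        # on it.  qmp_capabilities is issued first, as required above.
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.connect(path)
        chan = sock.makefile("rw")
        json.loads(chan.readline())                  # server greeting

        def execute(name, arguments=None):
            msg = {"execute": name}
            if arguments:
                msg["arguments"] = arguments
            chan.write(json.dumps(msg) + "\n")
            chan.flush()
            return json.loads(chan.readline())       # events may interleave in practice

        execute("qmp_capabilities")
        return execute

    execute = qmp_connect("/tmp/qmp.sock")
    print(execute("query-version"))
    print(execute("human-monitor-command", {"command-line": "info status"}))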
-EQMP - - { - .name = "query-commands", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_commands, - }, - -SQMP query-events -------------- @@ -2380,15 +1700,6 @@ Example: Note: This example has been shortened as the real response is too long. -EQMP - - { - .name = "query-events", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_events, - }, - -SQMP query-qmp-schema ---------------- @@ -2397,15 +1708,6 @@ named schema entities. Entities are commands, events and various types. See docs/qapi-code-gen.txt for information on their structure and intended use. -EQMP - - { - .name = "query-qmp-schema", - .args_type = "", - .mhandler.cmd_new = qmp_query_qmp_schema, - }, - -SQMP query-chardev ------------- @@ -2442,15 +1744,6 @@ Example: ] } -EQMP - - { - .name = "query-chardev", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_chardev, - }, - -SQMP query-chardev-backends ------------- @@ -2483,15 +1776,6 @@ Example: ] } -EQMP - - { - .name = "query-chardev-backends", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_chardev_backends, - }, - -SQMP query-block ----------- @@ -2519,7 +1803,7 @@ Each json-object contain the following: "file", "file", "ftp", "ftps", "host_cdrom", "host_device", "http", "https", "nbd", "parallels", "qcow", "qcow2", "raw", - "tftp", "vdi", "vmdk", "vpc", "vvfat" + "vdi", "vmdk", "vpc", "vvfat" - "backing_file": backing file name (json-string, optional) - "backing_file_depth": number of files in the backing file chain (json-int) - "encrypted": true if encrypted, false otherwise (json-bool) @@ -2667,15 +1951,6 @@ Example: ] } -EQMP - - { - .name = "query-block", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_block, - }, - -SQMP query-blockstats ---------------- @@ -2864,15 +2139,6 @@ Example: ] } -EQMP - - { - .name = "query-blockstats", - .args_type = "query-nodes:b?", - .mhandler.cmd_new = qmp_marshal_query_blockstats, - }, - -SQMP query-cpus ---------- @@ -2919,15 +2185,6 @@ Example: ] } -EQMP - - { - .name = "query-cpus", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_cpus, - }, - -SQMP query-iothreads --------------- @@ -2958,15 +2215,6 @@ Example: ] } -EQMP - - { - .name = "query-iothreads", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_iothreads, - }, - -SQMP query-pci --------- @@ -3063,7 +2311,7 @@ Example: }, "function":0, "regions":[ - + ] }, { @@ -3080,7 +2328,7 @@ Example: }, "function":0, "regions":[ - + ] }, { @@ -3175,15 +2423,6 @@ Example: Note: This example has been shortened as the real response is too long. 
-EQMP - - { - .name = "query-pci", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_pci, - }, - -SQMP query-kvm --------- @@ -3199,15 +2438,6 @@ Example: -> { "execute": "query-kvm" } <- { "return": { "enabled": true, "present": true } } -EQMP - - { - .name = "query-kvm", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_kvm, - }, - -SQMP query-status ------------ @@ -3239,15 +2469,6 @@ Example: -> { "execute": "query-status" } <- { "return": { "running": true, "singlestep": false, "status": "running" } } -EQMP - - { - .name = "query-status", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_status, - }, - -SQMP query-mice ---------- @@ -3283,15 +2504,6 @@ Example: ] } -EQMP - - { - .name = "query-mice", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_mice, - }, - -SQMP query-vnc --------- @@ -3346,20 +2558,6 @@ Example: } } -EQMP - - { - .name = "query-vnc", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_vnc, - }, - { - .name = "query-vnc-servers", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_vnc_servers, - }, - -SQMP query-spice ----------- @@ -3427,17 +2625,6 @@ Example: } } -EQMP - -#if defined(CONFIG_SPICE) - { - .name = "query-spice", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_spice, - }, -#endif - -SQMP query-name ---------- @@ -3452,15 +2639,6 @@ Example: -> { "execute": "query-name" } <- { "return": { "name": "qemu-name" } } -EQMP - - { - .name = "query-name", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_name, - }, - -SQMP query-uuid ---------- @@ -3475,15 +2653,6 @@ Example: -> { "execute": "query-uuid" } <- { "return": { "UUID": "550e8400-e29b-41d4-a716-446655440000" } } -EQMP - - { - .name = "query-uuid", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_uuid, - }, - -SQMP query-command-line-options -------------------------- @@ -3524,15 +2693,6 @@ Example: ] } -EQMP - - { - .name = "query-command-line-options", - .args_type = "option:s?", - .mhandler.cmd_new = qmp_marshal_query_command_line_options, - }, - -SQMP query-migrate ------------- @@ -3552,8 +2712,8 @@ The main json-object contains the following: - "setup-time" amount of setup time in milliseconds _before_ the iterations begin but _after_ the QMP command is issued. This is designed to provide an accounting of any activities - (such as RDMA pinning) which may be expensive, but do not - actually occur during the iterative migration rounds + (such as RDMA pinning) which may be expensive, but do not + actually occur during the iterative migration rounds themselves. 
(json-int) - "downtime": only present when migration has finished correctly total amount in ms for downtime that happened (json-int) @@ -3702,15 +2862,6 @@ Examples: } } -EQMP - - { - .name = "query-migrate", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_migrate, - }, - -SQMP migrate-set-capabilities ------------------------ @@ -3723,6 +2874,7 @@ Enable/Disable migration capabilities - "compress": use multiple compression threads to accelerate live migration - "events": generate events for each migration state change - "postcopy-ram": postcopy mode for live migration +- "x-colo": COarse-Grain LOck Stepping (COLO) for Non-stop Service Arguments: @@ -3731,15 +2883,6 @@ Example: -> { "execute": "migrate-set-capabilities" , "arguments": { "capabilities": [ { "capability": "xbzrle", "state": true } ] } } -EQMP - - { - .name = "migrate-set-capabilities", - .args_type = "capabilities:q", - .params = "capability:s,state:b", - .mhandler.cmd_new = qmp_marshal_migrate_set_capabilities, - }, -SQMP query-migrate-capabilities -------------------------- @@ -3753,6 +2896,7 @@ Query current migration capabilities - "compress": Multiple compression threads state (json-bool) - "events": Migration state change event state (json-bool) - "postcopy-ram": postcopy ram state (json-bool) + - "x-colo": COarse-Grain LOck Stepping for Non-stop Service (json-bool) Arguments: @@ -3766,18 +2910,10 @@ Example: {"state": false, "capability": "zero-blocks"}, {"state": false, "capability": "compress"}, {"state": true, "capability": "events"}, - {"state": false, "capability": "postcopy-ram"} + {"state": false, "capability": "postcopy-ram"}, + {"state": false, "capability": "x-colo"} ]} -EQMP - - { - .name = "query-migrate-capabilities", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_migrate_capabilities, - }, - -SQMP migrate-set-parameters ---------------------- @@ -3790,6 +2926,10 @@ Set migration parameters throttled for auto-converge (json-int) - "cpu-throttle-increment": set throttle increasing percentage for auto-converge (json-int) +- "max-bandwidth": set maximum speed for migrations (in bytes/sec) (json-int) +- "downtime-limit": set maximum tolerated downtime (in milliseconds) for + migrations (json-int) +- "x-checkpoint-delay": set the delay time for periodic checkpoint (json-int) Arguments: @@ -3798,15 +2938,6 @@ Example: -> { "execute": "migrate-set-parameters" , "arguments": { "compress-level": 1 } } -EQMP - - { - .name = "migrate-set-parameters", - .args_type = - "compress-level:i?,compress-threads:i?,decompress-threads:i?,cpu-throttle-initial:i?,cpu-throttle-increment:i?", - .mhandler.cmd_new = qmp_marshal_migrate_set_parameters, - }, -SQMP query-migrate-parameters ------------------------ @@ -3820,7 +2951,10 @@ Query current migration parameters throttled (json-int) - "cpu-throttle-increment" : throttle increasing percentage for auto-converge (json-int) - + - "max-bandwidth" : maximium migration speed in bytes per second + (json-int) + - "downtime-limit" : maximum tolerated downtime of migration in + milliseconds (json-int) Arguments: Example: @@ -3832,19 +2966,12 @@ Example: "cpu-throttle-increment": 10, "compress-threads": 8, "compress-level": 1, - "cpu-throttle-initial": 20 + "cpu-throttle-initial": 20, + "max-bandwidth": 33554432, + "downtime-limit": 300 } } -EQMP - - { - .name = "query-migrate-parameters", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_migrate_parameters, - }, - -SQMP query-balloon ------------- @@ -3864,96 +2991,6 @@ Example: } } -EQMP - - { - .name = 
"query-balloon", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_balloon, - }, - - { - .name = "query-block-jobs", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_block_jobs, - }, - - { - .name = "qom-list", - .args_type = "path:s", - .mhandler.cmd_new = qmp_marshal_qom_list, - }, - - { - .name = "qom-set", - .args_type = "path:s,property:s,value:q", - .mhandler.cmd_new = qmp_marshal_qom_set, - }, - - { - .name = "qom-get", - .args_type = "path:s,property:s", - .mhandler.cmd_new = qmp_marshal_qom_get, - }, - - { - .name = "nbd-server-start", - .args_type = "addr:q,tls-creds:s?", - .mhandler.cmd_new = qmp_marshal_nbd_server_start, - }, - { - .name = "nbd-server-add", - .args_type = "device:B,writable:b?", - .mhandler.cmd_new = qmp_marshal_nbd_server_add, - }, - { - .name = "nbd-server-stop", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_nbd_server_stop, - }, - - { - .name = "change-vnc-password", - .args_type = "password:s", - .mhandler.cmd_new = qmp_marshal_change_vnc_password, - }, - { - .name = "qom-list-types", - .args_type = "implements:s?,abstract:b?", - .mhandler.cmd_new = qmp_marshal_qom_list_types, - }, - - { - .name = "device-list-properties", - .args_type = "typename:s", - .mhandler.cmd_new = qmp_marshal_device_list_properties, - }, - - { - .name = "query-machines", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_machines, - }, - - { - .name = "query-cpu-definitions", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_cpu_definitions, - }, - - { - .name = "query-target", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_target, - }, - - { - .name = "query-tpm", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_tpm, - }, - -SQMP query-tpm --------- @@ -3979,15 +3016,6 @@ Example: ] } -EQMP - - { - .name = "query-tpm-models", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_tpm_models, - }, - -SQMP query-tpm-models ---------------- @@ -4000,15 +3028,6 @@ Example: -> { "execute": "query-tpm-models" } <- { "return": [ "tpm-tis" ] } -EQMP - - { - .name = "query-tpm-types", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_tpm_types, - }, - -SQMP query-tpm-types --------------- @@ -4021,15 +3040,6 @@ Example: -> { "execute": "query-tpm-types" } <- { "return": [ "passthrough" ] } -EQMP - - { - .name = "chardev-add", - .args_type = "id:s,backend:q", - .mhandler.cmd_new = qmp_marshal_chardev_add, - }, - -SQMP chardev-add ---------------- @@ -4058,16 +3068,6 @@ Examples: "backend" : { "type" : "pty", "data" : {} } } } <- { "return": { "pty" : "/dev/pty/42" } } -EQMP - - { - .name = "chardev-remove", - .args_type = "id:s", - .mhandler.cmd_new = qmp_marshal_chardev_remove, - }, - - -SQMP chardev-remove -------------- @@ -4082,14 +3082,6 @@ Example: -> { "execute": "chardev-remove", "arguments": { "id" : "foo" } } <- { "return": {} } -EQMP - { - .name = "query-rx-filter", - .args_type = "name:s?", - .mhandler.cmd_new = qmp_marshal_query_rx_filter, - }, - -SQMP query-rx-filter --------------- @@ -4147,15 +3139,6 @@ Example: ] } -EQMP - - { - .name = "blockdev-add", - .args_type = "options:q", - .mhandler.cmd_new = qmp_marshal_blockdev_add, - }, - -SQMP blockdev-add ------------ @@ -4165,72 +3148,50 @@ This command is still a work in progress. It doesn't support all block drivers among other things. Stay away from it unless you want to help with its development. -Arguments: - -- "options": block driver options +For the arguments, see the QAPI schema documentation of BlockdevOptions. 
Example (1): -> { "execute": "blockdev-add", - "arguments": { "options" : { "driver": "qcow2", - "file": { "driver": "file", - "filename": "test.qcow2" } } } } + "arguments": { "driver": "qcow2", + "file": { "driver": "file", + "filename": "test.qcow2" } } } <- { "return": {} } Example (2): -> { "execute": "blockdev-add", "arguments": { - "options": { - "driver": "qcow2", - "id": "my_disk", - "discard": "unmap", - "cache": { - "direct": true, - "writeback": true - }, - "file": { - "driver": "file", - "filename": "/tmp/test.qcow2" - }, - "backing": { - "driver": "raw", - "file": { - "driver": "file", - "filename": "/dev/fdset/4" - } - } + "driver": "qcow2", + "node-name": "my_disk", + "discard": "unmap", + "cache": { + "direct": true, + "writeback": true + }, + "file": { + "driver": "file", + "filename": "/tmp/test.qcow2" + }, + "backing": { + "driver": "raw", + "file": { + "driver": "file", + "filename": "/dev/fdset/4" + } } } } <- { "return": {} } -EQMP - - { - .name = "x-blockdev-del", - .args_type = "id:s?,node-name:s?", - .mhandler.cmd_new = qmp_marshal_x_blockdev_del, - }, - -SQMP x-blockdev-del ------------ Since 2.5 -Deletes a block device thas has been added using blockdev-add. -The selected device can be either a block backend or a graph node. - -In the former case the backend will be destroyed, along with its -inserted medium if there's any. The command will fail if the backend -or its medium are in use. - -In the latter case the node will be destroyed. The command will fail -if the node is attached to a block backend or is otherwise being -used. - -One of "id" or "node-name" must be specified, but not both. +Deletes a block device that has been added using blockdev-add. +The command will fail if the node is attached to a device or is +otherwise being used. This command is still a work in progress and is considered experimental. Stay away from it unless you want to help with its @@ -4238,20 +3199,17 @@ development. 
Arguments: -- "id": Name of the block backend device to delete (json-string, optional) -- "node-name": Name of the graph node to delete (json-string, optional) +- "node-name": Name of the graph node to delete (json-string) Example: -> { "execute": "blockdev-add", "arguments": { - "options": { - "driver": "qcow2", - "id": "drive0", - "file": { - "driver": "file", - "filename": "test.qcow2" - } + "driver": "qcow2", + "node-name": "node0", + "file": { + "driver": "file", + "filename": "test.qcow2" } } } @@ -4259,19 +3217,10 @@ Example: <- { "return": {} } -> { "execute": "x-blockdev-del", - "arguments": { "id": "drive0" } + "arguments": { "node-name": "node0" } } <- { "return": {} } -EQMP - - { - .name = "blockdev-open-tray", - .args_type = "device:s,force:b?", - .mhandler.cmd_new = qmp_marshal_blockdev_open_tray, - }, - -SQMP blockdev-open-tray ------------------ @@ -4292,7 +3241,9 @@ which no such event will be generated, these include: Arguments: -- "device": block device name (json-string) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) - "force": if false (the default), an eject request will be sent to the guest if it has locked the tray (and the tray will not be opened immediately); if true, the tray will be opened regardless of whether it is locked @@ -4301,25 +3252,17 @@ Arguments: Example: -> { "execute": "blockdev-open-tray", - "arguments": { "device": "ide1-cd0" } } + "arguments": { "id": "ide0-1-0" } } <- { "timestamp": { "seconds": 1418751016, "microseconds": 716996 }, "event": "DEVICE_TRAY_MOVED", "data": { "device": "ide1-cd0", + "id": "ide0-1-0", "tray-open": true } } <- { "return": {} } -EQMP - - { - .name = "blockdev-close-tray", - .args_type = "device:s", - .mhandler.cmd_new = qmp_marshal_blockdev_close_tray, - }, - -SQMP blockdev-close-tray ------------------- @@ -4331,30 +3274,24 @@ If the tray was already closed before, this will be a no-op. Arguments: -- "device": block device name (json-string) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) Example: -> { "execute": "blockdev-close-tray", - "arguments": { "device": "ide1-cd0" } } + "arguments": { "id": "ide0-1-0" } } <- { "timestamp": { "seconds": 1418751345, "microseconds": 272147 }, "event": "DEVICE_TRAY_MOVED", "data": { "device": "ide1-cd0", + "id": "ide0-1-0", "tray-open": false } } <- { "return": {} } -EQMP - - { - .name = "x-blockdev-remove-medium", - .args_type = "device:s", - .mhandler.cmd_new = qmp_marshal_x_blockdev_remove_medium, - }, - -SQMP x-blockdev-remove-medium ------------------------ @@ -4368,41 +3305,35 @@ Stay away from it unless you want to help with its development. 
Arguments: -- "device": block device name (json-string) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) Example: -> { "execute": "x-blockdev-remove-medium", - "arguments": { "device": "ide1-cd0" } } + "arguments": { "id": "ide0-1-0" } } <- { "error": { "class": "GenericError", - "desc": "Tray of device 'ide1-cd0' is not open" } } + "desc": "Tray of device 'ide0-1-0' is not open" } } -> { "execute": "blockdev-open-tray", - "arguments": { "device": "ide1-cd0" } } + "arguments": { "id": "ide0-1-0" } } <- { "timestamp": { "seconds": 1418751627, "microseconds": 549958 }, "event": "DEVICE_TRAY_MOVED", "data": { "device": "ide1-cd0", + "id": "ide0-1-0", "tray-open": true } } <- { "return": {} } -> { "execute": "x-blockdev-remove-medium", - "arguments": { "device": "ide1-cd0" } } + "arguments": { "device": "ide0-1-0" } } <- { "return": {} } -EQMP - - { - .name = "x-blockdev-insert-medium", - .args_type = "device:s,node-name:s", - .mhandler.cmd_new = qmp_marshal_x_blockdev_insert_medium, - }, - -SQMP x-blockdev-insert-medium ------------------------ @@ -4415,34 +3346,27 @@ Stay away from it unless you want to help with its development. Arguments: -- "device": block device name (json-string) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) - "node-name": root node of the BDS tree to insert into the block device Example: -> { "execute": "blockdev-add", - "arguments": { "options": { "node-name": "node0", - "driver": "raw", - "file": { "driver": "file", - "filename": "fedora.iso" } } } } + "arguments": { { "node-name": "node0", + "driver": "raw", + "file": { "driver": "file", + "filename": "fedora.iso" } } } <- { "return": {} } -> { "execute": "x-blockdev-insert-medium", - "arguments": { "device": "ide1-cd0", + "arguments": { "id": "ide0-1-0", "node-name": "node0" } } <- { "return": {} } -EQMP - - { - .name = "x-blockdev-change", - .args_type = "parent:B,child:B?,node:B?", - .mhandler.cmd_new = qmp_marshal_x_blockdev_change, - }, - -SQMP x-blockdev-change ----------------- @@ -4471,10 +3395,10 @@ Example: Add a new node to a quorum -> { "execute": "blockdev-add", - "arguments": { "options": { "driver": "raw", - "node-name": "new_node", - "file": { "driver": "file", - "filename": "test.raw" } } } } + "arguments": { "driver": "raw", + "node-name": "new_node", + "file": { "driver": "file", + "filename": "test.raw" } } } <- { "return": {} } -> { "execute": "x-blockdev-change", "arguments": { "parent": "disk1", @@ -4487,17 +3411,8 @@ Delete a quorum's node "child": "children.1" } } <- { "return": {} } -EQMP - - { - .name = "query-named-block-nodes", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_named_block_nodes, - }, - -SQMP -@query-named-block-nodes ------------------------- +query-named-block-nodes +----------------------- Return a list of BlockDeviceInfo for all the named block driver nodes @@ -4549,15 +3464,6 @@ Example: } } } ] } -EQMP - - { - .name = "blockdev-change-medium", - .args_type = "device:B,filename:F,format:s?,read-only-mode:s?", - .mhandler.cmd_new = qmp_marshal_blockdev_change_medium, - }, - -SQMP blockdev-change-medium ---------------------- @@ -4566,7 +3472,9 @@ and loading a new image file which is inserted as the new medium. 
Arguments: -- "device": device name (json-string) +- "device": block device name (deprecated, use @id instead) + (json-string, optional) +- "id": the name or QOM path of the guest device (json-string, optional) - "filename": filename of the new image (json-string) - "format": format of the new image (json-string, optional) - "read-only-mode": new read-only mode (json-string, optional) @@ -4577,7 +3485,7 @@ Examples: 1. Change a removable medium -> { "execute": "blockdev-change-medium", - "arguments": { "device": "ide1-cd0", + "arguments": { "id": "ide0-1-0", "filename": "/srv/images/Fedora-12-x86_64-DVD.iso", "format": "raw" } } <- { "return": {} } @@ -4585,7 +3493,7 @@ Examples: 2. Load a read-only medium into a writable drive -> { "execute": "blockdev-change-medium", - "arguments": { "device": "isa-fd0", + "arguments": { "id": "floppyA", "filename": "/srv/images/ro.img", "format": "raw", "read-only-mode": "retain" } } @@ -4595,22 +3503,13 @@ Examples: "desc": "Could not open '/srv/images/ro.img': Permission denied" } } -> { "execute": "blockdev-change-medium", - "arguments": { "device": "isa-fd0", + "arguments": { "id": "floppyA", "filename": "/srv/images/ro.img", "format": "raw", "read-only-mode": "read-only" } } <- { "return": {} } -EQMP - - { - .name = "query-memdev", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_memdev, - }, - -SQMP query-memdev ------------ @@ -4640,16 +3539,7 @@ Example (1): ] } -EQMP - - { - .name = "query-memory-devices", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_memory_devices, - }, - -SQMP -@query-memory-devices +query-memory-devices -------------------- Return a list of memory devices. @@ -4667,17 +3557,9 @@ Example: "slot": 0}, "type": "dimm" } ] } -EQMP - { - .name = "query-acpi-ospm-status", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_acpi_ospm_status, - }, - -SQMP -@query-acpi-ospm-status --------------------- +query-acpi-ospm-status +---------------------- Return list of ACPIOSTInfo for devices that support status reporting via ACPI _OST method. @@ -4689,17 +3571,7 @@ Example: { "slot": "2", "slot-type": "DIMM", "source": 0, "status": 0}, { "slot": "3", "slot-type": "DIMM", "source": 0, "status": 0} ]} -EQMP - -#if defined TARGET_I386 - { - .name = "rtc-reset-reinjection", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_rtc_reset_reinjection, - }, -#endif -SQMP rtc-reset-reinjection --------------------- @@ -4711,15 +3583,7 @@ Example: -> { "execute": "rtc-reset-reinjection" } <- { "return": {} } -EQMP - { - .name = "trace-event-get-state", - .args_type = "name:s,vcpu:i?", - .mhandler.cmd_new = qmp_marshal_trace_event_get_state, - }, - -SQMP trace-event-get-state --------------------- @@ -4743,15 +3607,7 @@ Example: -> { "execute": "trace-event-get-state", "arguments": { "name": "qemu_memalign" } } <- { "return": [ { "name": "qemu_memalign", "state": "disabled" } ] } -EQMP - - { - .name = "trace-event-set-state", - .args_type = "name:s,enable:b,ignore-unavailable:b?,vcpu:i?", - .mhandler.cmd_new = qmp_marshal_trace_event_set_state, - }, -SQMP trace-event-set-state --------------------- @@ -4778,17 +3634,9 @@ Example: -> { "execute": "trace-event-set-state", "arguments": { "name": "qemu_memalign", "enable": "true" } } <- { "return": {} } -EQMP - { - .name = "input-send-event", - .args_type = "console:i?,events:q", - .mhandler.cmd_new = qmp_marshal_input_send_event, - }, - -SQMP -@input-send-event ------------------ +input-send-event +---------------- Send input event to guest. 
@@ -4842,15 +3690,6 @@ Move mouse pointer to absolute coordinates (20000, 400). { "type": "abs", "data" : { "axis": "y", "value" : 400 } } ] } } <- { "return": {} } -EQMP - - { - .name = "block-set-write-threshold", - .args_type = "node-name:s,write-threshold:l", - .mhandler.cmd_new = qmp_marshal_block_set_write_threshold, - }, - -SQMP block-set-write-threshold ------------ @@ -4870,15 +3709,6 @@ Example: "write-threshold": 17179869184 } } <- { "return": {} } -EQMP - - { - .name = "query-rocker", - .args_type = "name:s", - .mhandler.cmd_new = qmp_marshal_query_rocker, - }, - -SQMP Show rocker switch ------------------ @@ -4891,15 +3721,6 @@ Example: -> { "execute": "query-rocker", "arguments": { "name": "sw1" } } <- { "return": {"name": "sw1", "ports": 2, "id": 1327446905938}} -EQMP - - { - .name = "query-rocker-ports", - .args_type = "name:s", - .mhandler.cmd_new = qmp_marshal_query_rocker_ports, - }, - -SQMP Show rocker switch ports ------------------------ @@ -4916,15 +3737,6 @@ Example: "autoneg": "off", "link-up": true, "speed": 10000} ]} -EQMP - - { - .name = "query-rocker-of-dpa-flows", - .args_type = "name:s,tbl-id:i?", - .mhandler.cmd_new = qmp_marshal_query_rocker_of_dpa_flows, - }, - -SQMP Show rocker switch OF-DPA flow tables ------------------------------------- @@ -4945,15 +3757,6 @@ Example: {...more...}, ]} -EQMP - - { - .name = "query-rocker-of-dpa-groups", - .args_type = "name:s,type:i?", - .mhandler.cmd_new = qmp_marshal_query_rocker_of_dpa_groups, - }, - -SQMP Show rocker OF-DPA group tables ------------------------------- @@ -4975,17 +3778,6 @@ Example: "pop-vlan": 1, "id": 251658240} ]} -EQMP - -#if defined TARGET_ARM - { - .name = "query-gic-capabilities", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_gic_capabilities, - }, -#endif - -SQMP query-gic-capabilities --------------- @@ -5000,15 +3792,6 @@ Example: <- { "return": [{ "version": 2, "emulated": true, "kernel": false }, { "version": 3, "emulated": false, "kernel": true } ] } -EQMP - - { - .name = "query-hotpluggable-cpus", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_hotpluggable_cpus, - }, - -SQMP Show existing/possible CPUs --------------------------- diff --git a/docs/qmp-events.txt b/docs/qmp-events.txt index 7967ec4c5a..e0a2365c63 100644 --- a/docs/qmp-events.txt +++ b/docs/qmp-events.txt @@ -65,7 +65,12 @@ Emitted when a disk I/O error occurs. Data: -- "device": device name (json-string) +- "device": device name. This is always present for compatibility + reasons, but it can be empty ("") if the image does not + have a device name associated. (json-string) +- "node-name": node name. Note that errors may be reported for the root node + that is directly attached to a guest device rather than for the + node where the error occurred. (json-string) - "operation": I/O operation (json-string, "read" or "write") - "action": action that has been taken, it's one of the following (json-string): "ignore": error has been ignored @@ -76,6 +81,7 @@ Example: { "event": "BLOCK_IO_ERROR", "data": { "device": "ide0-hd1", + "node-name": "#block212", "operation": "write", "action": "stop" }, "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } @@ -214,12 +220,16 @@ or by HMP/QMP commands. Data: -- "device": device name (json-string) +- "device": Block device name. This is always present for compatibility + reasons, but it can be empty ("") if the image does not have a + device name associated. 
(json-string) +- "id": The name or QOM path of the guest device (json-string) - "tray-open": true if the tray has been opened or false if it has been closed (json-bool) { "event": "DEVICE_TRAY_MOVED", "data": { "device": "ide1-cd0", + "id": "/machine/unattached/device[22]", "tray-open": true }, "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } diff --git a/docs/rcu.txt b/docs/rcu.txt index 2f70954e82..c84e7f42b2 100644 --- a/docs/rcu.txt +++ b/docs/rcu.txt @@ -37,7 +37,7 @@ do not matter; as soon as all previous critical sections have finished, there cannot be any readers who hold references to the data structure, and these can now be safely reclaimed (e.g., freed or unref'ed). -Here is a picutre: +Here is a picture: thread 1 thread 2 thread 3 ------------------- ------------------------ ------------------- @@ -145,7 +145,7 @@ The core RCU API is small: and then read from there. RCU read-side critical sections must use atomic_rcu_read() to - read data, unless concurrent writes are presented by another + read data, unless concurrent writes are prevented by another synchronization mechanism. Furthermore, RCU read-side critical sections should traverse the diff --git a/docs/specs/acpi_nvdimm.txt b/docs/specs/acpi_nvdimm.txt index 0fdd251fc0..3f322e6f55 100644 --- a/docs/specs/acpi_nvdimm.txt +++ b/docs/specs/acpi_nvdimm.txt @@ -65,8 +65,8 @@ _FIT(Firmware Interface Table) The detailed definition of the structure can be found at ACPI 6.0: 5.2.25 NVDIMM Firmware Interface Table (NFIT). -QEMU NVDIMM Implemention -======================== +QEMU NVDIMM Implementation +========================== QEMU uses 4 bytes IO Port starting from 0x0a18 and a RAM-based memory page for NVDIMM ACPI. @@ -80,8 +80,17 @@ Memory: emulates _DSM access and writes the output data to it. ACPI writes _DSM Input Data (based on the offset in the page): - [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle, 0 is reserved for NVDIMM - Root device. + [0x0 - 0x3]: 4 bytes, NVDIMM Device Handle. + + The handle is completely QEMU internal thing, the values in + range [1, 0xFFFF] indicate nvdimm device. Other values are + reserved for other purposes. + + Reserved handles: + 0 is reserved for nvdimm root device named NVDR. + 0x10000 is reserved for QEMU internal DSM function called on + the root device. + [0x4 - 0x7]: 4 bytes, Revision ID, that is the Arg1 of _DSM method. [0x8 - 0xB]: 4 bytes. Function Index, that is the Arg2 of _DSM method. [0xC - 0xFFF]: 4084 bytes, the Arg3 of _DSM method. @@ -127,6 +136,52 @@ _DSM process diagram: | result from the page | | | +--------------------------+ +--------------+ - _FIT implementation - ------------------- - TODO (will fill it when nvdimm hotplug is introduced) +NVDIMM hotplug +-------------- +ACPI BIOS GPE.4 handler is dedicated for notifying OS about nvdimm device +hot-add event. + +QEMU internal use only _DSM function +------------------------------------ +1) Read FIT + _FIT method uses _DSM method to fetch NFIT structures blob from QEMU + in 1 page sized increments which are then concatenated and returned + as _FIT method result. 
+ + Input parameters: + Arg0 – UUID {set to 648B9CF2-CDA1-4312-8AD9-49C4AF32BD62} + Arg1 – Revision ID (set to 1) + Arg2 - Function Index, 0x1 + Arg3 - A package containing a buffer whose layout is as follows: + + +----------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +----------+--------+--------+-------------------------------------------+ + | offset | 4 | 0 | offset in QEMU's NFIT structures blob to | + | | | | read from | + +----------+--------+--------+-------------------------------------------+ + + Output layout in the dsm memory page: + +----------+--------+--------+-------------------------------------------+ + | Field | Length | Offset | Description | + +----------+--------+--------+-------------------------------------------+ + | length | 4 | 0 | length of entire returned data | + | | | | (including this header) | + +----------+-----------------+-------------------------------------------+ + | | | | return status codes | + | | | | 0x0 - success | + | | | | 0x100 - error caused by NFIT update while | + | status | 4 | 4 | read by _FIT wasn't completed, other | + | | | | codes follow Chapter 3 in DSM Spec Rev1 | + +----------+-----------------+-------------------------------------------+ + | fit data | Varies | 8 | contains FIT data, this field is present | + | | | | if status field is 0; | + +----------+--------+--------+-------------------------------------------+ + + The FIT offset is maintained by the OSPM itself, current offset plus + the size of the fit data returned by the function is the next offset + OSPM should read. When all FIT data has been read out, zero fit data + size is returned. + + If it returns status code 0x100, OSPM should restart to read FIT (read + from offset 0 again). diff --git a/docs/specs/edu.txt b/docs/specs/edu.txt index 7f8146780b..0876310809 100644 --- a/docs/specs/edu.txt +++ b/docs/specs/edu.txt @@ -52,7 +52,7 @@ size == 8 for the rest. 0x20 (RW) : status register, bitwise OR 0x01 -- computing factorial (RO) - 0x80 -- raise interrupt 0x01 after finishing factorial computation + 0x80 -- raise interrupt after finishing factorial computation 0x24 (RO) : interrupt status register It contains values which raised the interrupt (see interrupt raise @@ -87,6 +87,11 @@ An IRQ is generated when written to the interrupt raise register. The value appears in interrupt status register when the interrupt is raised and has to be written to the interrupt acknowledge register to lower it. +The device supports both INTx and MSI interrupt. By default, INTx is +used. Even if the driver disabled INTx and only uses MSI, it still +needs to update the acknowledge register at the end of the IRQ handler +routine. + DMA controller -------------- One has to specify, source, destination, size, and start the transfer. One diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt index 631b0cadae..f57e2a09c6 100644 --- a/docs/specs/ppc-spapr-hotplug.txt +++ b/docs/specs/ppc-spapr-hotplug.txt @@ -233,12 +233,27 @@ tools by host-level management such as an HMC. This level of management is not applicable to PowerKVM, hence the reason for extending the notification framework to support hotplug events. 
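Returning to the NVDIMM "Read FIT" function described above: the OSPM reads the FIT in page-sized pieces, advancing its own offset by the size of each chunk, until a zero-sized chunk is returned, and starts over from offset 0 on status 0x100. A sketch of that loop in Python, where dsm_read_fit(offset) is a hypothetical stand-in for issuing the _DSM call (function index 0x1 on the root device) and decoding the output page into a (status, fit data) pair:

    def read_complete_fit(dsm_read_fit):
        # dsm_read_fit(offset) stands in for the actual QEMU-internal DSM
        # call and is assumed to return (status, fit_data) as laid out in
        # the output table above.
        fit, offset = b"", 0
        while True:
            status, chunk = dsm_read_fit(offset)
            if status == 0x100:            # NFIT changed while reading: restart
                fit, offset = b"", 0
                continue
            if status != 0:
                raise RuntimeError("_DSM Read FIT failed: %#x" % status)
            if not chunk:                  # zero-sized data: FIT fully read
                return fit
            fit += chunk
            offset += len(chunk)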
-Note that these events are not yet formally part of the PAPR+ specification, -but support for this format has already been implemented in DR-related -guest tools such as powerpc-utils/librtas, as well as kernel patches that have -been submitted to handle in-kernel processing of memory/cpu-related hotplug -events[1], and is planned for formal inclusion is PAPR+ specification. The -hotplug-specific payload is QEMU implemented as follows (with all values +The format for these EPOW-signalled events is described below under +"hotplug/unplug event structure". Note that these events are not +formally part of the PAPR+ specification, and have been superseded by a +newer format, also described below under "hotplug/unplug event structure", +and so are now deemed a "legacy" format. The formats are similar, but the +"modern" format contains additional fields/flags, which are denoted for the +purposes of this documentation with "#ifdef GUEST_SUPPORTS_MODERN" guards. + +QEMU should assume support only for "legacy" fields/flags unless the guest +advertises support for the "modern" format via ibm,client-architecture-support +hcall by setting byte 5, bit 6 of it's ibm,architecture-vec-5 option vector +structure (as described by LoPAPR v11, B.6.2.3). As with "legacy" format events, +"modern" format events are surfaced to the guest via check-exception RTAS calls, +but use a dedicated event source to signal the guest. This event source is +advertised to the guest by the addition of a "hot-plug-events" node under +"/event-sources" node of the guest's device tree using the standard format +described in LoPAPR v11, B.6.12.1. + +== hotplug/unplug event structure == + +The hotplug-specific payload in QEMU is implemented as follows (with all values encoded in big-endian format): struct rtas_event_log_v6_hp { @@ -263,14 +278,23 @@ struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_HP_ACTION_ADD 1 #define RTAS_LOG_V6_HP_ACTION_REMOVE 2 uint8_t hotplug_action; /* action (add/remove) */ -#define RTAS_LOG_V6_HP_ID_DRC_NAME 1 -#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 -#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#define RTAS_LOG_V6_HP_ID_DRC_NAME 1 +#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#ifdef GUEST_SUPPORTS_MODERN +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 +#endif uint8_t hotplug_identifier; /* type of the resource identifier, * which serves as the discriminator * for the 'drc' union field below */ +#ifdef GUEST_SUPPORTS_MODERN + uint8_t capabilities; /* capability flags, currently unused + * by QEMU + */ +#else uint8_t reserved; +#endif union { uint32_t index; /* DRC index of resource to take action * on @@ -278,6 +302,19 @@ struct rtas_event_log_v6_hp { uint32_t count; /* number of DR resources to take * action on (guest chooses which) */ +#ifdef GUEST_SUPPORTS_MODERN + struct { + uint32_t count; /* number of DR resources to take + * action on + */ + uint32_t index; /* DRC index of first resource to take + * action on. guest will take action + * on DRC index through + * DRC index in + * sequential order + */ + } count_indexed; +#endif char name[1]; /* string representing the name of the * DRC to take action on */ diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index 7890d71698..d70bd83b13 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -123,22 +123,22 @@ The communication consists of master sending message requests and slave sending message replies. Most of the requests don't require replies. 
Here is a list of the ones that do: - * VHOST_GET_FEATURES - * VHOST_GET_PROTOCOL_FEATURES - * VHOST_GET_VRING_BASE - * VHOST_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD) + * VHOST_USER_GET_FEATURES + * VHOST_USER_GET_PROTOCOL_FEATURES + * VHOST_USER_GET_VRING_BASE + * VHOST_USER_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD) [ Also see the section on REPLY_ACK protocol extension. ] There are several messages that the master sends with file descriptors passed in the ancillary data: - * VHOST_SET_MEM_TABLE - * VHOST_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD) - * VHOST_SET_LOG_FD - * VHOST_SET_VRING_KICK - * VHOST_SET_VRING_CALL - * VHOST_SET_VRING_ERR + * VHOST_USER_SET_MEM_TABLE + * VHOST_USER_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD) + * VHOST_USER_SET_LOG_FD + * VHOST_USER_SET_VRING_KICK + * VHOST_USER_SET_VRING_CALL + * VHOST_USER_SET_VRING_ERR If Master is unable to send the full message or receives a wrong reply it will close the connection. An optional reconnection mechanism can be implemented. diff --git a/docs/tcg-exclusive.promela b/docs/tcg-exclusive.promela new file mode 100644 index 0000000000..c91cfca9f7 --- /dev/null +++ b/docs/tcg-exclusive.promela @@ -0,0 +1,225 @@ +/* + * This model describes the implementation of exclusive sections in + * cpus-common.c (start_exclusive, end_exclusive, cpu_exec_start, + * cpu_exec_end). + * + * Author: Paolo Bonzini + * + * This file is in the public domain. If you really want a license, + * the WTFPL will do. + * + * To verify it: + * spin -a docs/tcg-exclusive.promela + * gcc pan.c -O2 + * ./a.out -a + * + * Tunable processor macros: N_CPUS, N_EXCLUSIVE, N_CYCLES, USE_MUTEX, + * TEST_EXPENSIVE. + */ + +// Define the missing parameters for the model +#ifndef N_CPUS +#define N_CPUS 2 +#warning defaulting to 2 CPU processes +#endif + +// the expensive test is not so expensive for <= 2 CPUs +// If the mutex is used, it's also cheap (300 MB / 4 seconds) for 3 CPUs +// For 3 CPUs and the lock-free option it needs 1.5 GB of RAM +#if N_CPUS <= 2 || (N_CPUS <= 3 && defined USE_MUTEX) +#define TEST_EXPENSIVE +#endif + +#ifndef N_EXCLUSIVE +# if !defined N_CYCLES || N_CYCLES <= 1 || defined TEST_EXPENSIVE +# define N_EXCLUSIVE 2 +# warning defaulting to 2 concurrent exclusive sections +# else +# define N_EXCLUSIVE 1 +# warning defaulting to 1 concurrent exclusive sections +# endif +#endif +#ifndef N_CYCLES +# if N_EXCLUSIVE <= 1 || defined TEST_EXPENSIVE +# define N_CYCLES 2 +# warning defaulting to 2 CPU cycles +# else +# define N_CYCLES 1 +# warning defaulting to 1 CPU cycles +# endif +#endif + + +// synchronization primitives. condition variables require a +// process-local "cond_t saved;" variable. 
+ +#define mutex_t byte +#define MUTEX_LOCK(m) atomic { m == 0 -> m = 1 } +#define MUTEX_UNLOCK(m) m = 0 + +#define cond_t int +#define COND_WAIT(c, m) { \ + saved = c; \ + MUTEX_UNLOCK(m); \ + c != saved -> MUTEX_LOCK(m); \ + } +#define COND_BROADCAST(c) c++ + +// this is the logic from cpus-common.c + +mutex_t mutex; +cond_t exclusive_cond; +cond_t exclusive_resume; +byte pending_cpus; + +byte running[N_CPUS]; +byte has_waiter[N_CPUS]; + +#define exclusive_idle() \ + do \ + :: pending_cpus -> COND_WAIT(exclusive_resume, mutex); \ + :: else -> break; \ + od + +#define start_exclusive() \ + MUTEX_LOCK(mutex); \ + exclusive_idle(); \ + pending_cpus = 1; \ + \ + i = 0; \ + do \ + :: i < N_CPUS -> { \ + if \ + :: running[i] -> has_waiter[i] = 1; pending_cpus++; \ + :: else -> skip; \ + fi; \ + i++; \ + } \ + :: else -> break; \ + od; \ + \ + do \ + :: pending_cpus > 1 -> COND_WAIT(exclusive_cond, mutex); \ + :: else -> break; \ + od; \ + MUTEX_UNLOCK(mutex); + +#define end_exclusive() \ + MUTEX_LOCK(mutex); \ + pending_cpus = 0; \ + COND_BROADCAST(exclusive_resume); \ + MUTEX_UNLOCK(mutex); + +#ifdef USE_MUTEX +// Simple version using mutexes +#define cpu_exec_start(id) \ + MUTEX_LOCK(mutex); \ + exclusive_idle(); \ + running[id] = 1; \ + MUTEX_UNLOCK(mutex); + +#define cpu_exec_end(id) \ + MUTEX_LOCK(mutex); \ + running[id] = 0; \ + if \ + :: pending_cpus -> { \ + pending_cpus--; \ + if \ + :: pending_cpus == 1 -> COND_BROADCAST(exclusive_cond); \ + :: else -> skip; \ + fi; \ + } \ + :: else -> skip; \ + fi; \ + MUTEX_UNLOCK(mutex); +#else +// Wait-free fast path, only needs mutex when concurrent with +// an exclusive section +#define cpu_exec_start(id) \ + running[id] = 1; \ + if \ + :: pending_cpus -> { \ + MUTEX_LOCK(mutex); \ + if \ + :: !has_waiter[id] -> { \ + running[id] = 0; \ + exclusive_idle(); \ + running[id] = 1; \ + } \ + :: else -> skip; \ + fi; \ + MUTEX_UNLOCK(mutex); \ + } \ + :: else -> skip; \ + fi; + +#define cpu_exec_end(id) \ + running[id] = 0; \ + if \ + :: pending_cpus -> { \ + MUTEX_LOCK(mutex); \ + if \ + :: has_waiter[id] -> { \ + has_waiter[id] = 0; \ + pending_cpus--; \ + if \ + :: pending_cpus == 1 -> COND_BROADCAST(exclusive_cond); \ + :: else -> skip; \ + fi; \ + } \ + :: else -> skip; \ + fi; \ + MUTEX_UNLOCK(mutex); \ + } \ + :: else -> skip; \ + fi +#endif + +// Promela processes + +byte done_cpu; +byte in_cpu; +active[N_CPUS] proctype cpu() +{ + byte id = _pid % N_CPUS; + byte cycles = 0; + cond_t saved; + + do + :: cycles == N_CYCLES -> break; + :: else -> { + cycles++; + cpu_exec_start(id) + in_cpu++; + done_cpu++; + in_cpu--; + cpu_exec_end(id) + } + od; +} + +byte done_exclusive; +byte in_exclusive; +active[N_EXCLUSIVE] proctype exclusive() +{ + cond_t saved; + byte i; + + start_exclusive(); + in_exclusive = 1; + done_exclusive++; + in_exclusive = 0; + end_exclusive(); +} + +#define LIVENESS (done_cpu == N_CPUS * N_CYCLES && done_exclusive == N_EXCLUSIVE) +#define SAFETY !(in_exclusive && in_cpu) + +never { /* ! ([] SAFETY && <> [] LIVENESS) */ + do + // once the liveness property is satisfied, this is not executable + // and the never clause is not accepted + :: ! LIVENESS -> accept_liveness: skip + :: 1 -> assert(SAFETY) + od; +} diff --git a/docs/throttle.txt b/docs/throttle.txt index 26d4d5107f..cd4e109d39 100644 --- a/docs/throttle.txt +++ b/docs/throttle.txt @@ -235,7 +235,10 @@ consider the following values: - Water leaks from the bucket at a rate of 100 IOPS. - Water can be added to the bucket at a rate of 2000 IOPS. 
- The size of the bucket is 2000 x 60 = 120000 - - If 'iops-total-max' is unset then the bucket size is 100 x 60. + - If 'iops-total-max-length' is unset then it defaults to 1 and the + size of the bucket is 2000. + - If 'iops-total-max' is unset then 'iops-total-max-length' must be + unset as well. In this case the bucket size is 100. The bucket is initially empty, therefore water can be added until it's full at a rate of 2000 IOPS (the burst rate). Once the bucket is full diff --git a/docs/tracing.txt b/docs/tracing.txt index 29f2f9a24d..f351998a4e 100644 --- a/docs/tracing.txt +++ b/docs/tracing.txt @@ -150,13 +150,16 @@ The trace backends are chosen at configure time: For a list of supported trace backends, try ./configure --help or see below. If multiple backends are enabled, the trace is sent to them all. +If no backends are explicitly selected, configure will default to the +"log" backend. + The following subsections describe the supported trace backends. === Nop === The "nop" backend generates empty trace event functions so that the compiler -can optimize out trace events completely. This is the default and imposes no -performance penalty. +can optimize out trace events completely. This imposes no performance +penalty. Note that regardless of the selected trace backend, events with the "disable" property will be generated with the "nop" backend. @@ -192,6 +195,18 @@ After running qemu by root user, you can get the trace: Restriction: "ftrace" backend is restricted to Linux only. +=== Syslog === + +The "syslog" backend sends trace events using the POSIX syslog API. The log +is opened specifying the LOG_DAEMON facility and LOG_PID option (so events +are tagged with the pid of the particular QEMU process that generated +them). All events are logged at LOG_INFO level. + +NOTE: syslog may squash duplicate consecutive trace events and apply rate + limiting. + +Restriction: "syslog" backend is restricted to POSIX compliant OS. + ==== Monitor commands ==== * trace-file on|off|flush|set diff --git a/docs/writing-qmp-commands.txt b/docs/writing-qmp-commands.txt index 59aa77ae25..44c14db418 100644 --- a/docs/writing-qmp-commands.txt +++ b/docs/writing-qmp-commands.txt @@ -7,8 +7,8 @@ This document doesn't discuss QMP protocol level details, nor does it dive into the QAPI framework implementation. For an in-depth introduction to the QAPI framework, please refer to -docs/qapi-code-gen.txt. For documentation about the QMP protocol, please -check the files in QMP/. +docs/qapi-code-gen.txt. For documentation about the QMP protocol, +start with docs/qmp-intro.txt. == Overview == @@ -119,17 +119,6 @@ There are a few things to be noticed: 5. Printing to the terminal is discouraged for QMP commands, we do it here because it's the easiest way to demonstrate a QMP command -Now a little hack is needed. As we're still using the old QMP server we need -to add the new command to its internal dispatch table. This step won't be -required in the near future. Open the qmp-commands.hx file and add the -following at the bottom: - - { - .name = "hello-world", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_hello_world, - }, - You're done. Now build qemu, run it as suggested in the "Testing" section, and then type the following QMP command: @@ -174,21 +163,6 @@ There are two important details to be noticed: 2. 
The C implementation signature must follow the schema's argument ordering, which is defined by the "data" member -The last step is to update the qmp-commands.hx file: - - { - .name = "hello-world", - .args_type = "message:s?", - .mhandler.cmd_new = qmp_marshal_hello_world, - }, - -Notice that the "args_type" member got our "message" argument. The character -"s" stands for "string" and "?" means it's optional. This too must be ordered -according to the C implementation and schema file. You can look for more -examples in the qmp-commands.hx file if you need to define more arguments. - -Again, this step won't be required in the future. - Time to test our new version of the "hello-world" command. Build qemu, run it as described in the "Testing" section and then send two commands: @@ -337,7 +311,7 @@ we should add it to the hmp-commands.hx file: .args_type = "message:s?", .params = "hello-world [message]", .help = "Print message to the standard output", - .mhandler.cmd = hmp_hello_world, + .cmd = hmp_hello_world, }, STEXI @@ -454,14 +428,6 @@ There are a number of things to be noticed: 6. You have to include the "qmp-commands.h" header file in qemu-timer.c, otherwise qemu won't build -The last step is to add the correspoding entry in the qmp-commands.hx file: - - { - .name = "query-alarm-clock", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_alarm_clock, - }, - Time to test the new command. Build qemu, run it as described in the "Testing" section and try this: @@ -518,7 +484,7 @@ in the monitor.c file. The entry for the "info alarmclock" follows: .args_type = "", .params = "", .help = "show information about the alarm clock", - .mhandler.info = hmp_info_alarm_clock, + .cmd = hmp_info_alarm_clock, }, To test this, run qemu and type "info alarmclock" in the user monitor. @@ -600,14 +566,6 @@ iteration of the loop. That's because the alarm timer method in use is the first element of the alarm_timers array. Also notice that QAPI lists are handled by hand and we return the head of the list. -To test this you have to add the corresponding qmp-commands.hx entry: - - { - .name = "query-alarm-methods", - .args_type = "", - .mhandler.cmd_new = qmp_marshal_query_alarm_methods, - }, - Now Build qemu, run it as explained in the "Testing" section and try our new command: diff --git a/docs/xbzrle.txt b/docs/xbzrle.txt index 52c8511a4c..c0a7dfd44c 100644 --- a/docs/xbzrle.txt +++ b/docs/xbzrle.txt @@ -42,7 +42,7 @@ nzrun = length byte... length = uleb128 encoded integer On the sender side XBZRLE is used as a compact delta encoding of page updates, -retrieving the old page content from the cache (default size of 512 MB). The +retrieving the old page content from the cache (default size of 64MB). The receiving side uses the existing page's content and XBZRLE to decode the new page's content. @@ -73,7 +73,7 @@ e9 07 0f 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 03 01 67 01 01 69 Cache update strategy ===================== -Keeping the hot pages in the cache is effective for decreased cache +Keeping the hot pages in the cache is effective for decreasing cache misses. XBZRLE uses a counter as the age of each page. The counter will increase after each ram dirty bitmap sync. 
When a cache conflict is detected, XBZRLE will only evict pages in the cache that are older than diff --git a/docs/xen-save-devices-state.txt b/docs/xen-save-devices-state.txt index 92e08dbf6a..a72ecc8081 100644 --- a/docs/xen-save-devices-state.txt +++ b/docs/xen-save-devices-state.txt @@ -9,7 +9,7 @@ however it is also possible to save the state of all devices to file, without saving the RAM or the block devices of the VM. This operation is called "xen-save-devices-state" (see -QMP/qmp-commands.txt) +qmp-commands.txt) The binary format used in the file is the following: diff --git a/exec.c b/exec.c index 8ffde75983..08c558eecf 100644 --- a/exec.c +++ b/exec.c @@ -93,6 +93,11 @@ static MemoryRegion io_mem_unassigned; #endif +#ifdef TARGET_PAGE_BITS_VARY +int target_page_bits; +bool target_page_bits_decided; +#endif + struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); /* current CPU in the current thread. It is only valid inside cpu_exec() */ @@ -102,8 +107,37 @@ __thread CPUState *current_cpu; 2 = Adaptive rate instruction counting. */ int use_icount; +bool set_preferred_target_page_bits(int bits) +{ + /* The target page size is the lowest common denominator for all + * the CPUs in the system, so we can only make it smaller, never + * larger. And we can't make it smaller once we've committed to + * a particular size. + */ +#ifdef TARGET_PAGE_BITS_VARY + assert(bits >= TARGET_PAGE_BITS_MIN); + if (target_page_bits == 0 || target_page_bits > bits) { + if (target_page_bits_decided) { + return false; + } + target_page_bits = bits; + } +#endif + return true; +} + #if !defined(CONFIG_USER_ONLY) +static void finalize_target_page_bits(void) +{ +#ifdef TARGET_PAGE_BITS_VARY + if (target_page_bits == 0) { + target_page_bits = TARGET_PAGE_BITS_MIN; + } + target_page_bits_decided = true; +#endif +} + typedef struct PhysPageEntry PhysPageEntry; struct PhysPageEntry { @@ -153,7 +187,7 @@ typedef struct subpage_t { MemoryRegion iomem; AddressSpace *as; hwaddr base; - uint16_t sub_section[TARGET_PAGE_SIZE]; + uint16_t sub_section[]; } subpage_t; #define PHYS_SECTION_UNASSIGNED 0 @@ -255,7 +289,7 @@ static void phys_page_set(AddressSpaceDispatch *d, /* Compact a non leaf page entry. Simply detect that the entry has a single child, * and update our entry so we can skip it and go directly to the destination. */ -static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted) +static void phys_page_compact(PhysPageEntry *lp, Node *nodes) { unsigned valid_ptr = P_L2_SIZE; int valid = 0; @@ -275,7 +309,7 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *com valid_ptr = i; valid++; if (p[i].skip) { - phys_page_compact(&p[i], nodes, compacted); + phys_page_compact(&p[i], nodes); } } @@ -307,10 +341,8 @@ static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *com static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb) { - DECLARE_BITMAP(compacted, nodes_nb); - if (d->phys_map.skip) { - phys_page_compact(&d->phys_map, d->map.nodes, compacted); + phys_page_compact(&d->phys_map, d->map.nodes); } } @@ -320,9 +352,9 @@ static inline bool section_covers_addr(const MemoryRegionSection *section, /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means * the section must cover the entire address space. 
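 * (size is an Int128; its high and low halves are read with the
 * int128_gethi() and int128_getlo() accessors.)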
*/ - return section->size.hi || + return int128_gethi(section->size) || range_covers_byte(section->offset_within_address_space, - section->size.lo, addr); + int128_getlo(section->size), addr); } static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr, @@ -461,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, hwaddr *xlat, hwaddr *plen) { MemoryRegionSection *section; - AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch; + AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); section = address_space_translate_internal(d, addr, xlat, plen, false); @@ -598,32 +630,11 @@ AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) } #endif -static int cpu_get_free_index(void) -{ - CPUState *some_cpu; - int cpu_index = 0; - - CPU_FOREACH(some_cpu) { - cpu_index++; - } - return cpu_index; -} - -void cpu_exec_exit(CPUState *cpu) +void cpu_exec_unrealizefn(CPUState *cpu) { CPUClass *cc = CPU_GET_CLASS(cpu); - cpu_list_lock(); - if (cpu->node.tqe_prev == NULL) { - /* there is nothing to undo since cpu_exec_init() hasn't been called */ - cpu_list_unlock(); - return; - } - - QTAILQ_REMOVE(&cpus, cpu, node); - cpu->node.tqe_prev = NULL; - cpu->cpu_index = UNASSIGNED_CPU_INDEX; - cpu_list_unlock(); + cpu_list_remove(cpu); if (cc->vmsd != NULL) { vmstate_unregister(NULL, cc->vmsd, cpu); @@ -633,11 +644,8 @@ void cpu_exec_exit(CPUState *cpu) } } -void cpu_exec_init(CPUState *cpu, Error **errp) +void cpu_exec_initfn(CPUState *cpu) { - CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu); - Error *local_err ATTRIBUTE_UNUSED = NULL; - cpu->as = NULL; cpu->num_ases = 0; @@ -658,14 +666,13 @@ void cpu_exec_init(CPUState *cpu, Error **errp) cpu->memory = system_memory; object_ref(OBJECT(cpu->memory)); #endif +} - cpu_list_lock(); - if (cpu->cpu_index == UNASSIGNED_CPU_INDEX) { - cpu->cpu_index = cpu_get_free_index(); - assert(cpu->cpu_index != UNASSIGNED_CPU_INDEX); - } - QTAILQ_INSERT_TAIL(&cpus, cpu, node); - cpu_list_unlock(); +void cpu_exec_realizefn(CPUState *cpu, Error **errp) +{ + CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu); + + cpu_list_add(cpu); #ifndef CONFIG_USER_ONLY if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { @@ -677,23 +684,15 @@ void cpu_exec_init(CPUState *cpu, Error **errp) #endif } -#if defined(CONFIG_USER_ONLY) static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) { - tb_invalidate_phys_page_range(pc, pc + 1, 0); -} -#else -static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) -{ - MemTxAttrs attrs; - hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs); - int asidx = cpu_asidx_from_attrs(cpu, attrs); - if (phys != -1) { - tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as, - phys | (pc & ~TARGET_PAGE_MASK)); - } + /* Flush the whole TB as this will not have race conditions + * even if we don't have proper locking yet. + * Ideally we would just invalidate the TBs for the + * specified PC. + */ + tb_flush(cpu); } -#endif #if defined(CONFIG_USER_ONLY) void cpu_watchpoint_remove_all(CPUState *cpu, int mask) @@ -899,11 +898,13 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) 
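    /* qemu_log_lock()/qemu_log_unlock() below keep the multi-line fatal
     * dump from being interleaved with log output from other threads.
     */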
fprintf(stderr, "\n"); cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP); if (qemu_log_separate()) { + qemu_log_lock(); qemu_log("qemu: fatal: "); qemu_log_vprintf(fmt, ap2); qemu_log("\n"); log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP); qemu_log_flush(); + qemu_log_unlock(); qemu_log_close(); } va_end(ap2); @@ -1217,6 +1218,15 @@ void qemu_mutex_unlock_ramlist(void) } #ifdef __linux__ +static int64_t get_file_size(int fd) +{ + int64_t size = lseek(fd, 0, SEEK_END); + if (size < 0) { + return -errno; + } + return size; +} + static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, const char *path, @@ -1228,7 +1238,7 @@ static void *file_ram_alloc(RAMBlock *block, char *c; void *area = MAP_FAILED; int fd = -1; - int64_t page_size; + int64_t file_size; if (kvm_enabled() && !kvm_has_sync_mmu()) { error_setg(errp, @@ -1283,25 +1293,47 @@ static void *file_ram_alloc(RAMBlock *block, */ } - page_size = qemu_fd_getpagesize(fd); - block->mr->align = MAX(page_size, QEMU_VMALLOC_ALIGN); + block->page_size = qemu_fd_getpagesize(fd); + block->mr->align = block->page_size; +#if defined(__s390x__) + if (kvm_enabled()) { + block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN); + } +#endif - if (memory < page_size) { + file_size = get_file_size(fd); + + if (memory < block->page_size) { error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to " - "or larger than page size 0x%" PRIx64, - memory, page_size); + "or larger than page size 0x%zx", + memory, block->page_size); goto error; } - memory = ROUND_UP(memory, page_size); + if (file_size > 0 && file_size < memory) { + error_setg(errp, "backing store %s size 0x%" PRIx64 + " does not match 'size' option 0x" RAM_ADDR_FMT, + path, file_size, memory); + goto error; + } + + memory = ROUND_UP(memory, block->page_size); /* * ftruncate is not supported by hugetlbfs in older * hosts, so don't bother bailing out on errors. * If anything goes wrong with it under other filesystems, * mmap will fail. + * + * Do not truncate the non-empty backend file to avoid corrupting + * the existing data in the file. Disabling shrinking is not + * enough. For example, the current vNVDIMM implementation stores + * the guest NVDIMM labels at the end of the backend file. If the + * backend file is later extended, QEMU will not be able to find + * those labels. Therefore, extending the non-empty backend file + * is disabled as well. 
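+     * (Hence the check below: ftruncate() is only attempted when
+     * get_file_size() reported a size of 0.)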
*/ - if (ftruncate(fd, memory)) { + if (!file_size && ftruncate(fd, memory)) { perror("ftruncate"); } @@ -1448,6 +1480,11 @@ void qemu_ram_unset_idstr(RAMBlock *block) } } +size_t qemu_ram_pagesize(RAMBlock *rb) +{ + return rb->page_size; +} + static int memory_try_enable_merging(void *addr, size_t len) { if (!machine_mem_merge(current_machine)) { @@ -1615,10 +1652,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) if (new_block->host) { qemu_ram_setup_dump(new_block->host, new_block->max_length); qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE); + /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */ qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK); - if (kvm_enabled()) { - kvm_setup_guest_memory(new_block->host, new_block->max_length); - } } } @@ -1689,6 +1724,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, new_block->max_length = max_size; assert(max_size >= size); new_block->fd = -1; + new_block->page_size = getpagesize(); new_block->host = host; if (host) { new_block->flags |= RAM_PREALLOC; @@ -1973,7 +2009,11 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) static void notdirty_mem_write(void *opaque, hwaddr ram_addr, uint64_t val, unsigned size) { + bool locked = false; + if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { + locked = true; + tb_lock(); tb_invalidate_phys_page_fast(ram_addr, size); } switch (size) { @@ -1989,6 +2029,11 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr, default: abort(); } + + if (locked) { + tb_unlock(); + } + /* Set both VGA and migration bits for simplicity and to remove * the notdirty callback faster. */ @@ -2049,6 +2094,12 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) continue; } cpu->watchpoint_hit = wp; + + /* The tb_lock will be reset when cpu_loop_exit or + * cpu_loop_exit_noexc longjmp back into the cpu_exec + * main loop. + */ + tb_lock(); tb_check_watchpoint(cpu); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; @@ -2236,8 +2287,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base) { subpage_t *mmio; - mmio = g_malloc0(sizeof(subpage_t)); - + mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE * sizeof(uint16_t)); mmio->as = as; mmio->base = base; memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio, @@ -2342,7 +2392,7 @@ static void tcg_commit(MemoryListener *listener) * may have split the RCU critical section. */ d = atomic_rcu_read(&cpuas->as->dispatch); - cpuas->memory_dispatch = d; + atomic_rcu_set(&cpuas->memory_dispatch, d); tlb_flush(cpuas->cpu, 1); } @@ -2457,7 +2507,9 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask); } if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { + tb_lock(); tb_invalidate_phys_range(addr, addr + length); + tb_unlock(); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); } cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); @@ -2829,6 +2881,14 @@ void cpu_register_map_client(QEMUBH *bh) void cpu_exec_init_all(void) { qemu_mutex_init(&ram_list.mutex); + /* The data structures we set up here depend on knowing the page size, + * so no more changes can be made after this point. 
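+     * (For example, subpage_init() sizes its sub_section[] array from
+     * TARGET_PAGE_SIZE, so the page size has to be frozen before the first
+     * dispatch table is built.)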
+ * In an ideal world, nothing we did before we had finished the + * machine setup would care about the target page size, and we could + * do this much later, rather than requiring board models to state + * up front what their requirements are. + */ + finalize_target_page_bits(); io_mem_init(); memory_map_init(); qemu_mutex_init(&map_client_list_lock); diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 9b1eccff24..c295f3183f 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -4814,6 +4814,10 @@ int32_t floatx80_to_int32(floatx80 a, float_status *status) int32_t aExp, shiftCount; uint64_t aSig; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1 << 31; + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -4842,6 +4846,10 @@ int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status) uint64_t aSig, savedASig; int32_t z; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1 << 31; + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -4888,6 +4896,10 @@ int64_t floatx80_to_int64(floatx80 a, float_status *status) int32_t aExp, shiftCount; uint64_t aSig, aSigExtra; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1ULL << 63; + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -4929,6 +4941,10 @@ int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status) uint64_t aSig; int64_t z; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1ULL << 63; + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -4971,6 +4987,10 @@ float32 floatx80_to_float32(floatx80 a, float_status *status) int32_t aExp; uint64_t aSig; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return float32_default_nan(status); + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -4999,6 +5019,10 @@ float64 floatx80_to_float64(floatx80 a, float_status *status) int32_t aExp; uint64_t aSig, zSig; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return float64_default_nan(status); + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -5027,6 +5051,10 @@ float128 floatx80_to_float128(floatx80 a, float_status *status) int aExp; uint64_t aSig, zSig0, zSig1; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return float128_default_nan(status); + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -5052,6 +5080,10 @@ floatx80 floatx80_round_to_int(floatx80 a, float_status *status) uint64_t lastBitMask, roundBitsMask; floatx80 z; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aExp = extractFloatx80Exp( a ); if ( 0x403E <= aExp ) { if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) { @@ -5279,6 +5311,10 @@ floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { flag aSign, bSign; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSign = extractFloatx80Sign( a ); bSign = 
extractFloatx80Sign( b ); if ( aSign == bSign ) { @@ -5300,6 +5336,10 @@ floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { flag aSign, bSign; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSign = extractFloatx80Sign( a ); bSign = extractFloatx80Sign( b ); if ( aSign == bSign ) { @@ -5323,6 +5363,10 @@ floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) int32_t aExp, bExp, zExp; uint64_t aSig, bSig, zSig0, zSig1; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -5380,6 +5424,10 @@ floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) uint64_t aSig, bSig, zSig0, zSig1; uint64_t rem0, rem1, rem2, term0, term1, term2; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -5461,6 +5509,10 @@ floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) uint64_t aSig0, aSig1, bSig; uint64_t q, term0, term1, alternateASig0, alternateASig1; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSig0 = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -5556,6 +5608,10 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status) uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSig0 = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); @@ -5620,10 +5676,11 @@ floatx80 floatx80_sqrt(floatx80 a, float_status *status) int floatx80_eq(floatx80 a, floatx80 b, float_status *status) { - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) ) { float_raise(float_flag_invalid, status); return 0; @@ -5649,10 +5706,11 @@ int floatx80_le(floatx80 a, floatx80 b, float_status *status) { flag aSign, bSign; - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) ) { float_raise(float_flag_invalid, status); return 0; @@ -5682,10 +5740,11 @@ int floatx80_lt(floatx80 a, floatx80 b, float_status *status) { flag aSign, bSign; - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 
0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) ) { float_raise(float_flag_invalid, status); return 0; @@ -5712,10 +5771,11 @@ int floatx80_lt(floatx80 a, floatx80 b, float_status *status) *----------------------------------------------------------------------------*/ int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) { - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b) + || (extractFloatx80Exp(a) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(a) << 1)) + || (extractFloatx80Exp(b) == 0x7FFF + && (uint64_t) (extractFloatx80Frac(b) << 1)) ) { float_raise(float_flag_invalid, status); return 1; @@ -5733,6 +5793,10 @@ int floatx80_unordered(floatx80 a, floatx80 b, float_status *status) int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status) { + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 0; + } if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) @@ -5764,6 +5828,10 @@ int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status) { flag aSign, bSign; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 0; + } if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) @@ -5800,6 +5868,10 @@ int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) { flag aSign, bSign; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 0; + } if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) @@ -5833,6 +5905,10 @@ int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status) *----------------------------------------------------------------------------*/ int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status) { + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return 1; + } if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7FFF ) @@ -7374,6 +7450,10 @@ static inline int floatx80_compare_internal(floatx80 a, floatx80 b, { flag aSign, bSign; + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return float_relation_unordered; + } if (( ( extractFloatx80Exp( a ) == 0x7fff ) && ( extractFloatx80Frac( a )<<1 ) ) || ( ( extractFloatx80Exp( b ) == 0x7fff ) && @@ -7645,6 +7725,10 @@ floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status) int32_t aExp; uint64_t aSig; + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } aSig = extractFloatx80Frac( a ); aExp = extractFloatx80Exp( a ); aSign = extractFloatx80Sign( a ); diff --git a/fsdev/9p-iov-marshal.c 
b/fsdev/9p-iov-marshal.c index 663cad5429..1d16f8df4b 100644 --- a/fsdev/9p-iov-marshal.c +++ b/fsdev/9p-iov-marshal.c @@ -125,7 +125,7 @@ ssize_t v9fs_iov_vunmarshal(struct iovec *out_sg, int out_num, size_t offset, str->data = g_malloc(str->size + 1); copied = v9fs_unpack(str->data, out_sg, out_num, offset, str->size); - if (copied > 0) { + if (copied >= 0) { str->data[str->size] = 0; } else { v9fs_string_free(str); diff --git a/fsdev/9p-marshal.c b/fsdev/9p-marshal.c index 238dbf21b1..a01bba6908 100644 --- a/fsdev/9p-marshal.c +++ b/fsdev/9p-marshal.c @@ -25,11 +25,6 @@ void v9fs_string_free(V9fsString *str) str->size = 0; } -void v9fs_string_null(V9fsString *str) -{ - v9fs_string_free(str); -} - void GCC_FMT_ATTR(2, 3) v9fs_string_sprintf(V9fsString *str, const char *fmt, ...) { diff --git a/fsdev/9p-marshal.h b/fsdev/9p-marshal.h index 140db6d99f..c8823d878f 100644 --- a/fsdev/9p-marshal.h +++ b/fsdev/9p-marshal.h @@ -76,9 +76,8 @@ static inline void v9fs_string_init(V9fsString *str) str->data = NULL; str->size = 0; } -extern void v9fs_string_free(V9fsString *str); -extern void v9fs_string_null(V9fsString *str); -extern void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...); -extern void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs); +void v9fs_string_free(V9fsString *str); +void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...); +void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs); #endif diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index 6db9feac8f..a56dc8488d 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -100,6 +100,7 @@ struct FileOperations { int (*parse_opts)(QemuOpts *, struct FsDriverEntry *); int (*init)(struct FsContext *); + void (*cleanup)(struct FsContext *); int (*lstat)(FsContext *, V9fsPath *, struct stat *); ssize_t (*readlink)(FsContext *, V9fsPath *, char *, size_t); int (*chmod)(FsContext *, V9fsPath *, FsCred *); diff --git a/gdbstub.c b/gdbstub.c index 8660dd38b2..ea9ff6a8d0 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -31,7 +31,6 @@ #define MAX_PACKET_LENGTH 4096 -#include "cpu.h" #include "qemu/sockets.h" #include "sysemu/kvm.h" #include "exec/semihost.h" @@ -307,7 +306,7 @@ typedef struct GDBState { int fd; int running_state; #else - CharDriverState *chr; + CharBackend chr; CharDriverState *mon_chr; #endif char syscall_buf[256]; @@ -405,7 +404,9 @@ static void put_buffer(GDBState *s, const uint8_t *buf, int len) } } #else - qemu_chr_fe_write(s->chr, buf, len); + /* XXX this blocks entire thread. 
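+     * (qemu_chr_fe_write_all() keeps retrying until the whole buffer has
+     * been written, which is what makes the call block.)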
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, buf, len); #endif } @@ -1485,6 +1486,9 @@ void gdb_exit(CPUArchState *env, int code) { GDBState *s; char buf[4]; +#ifndef CONFIG_USER_ONLY + CharDriverState *chr; +#endif s = gdbserver_state; if (!s) { @@ -1495,7 +1499,8 @@ void gdb_exit(CPUArchState *env, int code) return; } #else - if (!s->chr) { + chr = qemu_chr_fe_get_driver(&s->chr); + if (!chr) { return; } #endif @@ -1504,7 +1509,8 @@ void gdb_exit(CPUArchState *env, int code) put_packet(s, buf); #ifndef CONFIG_USER_ONLY - qemu_chr_delete(s->chr); + qemu_chr_fe_deinit(&s->chr); + qemu_chr_delete(chr); #endif #if defined(CONFIG_GNU_ARM_ECLIPSE) @@ -1770,13 +1776,9 @@ int gdbserver_start(const char *device) sigaction(SIGINT, &act, NULL); } #endif - chr = qemu_chr_new_noreplay("gdb", device, NULL); + chr = qemu_chr_new_noreplay("gdb", device); if (!chr) return -1; - - qemu_chr_fe_claim_no_fail(chr); - qemu_chr_add_handlers(chr, gdb_chr_can_receive, gdb_chr_receive, - gdb_chr_event, NULL); } s = gdbserver_state; @@ -1791,14 +1793,20 @@ int gdbserver_start(const char *device) mon_chr->chr_write = gdb_monitor_write; monitor_init(mon_chr, 0); } else { - if (s->chr) - qemu_chr_delete(s->chr); + if (qemu_chr_fe_get_driver(&s->chr)) { + qemu_chr_delete(qemu_chr_fe_get_driver(&s->chr)); + } mon_chr = s->mon_chr; memset(s, 0, sizeof(GDBState)); + s->mon_chr = mon_chr; } s->c_cpu = first_cpu; s->g_cpu = first_cpu; - s->chr = chr; + if (chr) { + qemu_chr_fe_init(&s->chr, chr, &error_abort); + qemu_chr_fe_set_handlers(&s->chr, gdb_chr_can_receive, gdb_chr_receive, + gdb_chr_event, NULL, NULL, true); + } s->state = chr ? RS_IDLE : RS_INACTIVE; s->mon_chr = mon_chr; s->current_syscall_cb = NULL; diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index 74446c669e..55d50c42c6 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -18,7 +18,7 @@ ETEXI .args_type = "", .params = "", .help = "show the version of QEMU", - .mhandler.cmd = hmp_info_version, + .cmd = hmp_info_version, }, STEXI @@ -32,7 +32,7 @@ ETEXI .args_type = "", .params = "", .help = "show the network state", - .mhandler.cmd = hmp_info_network, + .cmd = hmp_info_network, }, STEXI @@ -46,7 +46,7 @@ ETEXI .args_type = "", .params = "", .help = "show the character devices", - .mhandler.cmd = hmp_info_chardev, + .cmd = hmp_info_chardev, }, STEXI @@ -61,7 +61,7 @@ ETEXI .params = "[-n] [-v] [device]", .help = "show info of one block device or all block devices " "(-n: show named nodes; -v: show details)", - .mhandler.cmd = hmp_info_block, + .cmd = hmp_info_block, }, STEXI @@ -75,7 +75,7 @@ ETEXI .args_type = "", .params = "", .help = "show block device statistics", - .mhandler.cmd = hmp_info_blockstats, + .cmd = hmp_info_blockstats, }, STEXI @@ -89,7 +89,7 @@ ETEXI .args_type = "", .params = "", .help = "show progress of ongoing block device operations", - .mhandler.cmd = hmp_info_block_jobs, + .cmd = hmp_info_block_jobs, }, STEXI @@ -103,7 +103,7 @@ ETEXI .args_type = "", .params = "", .help = "show the cpu registers", - .mhandler.cmd = hmp_info_registers, + .cmd = hmp_info_registers, }, STEXI @@ -118,7 +118,7 @@ ETEXI .args_type = "", .params = "", .help = "show local apic state", - .mhandler.cmd = hmp_info_local_apic, + .cmd = hmp_info_local_apic, }, #endif @@ -134,7 +134,7 @@ ETEXI .args_type = "", .params = "", .help = "show io apic state", - .mhandler.cmd = hmp_info_io_apic, + .cmd = hmp_info_io_apic, }, #endif @@ -149,7 +149,7 @@ ETEXI .args_type = "", .params = "", 
.help = "show infos for each CPU", - .mhandler.cmd = hmp_info_cpus, + .cmd = hmp_info_cpus, }, STEXI @@ -163,7 +163,7 @@ ETEXI .args_type = "", .params = "", .help = "show the command line history", - .mhandler.cmd = hmp_info_history, + .cmd = hmp_info_history, }, STEXI @@ -172,20 +172,12 @@ STEXI Show the command line history. ETEXI -#if defined(TARGET_I386) || defined(TARGET_PPC) || defined(TARGET_MIPS) || \ - defined(TARGET_LM32) || (defined(TARGET_SPARC) && !defined(TARGET_SPARC64)) { .name = "irq", .args_type = "", .params = "", .help = "show the interrupts statistics (if available)", -#ifdef TARGET_SPARC - .mhandler.cmd = sun4m_hmp_info_irq, -#elif defined(TARGET_LM32) - .mhandler.cmd = lm32_hmp_info_irq, -#else - .mhandler.cmd = hmp_info_irq, -#endif + .cmd = hmp_info_irq, }, STEXI @@ -198,16 +190,9 @@ ETEXI .name = "pic", .args_type = "", .params = "", - .help = "show i8259 (PIC) state", -#ifdef TARGET_SPARC - .mhandler.cmd = sun4m_hmp_info_pic, -#elif defined(TARGET_LM32) - .mhandler.cmd = lm32_hmp_info_pic, -#else - .mhandler.cmd = hmp_info_pic, -#endif + .help = "show PIC state", + .cmd = hmp_info_pic, }, -#endif STEXI @item info pic @@ -220,7 +205,7 @@ ETEXI .args_type = "", .params = "", .help = "show PCI info", - .mhandler.cmd = hmp_info_pci, + .cmd = hmp_info_pci, }, STEXI @@ -236,7 +221,7 @@ ETEXI .args_type = "", .params = "", .help = "show virtual to physical memory mappings", - .mhandler.cmd = hmp_info_tlb, + .cmd = hmp_info_tlb, }, #endif @@ -252,7 +237,7 @@ ETEXI .args_type = "", .params = "", .help = "show the active virtual memory mappings", - .mhandler.cmd = hmp_info_mem, + .cmd = hmp_info_mem, }, #endif @@ -267,7 +252,7 @@ ETEXI .args_type = "", .params = "", .help = "show memory tree", - .mhandler.cmd = hmp_info_mtree, + .cmd = hmp_info_mtree, }, STEXI @@ -281,7 +266,7 @@ ETEXI .args_type = "", .params = "", .help = "show dynamic compiler info", - .mhandler.cmd = hmp_info_jit, + .cmd = hmp_info_jit, }, STEXI @@ -295,7 +280,7 @@ ETEXI .args_type = "", .params = "", .help = "show dynamic compiler opcode counters", - .mhandler.cmd = hmp_info_opcount, + .cmd = hmp_info_opcount, }, STEXI @@ -309,7 +294,7 @@ ETEXI .args_type = "", .params = "", .help = "show KVM information", - .mhandler.cmd = hmp_info_kvm, + .cmd = hmp_info_kvm, }, STEXI @@ -323,7 +308,7 @@ ETEXI .args_type = "", .params = "", .help = "show NUMA information", - .mhandler.cmd = hmp_info_numa, + .cmd = hmp_info_numa, }, STEXI @@ -337,7 +322,7 @@ ETEXI .args_type = "", .params = "", .help = "show guest USB devices", - .mhandler.cmd = hmp_info_usb, + .cmd = hmp_info_usb, }, STEXI @@ -351,7 +336,7 @@ ETEXI .args_type = "", .params = "", .help = "show host USB devices", - .mhandler.cmd = hmp_info_usbhost, + .cmd = hmp_info_usbhost, }, STEXI @@ -365,7 +350,7 @@ ETEXI .args_type = "", .params = "", .help = "show profiling information", - .mhandler.cmd = hmp_info_profile, + .cmd = hmp_info_profile, }, STEXI @@ -379,7 +364,7 @@ ETEXI .args_type = "", .params = "", .help = "show capture information", - .mhandler.cmd = hmp_info_capture, + .cmd = hmp_info_capture, }, STEXI @@ -393,7 +378,7 @@ ETEXI .args_type = "", .params = "", .help = "show the currently saved VM snapshots", - .mhandler.cmd = hmp_info_snapshots, + .cmd = hmp_info_snapshots, }, STEXI @@ -407,7 +392,7 @@ ETEXI .args_type = "", .params = "", .help = "show the current VM status (running|paused)", - .mhandler.cmd = hmp_info_status, + .cmd = hmp_info_status, }, STEXI @@ -421,7 +406,7 @@ ETEXI .args_type = "", .params = "", .help = "show which guest 
mouse is receiving events", - .mhandler.cmd = hmp_info_mice, + .cmd = hmp_info_mice, }, STEXI @@ -435,7 +420,7 @@ ETEXI .args_type = "", .params = "", .help = "show the vnc server status", - .mhandler.cmd = hmp_info_vnc, + .cmd = hmp_info_vnc, }, STEXI @@ -450,7 +435,7 @@ ETEXI .args_type = "", .params = "", .help = "show the spice server status", - .mhandler.cmd = hmp_info_spice, + .cmd = hmp_info_spice, }, #endif @@ -465,7 +450,7 @@ ETEXI .args_type = "", .params = "", .help = "show the current VM name", - .mhandler.cmd = hmp_info_name, + .cmd = hmp_info_name, }, STEXI @@ -479,7 +464,7 @@ ETEXI .args_type = "", .params = "", .help = "show the current VM UUID", - .mhandler.cmd = hmp_info_uuid, + .cmd = hmp_info_uuid, }, STEXI @@ -493,7 +478,7 @@ ETEXI .args_type = "", .params = "", .help = "show CPU statistics", - .mhandler.cmd = hmp_info_cpustats, + .cmd = hmp_info_cpustats, }, STEXI @@ -508,7 +493,7 @@ ETEXI .args_type = "", .params = "", .help = "show user network stack connection states", - .mhandler.cmd = hmp_info_usernet, + .cmd = hmp_info_usernet, }, #endif @@ -523,7 +508,7 @@ ETEXI .args_type = "", .params = "", .help = "show migration status", - .mhandler.cmd = hmp_info_migrate, + .cmd = hmp_info_migrate, }, STEXI @@ -537,7 +522,7 @@ ETEXI .args_type = "", .params = "", .help = "show current migration capabilities", - .mhandler.cmd = hmp_info_migrate_capabilities, + .cmd = hmp_info_migrate_capabilities, }, STEXI @@ -551,7 +536,7 @@ ETEXI .args_type = "", .params = "", .help = "show current migration parameters", - .mhandler.cmd = hmp_info_migrate_parameters, + .cmd = hmp_info_migrate_parameters, }, STEXI @@ -565,7 +550,7 @@ ETEXI .args_type = "", .params = "", .help = "show current migration xbzrle cache size", - .mhandler.cmd = hmp_info_migrate_cache_size, + .cmd = hmp_info_migrate_cache_size, }, STEXI @@ -579,7 +564,7 @@ ETEXI .args_type = "", .params = "", .help = "show balloon information", - .mhandler.cmd = hmp_info_balloon, + .cmd = hmp_info_balloon, }, STEXI @@ -593,7 +578,7 @@ ETEXI .args_type = "", .params = "", .help = "show device tree", - .mhandler.cmd = hmp_info_qtree, + .cmd = hmp_info_qtree, }, STEXI @@ -607,7 +592,7 @@ ETEXI .args_type = "", .params = "", .help = "show qdev device model list", - .mhandler.cmd = hmp_info_qdm, + .cmd = hmp_info_qdm, }, STEXI @@ -621,7 +606,7 @@ ETEXI .args_type = "path:s?", .params = "[path]", .help = "show QOM composition tree", - .mhandler.cmd = hmp_info_qom_tree, + .cmd = hmp_info_qom_tree, }, STEXI @@ -635,7 +620,7 @@ ETEXI .args_type = "", .params = "", .help = "show roms", - .mhandler.cmd = hmp_info_roms, + .cmd = hmp_info_roms, }, STEXI @@ -650,7 +635,7 @@ ETEXI .params = "[name] [vcpu]", .help = "show available trace-events & their state " "(name: event name pattern; vcpu: vCPU to query, default is any)", - .mhandler.cmd = hmp_info_trace_events, + .cmd = hmp_info_trace_events, .command_completion = info_trace_events_completion, }, @@ -665,7 +650,7 @@ ETEXI .args_type = "", .params = "", .help = "show the TPM device", - .mhandler.cmd = hmp_info_tpm, + .cmd = hmp_info_tpm, }, STEXI @@ -679,7 +664,7 @@ ETEXI .args_type = "", .params = "", .help = "show memory backends", - .mhandler.cmd = hmp_info_memdev, + .cmd = hmp_info_memdev, }, STEXI @@ -693,7 +678,7 @@ ETEXI .args_type = "", .params = "", .help = "show memory devices", - .mhandler.cmd = hmp_info_memory_devices, + .cmd = hmp_info_memory_devices, }, STEXI @@ -707,7 +692,7 @@ ETEXI .args_type = "", .params = "", .help = "show iothreads", - .mhandler.cmd = hmp_info_iothreads, 
+ .cmd = hmp_info_iothreads, }, STEXI @@ -721,7 +706,7 @@ ETEXI .args_type = "name:s", .params = "name", .help = "Show rocker switch", - .mhandler.cmd = hmp_rocker, + .cmd = hmp_rocker, }, STEXI @@ -735,7 +720,7 @@ ETEXI .args_type = "name:s", .params = "name", .help = "Show rocker ports", - .mhandler.cmd = hmp_rocker_ports, + .cmd = hmp_rocker_ports, }, STEXI @@ -749,7 +734,7 @@ ETEXI .args_type = "name:s,tbl_id:i?", .params = "name [tbl_id]", .help = "Show rocker OF-DPA flow tables", - .mhandler.cmd = hmp_rocker_of_dpa_flows, + .cmd = hmp_rocker_of_dpa_flows, }, STEXI @@ -763,7 +748,7 @@ ETEXI .args_type = "name:s,type:i?", .params = "name [type]", .help = "Show rocker OF-DPA groups", - .mhandler.cmd = hmp_rocker_of_dpa_groups, + .cmd = hmp_rocker_of_dpa_groups, }, STEXI @@ -778,7 +763,7 @@ ETEXI .args_type = "addr:l", .params = "address", .help = "Display the value of a storage key", - .mhandler.cmd = hmp_info_skeys, + .cmd = hmp_info_skeys, }, #endif @@ -793,7 +778,7 @@ ETEXI .args_type = "", .params = "", .help = "Display the latest dump status", - .mhandler.cmd = hmp_info_dump, + .cmd = hmp_info_dump, }, STEXI @@ -807,7 +792,7 @@ ETEXI .args_type = "", .params = "", .help = "Show information about hotpluggable CPUs", - .mhandler.cmd = hmp_hotpluggable_cpus, + .cmd = hmp_hotpluggable_cpus, }, STEXI diff --git a/hmp-commands.hx b/hmp-commands.hx index 848efee5d1..88192817b2 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -14,7 +14,7 @@ ETEXI .args_type = "name:S?", .params = "[cmd]", .help = "show the help", - .mhandler.cmd = do_help_cmd, + .cmd = do_help_cmd, }, STEXI @@ -28,7 +28,7 @@ ETEXI .args_type = "device:B", .params = "device|all", .help = "commit changes to the disk images (if -snapshot is used) or backing files", - .mhandler.cmd = hmp_commit, + .cmd = hmp_commit, }, STEXI @@ -47,7 +47,7 @@ ETEXI .args_type = "", .params = "", .help = "quit the emulator", - .mhandler.cmd = hmp_quit, + .cmd = hmp_quit, }, STEXI @@ -61,7 +61,7 @@ ETEXI .args_type = "device:B,size:o", .params = "device size", .help = "resize a block image", - .mhandler.cmd = hmp_block_resize, + .cmd = hmp_block_resize, }, STEXI @@ -78,7 +78,7 @@ ETEXI .args_type = "device:B,speed:o?,base:s?", .params = "device [speed [base]]", .help = "copy data from a backing file into a block device", - .mhandler.cmd = hmp_block_stream, + .cmd = hmp_block_stream, }, STEXI @@ -92,7 +92,7 @@ ETEXI .args_type = "device:B,speed:o", .params = "device speed", .help = "set maximum speed for a background block operation", - .mhandler.cmd = hmp_block_job_set_speed, + .cmd = hmp_block_job_set_speed, }, STEXI @@ -107,7 +107,7 @@ ETEXI .params = "[-f] device", .help = "stop an active background block operation (use -f" "\n\t\t\t if the operation is currently paused)", - .mhandler.cmd = hmp_block_job_cancel, + .cmd = hmp_block_job_cancel, }, STEXI @@ -121,7 +121,7 @@ ETEXI .args_type = "device:B", .params = "device", .help = "stop an active background block operation", - .mhandler.cmd = hmp_block_job_complete, + .cmd = hmp_block_job_complete, }, STEXI @@ -136,7 +136,7 @@ ETEXI .args_type = "device:B", .params = "device", .help = "pause an active background block operation", - .mhandler.cmd = hmp_block_job_pause, + .cmd = hmp_block_job_pause, }, STEXI @@ -150,7 +150,7 @@ ETEXI .args_type = "device:B", .params = "device", .help = "resume a paused background block operation", - .mhandler.cmd = hmp_block_job_resume, + .cmd = hmp_block_job_resume, }, STEXI @@ -164,7 +164,7 @@ ETEXI .args_type = "force:-f,device:B", .params = "[-f] 
device", .help = "eject a removable medium (use -f to force it)", - .mhandler.cmd = hmp_eject, + .cmd = hmp_eject, }, STEXI @@ -178,7 +178,7 @@ ETEXI .args_type = "id:B", .params = "device", .help = "remove host block device", - .mhandler.cmd = hmp_drive_del, + .cmd = hmp_drive_del, }, STEXI @@ -197,7 +197,7 @@ ETEXI .args_type = "device:B,target:F,arg:s?,read-only-mode:s?", .params = "device filename [format [read-only-mode]]", .help = "change a removable medium, optional format", - .mhandler.cmd = hmp_change, + .cmd = hmp_change, }, STEXI @@ -256,7 +256,7 @@ ETEXI .args_type = "filename:F", .params = "filename", .help = "save screen into PPM image 'filename'", - .mhandler.cmd = hmp_screendump, + .cmd = hmp_screendump, }, STEXI @@ -270,7 +270,7 @@ ETEXI .args_type = "filename:F", .params = "filename", .help = "output logs to 'filename'", - .mhandler.cmd = hmp_logfile, + .cmd = hmp_logfile, }, STEXI @@ -285,7 +285,7 @@ ETEXI .params = "name on|off [vcpu]", .help = "changes status of a specific trace event " "(vcpu: vCPU to set, default is all)", - .mhandler.cmd = hmp_trace_event, + .cmd = hmp_trace_event, .command_completion = trace_event_completion, }, @@ -301,7 +301,7 @@ ETEXI .args_type = "op:s?,arg:F?", .params = "on|off|flush|set [arg]", .help = "open, close, or flush trace file, or set a new file name", - .mhandler.cmd = hmp_trace_file, + .cmd = hmp_trace_file, }, STEXI @@ -316,7 +316,7 @@ ETEXI .args_type = "items:s", .params = "item1[,...]", .help = "activate logging of the specified items", - .mhandler.cmd = hmp_log, + .cmd = hmp_log, }, STEXI @@ -330,7 +330,7 @@ ETEXI .args_type = "name:s?", .params = "[tag|id]", .help = "save a VM snapshot. If no tag or id are provided, a new snapshot is created", - .mhandler.cmd = hmp_savevm, + .cmd = hmp_savevm, }, STEXI @@ -347,7 +347,7 @@ ETEXI .args_type = "name:s", .params = "tag|id", .help = "restore a VM snapshot from its tag or id", - .mhandler.cmd = hmp_loadvm, + .cmd = hmp_loadvm, .command_completion = loadvm_completion, }, @@ -363,7 +363,7 @@ ETEXI .args_type = "name:s", .params = "tag|id", .help = "delete a VM snapshot from its tag or id", - .mhandler.cmd = hmp_delvm, + .cmd = hmp_delvm, .command_completion = delvm_completion, }, @@ -378,7 +378,7 @@ ETEXI .args_type = "option:s?", .params = "[on|off]", .help = "run emulation in singlestep mode or switch to normal mode", - .mhandler.cmd = hmp_singlestep, + .cmd = hmp_singlestep, }, STEXI @@ -393,7 +393,7 @@ ETEXI .args_type = "", .params = "", .help = "stop emulation", - .mhandler.cmd = hmp_stop, + .cmd = hmp_stop, }, STEXI @@ -407,7 +407,7 @@ ETEXI .args_type = "", .params = "", .help = "resume emulation", - .mhandler.cmd = hmp_cont, + .cmd = hmp_cont, }, STEXI @@ -421,7 +421,7 @@ ETEXI .args_type = "", .params = "", .help = "wakeup guest from suspend", - .mhandler.cmd = hmp_system_wakeup, + .cmd = hmp_system_wakeup, }, STEXI @@ -435,7 +435,7 @@ ETEXI .args_type = "device:s?", .params = "[device]", .help = "start gdbserver on given device (default 'tcp::1234'), stop with 'none'", - .mhandler.cmd = hmp_gdbserver, + .cmd = hmp_gdbserver, }, STEXI @@ -449,7 +449,7 @@ ETEXI .args_type = "fmt:/,addr:l", .params = "/fmt addr", .help = "virtual memory dump starting at 'addr'", - .mhandler.cmd = hmp_memory_dump, + .cmd = hmp_memory_dump, }, STEXI @@ -463,7 +463,7 @@ ETEXI .args_type = "fmt:/,addr:l", .params = "/fmt addr", .help = "physical memory dump starting at 'addr'", - .mhandler.cmd = hmp_physical_memory_dump, + .cmd = hmp_physical_memory_dump, }, STEXI @@ -530,7 +530,7 @@ ETEXI 
.args_type = "fmt:/,val:l", .params = "/fmt expr", .help = "print expression value (use $reg for CPU register access)", - .mhandler.cmd = do_print, + .cmd = do_print, }, STEXI @@ -545,7 +545,7 @@ ETEXI .args_type = "fmt:/,addr:i,index:i.", .params = "/fmt addr", .help = "I/O port read", - .mhandler.cmd = hmp_ioport_read, + .cmd = hmp_ioport_read, }, STEXI @@ -559,7 +559,7 @@ ETEXI .args_type = "fmt:/,addr:i,val:i", .params = "/fmt addr value", .help = "I/O port write", - .mhandler.cmd = hmp_ioport_write, + .cmd = hmp_ioport_write, }, STEXI @@ -573,7 +573,7 @@ ETEXI .args_type = "keys:s,hold-time:i?", .params = "keys [hold_ms]", .help = "send keys to the VM (e.g. 'sendkey ctrl-alt-f1', default hold time=100 ms)", - .mhandler.cmd = hmp_sendkey, + .cmd = hmp_sendkey, .command_completion = sendkey_completion, }, @@ -596,7 +596,7 @@ ETEXI .args_type = "", .params = "", .help = "reset the system", - .mhandler.cmd = hmp_system_reset, + .cmd = hmp_system_reset, }, STEXI @@ -610,7 +610,7 @@ ETEXI .args_type = "", .params = "", .help = "send system power down event", - .mhandler.cmd = hmp_system_powerdown, + .cmd = hmp_system_powerdown, }, STEXI @@ -624,7 +624,7 @@ ETEXI .args_type = "start:i,size:i", .params = "addr size", .help = "compute the checksum of a memory region", - .mhandler.cmd = hmp_sum, + .cmd = hmp_sum, }, STEXI @@ -638,7 +638,7 @@ ETEXI .args_type = "devname:s", .params = "device", .help = "add USB device (e.g. 'host:bus.addr' or 'host:vendor_id:product_id')", - .mhandler.cmd = hmp_usb_add, + .cmd = hmp_usb_add, }, STEXI @@ -653,7 +653,7 @@ ETEXI .args_type = "devname:s", .params = "device", .help = "remove USB device 'bus.addr'", - .mhandler.cmd = hmp_usb_del, + .cmd = hmp_usb_del, }, STEXI @@ -669,7 +669,7 @@ ETEXI .args_type = "device:O", .params = "driver[,prop=value][,...]", .help = "add device, like -device on the command line", - .mhandler.cmd = hmp_device_add, + .cmd = hmp_device_add, .command_completion = device_add_completion, }, @@ -684,7 +684,7 @@ ETEXI .args_type = "id:s", .params = "device", .help = "remove device", - .mhandler.cmd = hmp_device_del, + .cmd = hmp_device_del, .command_completion = device_del_completion, }, @@ -700,7 +700,7 @@ ETEXI .args_type = "index:i", .params = "index", .help = "set the default CPU", - .mhandler.cmd = hmp_cpu, + .cmd = hmp_cpu, }, STEXI @@ -714,7 +714,7 @@ ETEXI .args_type = "dx_str:s,dy_str:s,dz_str:s?", .params = "dx dy [dz]", .help = "send mouse move events", - .mhandler.cmd = hmp_mouse_move, + .cmd = hmp_mouse_move, }, STEXI @@ -729,7 +729,7 @@ ETEXI .args_type = "button_state:i", .params = "state", .help = "change mouse button state (1=L, 2=M, 4=R)", - .mhandler.cmd = hmp_mouse_button, + .cmd = hmp_mouse_button, }, STEXI @@ -743,7 +743,7 @@ ETEXI .args_type = "index:i", .params = "index", .help = "set which mouse device receives events", - .mhandler.cmd = hmp_mouse_set, + .cmd = hmp_mouse_set, }, STEXI @@ -761,7 +761,7 @@ ETEXI .args_type = "path:F,freq:i?,bits:i?,nchannels:i?", .params = "path [frequency [bits [channels]]]", .help = "capture audio to a wave file (default frequency=44100 bits=16 channels=2)", - .mhandler.cmd = hmp_wavcapture, + .cmd = hmp_wavcapture, }, STEXI @item wavcapture @var{filename} [@var{frequency} [@var{bits} [@var{channels}]]] @@ -782,7 +782,7 @@ ETEXI .args_type = "n:i", .params = "capture index", .help = "stop capture", - .mhandler.cmd = hmp_stopcapture, + .cmd = hmp_stopcapture, }, STEXI @item stopcapture @var{index} @@ -798,7 +798,7 @@ ETEXI .args_type = "val:l,size:i,filename:s", .params = "addr 
size file", .help = "save to disk virtual memory dump starting at 'addr' of size 'size'", - .mhandler.cmd = hmp_memsave, + .cmd = hmp_memsave, }, STEXI @@ -812,7 +812,7 @@ ETEXI .args_type = "val:l,size:i,filename:s", .params = "addr size file", .help = "save to disk physical memory dump starting at 'addr' of size 'size'", - .mhandler.cmd = hmp_pmemsave, + .cmd = hmp_pmemsave, }, STEXI @@ -826,7 +826,7 @@ ETEXI .args_type = "bootdevice:s", .params = "bootdevice", .help = "define new values for the boot device list", - .mhandler.cmd = hmp_boot_set, + .cmd = hmp_boot_set, }, STEXI @@ -844,7 +844,7 @@ ETEXI .args_type = "", .params = "", .help = "inject an NMI", - .mhandler.cmd = hmp_nmi, + .cmd = hmp_nmi, }, STEXI @item nmi @var{cpu} @@ -858,7 +858,7 @@ ETEXI .args_type = "device:s,data:s", .params = "device data", .help = "Write to a ring buffer character device", - .mhandler.cmd = hmp_ringbuf_write, + .cmd = hmp_ringbuf_write, .command_completion = ringbuf_write_completion, }, @@ -875,7 +875,7 @@ ETEXI .args_type = "device:s,size:i", .params = "device size", .help = "Read from a ring buffer character device", - .mhandler.cmd = hmp_ringbuf_read, + .cmd = hmp_ringbuf_read, .command_completion = ringbuf_write_completion, }, @@ -901,7 +901,7 @@ ETEXI " full copy of disk\n\t\t\t -i for migration without " "shared storage with incremental copy of disk " "(base image shared between src and destination)", - .mhandler.cmd = hmp_migrate, + .cmd = hmp_migrate, }, @@ -918,7 +918,7 @@ ETEXI .args_type = "", .params = "", .help = "cancel the current VM migration", - .mhandler.cmd = hmp_migrate_cancel, + .cmd = hmp_migrate_cancel, }, STEXI @@ -933,7 +933,7 @@ ETEXI .args_type = "uri:s", .params = "uri", .help = "Continue an incoming migration from an -incoming defer", - .mhandler.cmd = hmp_migrate_incoming, + .cmd = hmp_migrate_incoming, }, STEXI @@ -954,7 +954,7 @@ ETEXI "The cache size affects the number of cache misses." "In case of a high cache miss ratio you need to increase" " the cache size", - .mhandler.cmd = hmp_migrate_set_cache_size, + .cmd = hmp_migrate_set_cache_size, }, STEXI @@ -969,7 +969,7 @@ ETEXI .params = "value", .help = "set maximum speed (in bytes) for migrations. " "Defaults to MB if no size suffix is specified, ie. B/K/M/G/T", - .mhandler.cmd = hmp_migrate_set_speed, + .cmd = hmp_migrate_set_speed, }, STEXI @@ -983,7 +983,7 @@ ETEXI .args_type = "value:T", .params = "value", .help = "set maximum tolerated downtime (in seconds) for migrations", - .mhandler.cmd = hmp_migrate_set_downtime, + .cmd = hmp_migrate_set_downtime, }, STEXI @@ -997,7 +997,7 @@ ETEXI .args_type = "capability:s,state:b", .params = "capability state", .help = "Enable/Disable the usage of a capability for migration", - .mhandler.cmd = hmp_migrate_set_capability, + .cmd = hmp_migrate_set_capability, .command_completion = migrate_set_capability_completion, }, @@ -1012,7 +1012,7 @@ ETEXI .args_type = "parameter:s,value:s", .params = "parameter value", .help = "Set the parameter for migration", - .mhandler.cmd = hmp_migrate_set_parameter, + .cmd = hmp_migrate_set_parameter, .command_completion = migrate_set_parameter_completion, }, @@ -1029,7 +1029,7 @@ ETEXI .help = "Followup to a migration command to switch the migration" " to postcopy mode. 
The postcopy-ram capability must " "be set before the original migration command.", - .mhandler.cmd = hmp_migrate_start_postcopy, + .cmd = hmp_migrate_start_postcopy, }, STEXI @@ -1037,6 +1037,21 @@ STEXI @findex migrate_start_postcopy Switch in-progress migration to postcopy mode. Ignored after the end of migration (or once already in postcopy). +ETEXI + + { + .name = "x_colo_lost_heartbeat", + .args_type = "", + .params = "", + .help = "Tell COLO that heartbeat is lost,\n\t\t\t" + "a failover or takeover is needed.", + .cmd = hmp_x_colo_lost_heartbeat, + }, + +STEXI +@item x_colo_lost_heartbeat +@findex x_colo_lost_heartbeat +Tell COLO that heartbeat is lost, a failover or takeover is needed. ETEXI { @@ -1044,7 +1059,7 @@ ETEXI .args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?", .params = "protocol hostname port tls-port cert-subject", .help = "set migration information for remote display", - .mhandler.cmd = hmp_client_migrate_info, + .cmd = hmp_client_migrate_info, }, STEXI @@ -1067,7 +1082,7 @@ ETEXI "-s: dump in kdump-compressed format, with snappy compression.\n\t\t\t" "begin: the starting physical address.\n\t\t\t" "length: the memory size, in bytes.", - .mhandler.cmd = hmp_dump_guest_memory, + .cmd = hmp_dump_guest_memory, }, @@ -1094,7 +1109,7 @@ ETEXI .args_type = "filename:F", .params = "", .help = "Save guest storage keys into file 'filename'.\n", - .mhandler.cmd = hmp_dump_skeys, + .cmd = hmp_dump_skeys, }, #endif @@ -1116,7 +1131,7 @@ ETEXI "The default format is qcow2. The -n flag requests QEMU\n\t\t\t" "to reuse the image found in new-image-file, instead of\n\t\t\t" "recreating it from scratch.", - .mhandler.cmd = hmp_snapshot_blkdev, + .cmd = hmp_snapshot_blkdev, }, STEXI @@ -1132,7 +1147,7 @@ ETEXI .help = "take an internal snapshot of device.\n\t\t\t" "The format of the image used by device must\n\t\t\t" "support it, such as qcow2.\n\t\t\t", - .mhandler.cmd = hmp_snapshot_blkdev_internal, + .cmd = hmp_snapshot_blkdev_internal, }, STEXI @@ -1150,7 +1165,7 @@ ETEXI "the snapshot matching both id and name.\n\t\t\t" "The format of the image used by device must\n\t\t\t" "support it, such as qcow2.\n\t\t\t", - .mhandler.cmd = hmp_snapshot_delete_blkdev_internal, + .cmd = hmp_snapshot_delete_blkdev_internal, }, STEXI @@ -1171,7 +1186,7 @@ ETEXI "in new-image-file, instead of recreating it from scratch.\n\t\t\t" "The -f flag requests QEMU to copy the whole disk,\n\t\t\t" "so that the result does not need a backing file.\n\t\t\t", - .mhandler.cmd = hmp_drive_mirror, + .cmd = hmp_drive_mirror, }, STEXI @item drive_mirror @@ -1182,8 +1197,8 @@ ETEXI { .name = "drive_backup", - .args_type = "reuse:-n,full:-f,device:B,target:s,format:s?", - .params = "[-n] [-f] device target [format]", + .args_type = "reuse:-n,full:-f,compress:-c,device:B,target:s,format:s?", + .params = "[-n] [-f] [-c] device target [format]", .help = "initiates a point-in-time\n\t\t\t" "copy for a device. 
The device's contents are\n\t\t\t" "copied to the new image file, excluding data that\n\t\t\t" @@ -1191,8 +1206,10 @@ ETEXI "The -n flag requests QEMU to reuse the image found\n\t\t\t" "in new-image-file, instead of recreating it from scratch.\n\t\t\t" "The -f flag requests QEMU to copy the whole disk,\n\t\t\t" - "so that the result does not need a backing file.\n\t\t\t", - .mhandler.cmd = hmp_drive_backup, + "so that the result does not need a backing file.\n\t\t\t" + "The -c flag requests QEMU to compress backup data\n\t\t\t" + "(if the target format supports it).\n\t\t\t", + .cmd = hmp_drive_backup, }, STEXI @item drive_backup @@ -1210,7 +1227,7 @@ ETEXI "[,snapshot=on|off][,cache=on|off]\n" "[,readonly=on|off][,copy-on-read=on|off]", .help = "add drive to PCI storage controller", - .mhandler.cmd = hmp_drive_add, + .cmd = hmp_drive_add, }, STEXI @@ -1234,7 +1251,7 @@ ETEXI " = error string or 32bit\n\t\t\t" " = 32bit x 4\n\t\t\t" " = 32bit x 4", - .mhandler.cmd = hmp_pcie_aer_inject_error, + .cmd = hmp_pcie_aer_inject_error, }, STEXI @@ -1248,7 +1265,7 @@ ETEXI .args_type = "device:s,opts:s?", .params = "tap|user|socket|vde|netmap|bridge|vhost-user|dump [options]", .help = "add host VLAN client", - .mhandler.cmd = hmp_host_net_add, + .cmd = hmp_host_net_add, .command_completion = host_net_add_completion, }, @@ -1263,7 +1280,7 @@ ETEXI .args_type = "vlan_id:i,device:s", .params = "vlan_id name", .help = "remove host VLAN client", - .mhandler.cmd = hmp_host_net_remove, + .cmd = hmp_host_net_remove, .command_completion = host_net_remove_completion, }, @@ -1278,7 +1295,7 @@ ETEXI .args_type = "netdev:O", .params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]", .help = "add host network device", - .mhandler.cmd = hmp_netdev_add, + .cmd = hmp_netdev_add, .command_completion = netdev_add_completion, }, @@ -1293,7 +1310,7 @@ ETEXI .args_type = "id:s", .params = "id", .help = "remove host network device", - .mhandler.cmd = hmp_netdev_del, + .cmd = hmp_netdev_del, .command_completion = netdev_del_completion, }, @@ -1308,7 +1325,7 @@ ETEXI .args_type = "object:O", .params = "[qom-type=]type,id=str[,prop=value][,...]", .help = "create QOM object", - .mhandler.cmd = hmp_object_add, + .cmd = hmp_object_add, .command_completion = object_add_completion, }, @@ -1323,7 +1340,7 @@ ETEXI .args_type = "id:s", .params = "id", .help = "destroy QOM object", - .mhandler.cmd = hmp_object_del, + .cmd = hmp_object_del, .command_completion = object_del_completion, }, @@ -1339,7 +1356,7 @@ ETEXI .args_type = "arg1:s,arg2:s?,arg3:s?", .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport", .help = "redirect TCP or UDP connections from host to guest (requires -net user)", - .mhandler.cmd = hmp_hostfwd_add, + .cmd = hmp_hostfwd_add, }, #endif STEXI @@ -1354,7 +1371,7 @@ ETEXI .args_type = "arg1:s,arg2:s?,arg3:s?", .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport", .help = "remove host-to-guest TCP or UDP redirection", - .mhandler.cmd = hmp_hostfwd_remove, + .cmd = hmp_hostfwd_remove, }, #endif @@ -1369,7 +1386,7 @@ ETEXI .args_type = "value:M", .params = "target", .help = "request VM to change its memory allocation (in MB)", - .mhandler.cmd = hmp_balloon, + .cmd = hmp_balloon, }, STEXI @@ -1383,7 +1400,7 @@ ETEXI .args_type = "name:s,up:b", .params = "name on|off", .help = "change the link status of a network adapter", - .mhandler.cmd = hmp_set_link, + .cmd = hmp_set_link, .command_completion = set_link_completion, }, @@ -1398,7 +1415,7 @@ ETEXI 
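Note on the hunks above: they are a mechanical rename of the monitor command handler field from .mhandler.cmd to .cmd, plus a new -c (compress) flag for drive_backup and the new x_colo_lost_heartbeat command. A minimal, self-contained sketch of what a command-table entry looks like after the rename follows; the HMPCommand struct, the hmp_example handler and the "example" command are hypothetical stand-ins for illustration only, not definitions taken from this patch.

    #include <stdio.h>

    typedef struct Monitor Monitor;   /* opaque stand-ins for the real types */
    typedef struct QDict QDict;

    typedef struct HMPCommand {
        const char *name;
        const char *args_type;
        const char *params;
        const char *help;
        void (*cmd)(Monitor *mon, const QDict *qdict);   /* was .mhandler.cmd */
    } HMPCommand;

    static void hmp_example(Monitor *mon, const QDict *qdict)
    {
        (void)mon;
        (void)qdict;
        printf("example command invoked\n");
    }

    static const HMPCommand example_entry = {
        .name      = "example",
        .args_type = "",
        .params    = "",
        .help      = "hypothetical command showing the new .cmd field",
        .cmd       = hmp_example,
    };

    int main(void)
    {
        example_entry.cmd(NULL, NULL);   /* dispatch through the renamed field */
        return 0;
    }

With the -c flag added above, a compressed backup would be requested from the monitor as, for example, "drive_backup -c drive0 /tmp/backup.qcow2 qcow2" (device name and target path are hypothetical).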
.args_type = "action:s", .params = "[reset|shutdown|poweroff|pause|debug|none]", .help = "change watchdog action", - .mhandler.cmd = hmp_watchdog_action, + .cmd = hmp_watchdog_action, .command_completion = watchdog_action_completion, }, @@ -1413,7 +1430,7 @@ ETEXI .args_type = "aclname:s", .params = "aclname", .help = "list rules in the access control list", - .mhandler.cmd = hmp_acl_show, + .cmd = hmp_acl_show, }, STEXI @@ -1430,7 +1447,7 @@ ETEXI .args_type = "aclname:s,policy:s", .params = "aclname allow|deny", .help = "set default access control list policy", - .mhandler.cmd = hmp_acl_policy, + .cmd = hmp_acl_policy, }, STEXI @@ -1446,7 +1463,7 @@ ETEXI .args_type = "aclname:s,match:s,policy:s,index:i?", .params = "aclname match allow|deny [index]", .help = "add a match rule to the access control list", - .mhandler.cmd = hmp_acl_add, + .cmd = hmp_acl_add, }, STEXI @@ -1465,7 +1482,7 @@ ETEXI .args_type = "aclname:s,match:s", .params = "aclname match", .help = "remove a match rule from the access control list", - .mhandler.cmd = hmp_acl_remove, + .cmd = hmp_acl_remove, }, STEXI @@ -1479,7 +1496,7 @@ ETEXI .args_type = "aclname:s", .params = "aclname", .help = "reset the access control list", - .mhandler.cmd = hmp_acl_reset, + .cmd = hmp_acl_reset, }, STEXI @@ -1494,7 +1511,7 @@ ETEXI .args_type = "all:-a,writable:-w,uri:s", .params = "nbd_server_start [-a] [-w] host:port", .help = "serve block devices on the given host and port", - .mhandler.cmd = hmp_nbd_server_start, + .cmd = hmp_nbd_server_start, }, STEXI @item nbd_server_start @var{host}:@var{port} @@ -1510,7 +1527,7 @@ ETEXI .args_type = "writable:-w,device:B", .params = "nbd_server_add [-w] device", .help = "export a block device via NBD", - .mhandler.cmd = hmp_nbd_server_add, + .cmd = hmp_nbd_server_add, }, STEXI @item nbd_server_add @var{device} @@ -1525,7 +1542,7 @@ ETEXI .args_type = "", .params = "nbd_server_stop", .help = "stop serving block devices using the NBD protocol", - .mhandler.cmd = hmp_nbd_server_stop, + .cmd = hmp_nbd_server_stop, }, STEXI @item nbd_server_stop @@ -1541,7 +1558,7 @@ ETEXI .args_type = "broadcast:-b,cpu_index:i,bank:i,status:l,mcg_status:l,addr:l,misc:l", .params = "[-b] cpu bank status mcgstatus addr misc", .help = "inject a MCE on the given CPU [and broadcast to other CPUs with -b option]", - .mhandler.cmd = hmp_mce, + .cmd = hmp_mce, }, #endif @@ -1556,7 +1573,7 @@ ETEXI .args_type = "fdname:s", .params = "getfd name", .help = "receive a file descriptor via SCM rights and assign it a name", - .mhandler.cmd = hmp_getfd, + .cmd = hmp_getfd, }, STEXI @@ -1572,7 +1589,7 @@ ETEXI .args_type = "fdname:s", .params = "closefd name", .help = "close a file descriptor previously passed via SCM rights", - .mhandler.cmd = hmp_closefd, + .cmd = hmp_closefd, }, STEXI @@ -1588,7 +1605,7 @@ ETEXI .args_type = "device:B,password:s", .params = "block_passwd device password", .help = "set the password of encrypted block devices", - .mhandler.cmd = hmp_block_passwd, + .cmd = hmp_block_passwd, }, STEXI @@ -1602,7 +1619,7 @@ ETEXI .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l", .params = "device bps bps_rd bps_wr iops iops_rd iops_wr", .help = "change I/O throttle limits for a block drive", - .mhandler.cmd = hmp_block_set_io_throttle, + .cmd = hmp_block_set_io_throttle, }, STEXI @@ -1616,7 +1633,7 @@ ETEXI .args_type = "protocol:s,password:s,connected:s?", .params = "protocol password action-if-connected", .help = "set spice/vnc password", - .mhandler.cmd = hmp_set_password, + .cmd = 
hmp_set_password, }, STEXI @@ -1635,7 +1652,7 @@ ETEXI .args_type = "protocol:s,time:s", .params = "protocol time", .help = "set spice/vnc password expire-time", - .mhandler.cmd = hmp_expire_password, + .cmd = hmp_expire_password, }, STEXI @@ -1666,7 +1683,7 @@ ETEXI .args_type = "args:s", .params = "args", .help = "add chardev", - .mhandler.cmd = hmp_chardev_add, + .cmd = hmp_chardev_add, .command_completion = chardev_add_completion, }, @@ -1682,7 +1699,7 @@ ETEXI .args_type = "id:s", .params = "id", .help = "remove chardev", - .mhandler.cmd = hmp_chardev_remove, + .cmd = hmp_chardev_remove, .command_completion = chardev_remove_completion, }, @@ -1698,7 +1715,7 @@ ETEXI .args_type = "device:B,command:s", .params = "[device] \"[command]\"", .help = "run a qemu-io command on a block device", - .mhandler.cmd = hmp_qemu_io, + .cmd = hmp_qemu_io, }, STEXI @@ -1713,7 +1730,7 @@ ETEXI .args_type = "id:i", .params = "id", .help = "add cpu", - .mhandler.cmd = hmp_cpu_add, + .cmd = hmp_cpu_add, }, STEXI @@ -1727,7 +1744,7 @@ ETEXI .args_type = "path:s?", .params = "path", .help = "list QOM properties", - .mhandler.cmd = hmp_qom_list, + .cmd = hmp_qom_list, }, STEXI @@ -1740,7 +1757,7 @@ ETEXI .args_type = "path:s,property:s,value:s", .params = "path property value", .help = "set QOM property", - .mhandler.cmd = hmp_qom_set, + .cmd = hmp_qom_set, }, STEXI @@ -1753,8 +1770,8 @@ ETEXI .args_type = "item:s?", .params = "[subcommand]", .help = "show various information about the system state", - .mhandler.cmd = hmp_info_help, - .sub_table = info_cmds, + .cmd = hmp_info_help, + .sub_table = info_cmds, }, STEXI diff --git a/hmp.c b/hmp.c index cc2056e9e2..b86961705d 100644 --- a/hmp.c +++ b/hmp.c @@ -36,6 +36,7 @@ #include "qemu-io.h" #include "qemu/cutils.h" #include "qemu/error-report.h" +#include "hw/intc/intc.h" #ifdef CONFIG_SPICE #include @@ -283,27 +284,44 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) if (params) { monitor_printf(mon, "parameters:"); + assert(params->has_compress_level); monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_LEVEL], params->compress_level); + assert(params->has_compress_threads); monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_COMPRESS_THREADS], params->compress_threads); + assert(params->has_decompress_threads); monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS], params->decompress_threads); + assert(params->has_cpu_throttle_initial); monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL], params->cpu_throttle_initial); + assert(params->has_cpu_throttle_increment); monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT], params->cpu_throttle_increment); monitor_printf(mon, " %s: '%s'", MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_CREDS], - params->tls_creds ? : ""); + params->has_tls_creds ? params->tls_creds : ""); monitor_printf(mon, " %s: '%s'", MigrationParameter_lookup[MIGRATION_PARAMETER_TLS_HOSTNAME], - params->tls_hostname ? : ""); + params->has_tls_hostname ? 
params->tls_hostname : ""); + assert(params->has_max_bandwidth); + monitor_printf(mon, " %s: %" PRId64 " bytes/second", + MigrationParameter_lookup[MIGRATION_PARAMETER_MAX_BANDWIDTH], + params->max_bandwidth); + assert(params->has_downtime_limit); + monitor_printf(mon, " %s: %" PRId64 " milliseconds", + MigrationParameter_lookup[MIGRATION_PARAMETER_DOWNTIME_LIMIT], + params->downtime_limit); + assert(params->has_x_checkpoint_delay); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CHECKPOINT_DELAY], + params->x_checkpoint_delay); monitor_printf(mon, "\n"); } @@ -787,6 +805,70 @@ static void hmp_info_pci_device(Monitor *mon, const PciDeviceInfo *dev) } } +static int hmp_info_irq_foreach(Object *obj, void *opaque) +{ + InterruptStatsProvider *intc; + InterruptStatsProviderClass *k; + Monitor *mon = opaque; + + if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) { + intc = INTERRUPT_STATS_PROVIDER(obj); + k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj); + uint64_t *irq_counts; + unsigned int nb_irqs, i; + if (k->get_statistics && + k->get_statistics(intc, &irq_counts, &nb_irqs)) { + if (nb_irqs > 0) { + monitor_printf(mon, "IRQ statistics for %s:\n", + object_get_typename(obj)); + for (i = 0; i < nb_irqs; i++) { + if (irq_counts[i] > 0) { + monitor_printf(mon, "%2d: %" PRId64 "\n", i, + irq_counts[i]); + } + } + } + } else { + monitor_printf(mon, "IRQ statistics not available for %s.\n", + object_get_typename(obj)); + } + } + + return 0; +} + +void hmp_info_irq(Monitor *mon, const QDict *qdict) +{ + object_child_foreach_recursive(object_get_root(), + hmp_info_irq_foreach, mon); +} + +static int hmp_info_pic_foreach(Object *obj, void *opaque) +{ + InterruptStatsProvider *intc; + InterruptStatsProviderClass *k; + Monitor *mon = opaque; + + if (object_dynamic_cast(obj, TYPE_INTERRUPT_STATS_PROVIDER)) { + intc = INTERRUPT_STATS_PROVIDER(obj); + k = INTERRUPT_STATS_PROVIDER_GET_CLASS(obj); + if (k->print_info) { + k->print_info(intc, mon); + } else { + monitor_printf(mon, "Interrupt controller information not available for %s.\n", + object_get_typename(obj)); + } + } + + return 0; +} + +void hmp_info_pic(Monitor *mon, const QDict *qdict) +{ + object_child_foreach_recursive(object_get_root(), + hmp_info_pic_foreach, mon); +} + void hmp_info_pci(Monitor *mon, const QDict *qdict) { PciInfoList *info_list, *info; @@ -1109,8 +1191,19 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict) const char *format = qdict_get_try_str(qdict, "format"); bool reuse = qdict_get_try_bool(qdict, "reuse", false); bool full = qdict_get_try_bool(qdict, "full", false); - enum NewImageMode mode; + bool compress = qdict_get_try_bool(qdict, "compress", false); Error *err = NULL; + DriveBackup backup = { + .device = (char *)device, + .target = (char *)filename, + .has_format = !!format, + .format = (char *)format, + .sync = full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, + .has_mode = true, + .mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS, + .has_compress = !!compress, + .compress = compress, + }; if (!filename) { error_setg(&err, QERR_MISSING_PARAMETER, "target"); @@ -1118,16 +1211,7 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict) return; } - if (reuse) { - mode = NEW_IMAGE_MODE_EXISTING; - } else { - mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; - } - - qmp_drive_backup(false, NULL, device, filename, !!format, format, - full ? 
MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, - true, mode, false, 0, false, NULL, - false, 0, false, 0, &err); + qmp_drive_backup(&backup, &err); hmp_handle_error(mon, &err); } @@ -1193,6 +1277,7 @@ void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &err); } +/* Kept for backwards compatibility */ void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict) { double value = qdict_get_double(qdict, "value"); @@ -1211,6 +1296,7 @@ void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict) } } +/* Kept for backwards compatibility */ void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict) { int64_t value = qdict_get_int(qdict, "value"); @@ -1251,45 +1337,62 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) { const char *param = qdict_get_str(qdict, "parameter"); const char *valuestr = qdict_get_str(qdict, "value"); + int64_t valuebw = 0; long valueint = 0; + char *endp; Error *err = NULL; - bool has_compress_level = false; - bool has_compress_threads = false; - bool has_decompress_threads = false; - bool has_cpu_throttle_initial = false; - bool has_cpu_throttle_increment = false; - bool has_tls_creds = false; - bool has_tls_hostname = false; bool use_int_value = false; int i; for (i = 0; i < MIGRATION_PARAMETER__MAX; i++) { if (strcmp(param, MigrationParameter_lookup[i]) == 0) { + MigrationParameters p = { 0 }; switch (i) { case MIGRATION_PARAMETER_COMPRESS_LEVEL: - has_compress_level = true; + p.has_compress_level = true; use_int_value = true; break; case MIGRATION_PARAMETER_COMPRESS_THREADS: - has_compress_threads = true; + p.has_compress_threads = true; use_int_value = true; break; case MIGRATION_PARAMETER_DECOMPRESS_THREADS: - has_decompress_threads = true; + p.has_decompress_threads = true; use_int_value = true; break; case MIGRATION_PARAMETER_CPU_THROTTLE_INITIAL: - has_cpu_throttle_initial = true; + p.has_cpu_throttle_initial = true; use_int_value = true; break; case MIGRATION_PARAMETER_CPU_THROTTLE_INCREMENT: - has_cpu_throttle_increment = true; + p.has_cpu_throttle_increment = true; + use_int_value = true; break; case MIGRATION_PARAMETER_TLS_CREDS: - has_tls_creds = true; + p.has_tls_creds = true; + p.tls_creds = (char *) valuestr; break; case MIGRATION_PARAMETER_TLS_HOSTNAME: - has_tls_hostname = true; + p.has_tls_hostname = true; + p.tls_hostname = (char *) valuestr; + break; + case MIGRATION_PARAMETER_MAX_BANDWIDTH: + p.has_max_bandwidth = true; + valuebw = qemu_strtosz(valuestr, &endp); + if (valuebw < 0 || (size_t)valuebw != valuebw + || *endp != '\0') { + error_setg(&err, "Invalid size %s", valuestr); + goto cleanup; + } + p.max_bandwidth = valuebw; + break; + case MIGRATION_PARAMETER_DOWNTIME_LIMIT: + p.has_downtime_limit = true; + use_int_value = true; + break; + case MIGRATION_PARAMETER_X_CHECKPOINT_DELAY: + p.has_x_checkpoint_delay = true; + use_int_value = true; break; } @@ -1299,16 +1402,18 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) valuestr); goto cleanup; } + /* Set all integers; only one has_FOO will be set, and + * the code ignores the remaining values */ + p.compress_level = valueint; + p.compress_threads = valueint; + p.decompress_threads = valueint; + p.cpu_throttle_initial = valueint; + p.cpu_throttle_increment = valueint; + p.downtime_limit = valueint; + p.x_checkpoint_delay = valueint; } - qmp_migrate_set_parameters(has_compress_level, valueint, - has_compress_threads, valueint, - has_decompress_threads, valueint, - has_cpu_throttle_initial, valueint, - 
has_cpu_throttle_increment, valueint, - has_tls_creds, valuestr, - has_tls_hostname, valuestr, - &err); + qmp_migrate_set_parameters(&p, &err); break; } } @@ -1347,6 +1452,14 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &err); } +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + + qmp_x_colo_lost_heartbeat(&err); + hmp_handle_error(mon, &err); +} + void hmp_set_password(Monitor *mon, const QDict *qdict) { const char *protocol = qdict_get_str(qdict, "protocol"); @@ -1374,7 +1487,7 @@ void hmp_eject(Monitor *mon, const QDict *qdict) const char *device = qdict_get_str(qdict, "device"); Error *err = NULL; - qmp_eject(device, true, force, &err); + qmp_eject(true, device, false, NULL, true, force, &err); hmp_handle_error(mon, &err); } @@ -1420,8 +1533,9 @@ void hmp_change(Monitor *mon, const QDict *qdict) } } - qmp_blockdev_change_medium(device, target, !!arg, arg, - !!read_only, read_only_mode, &err); + qmp_blockdev_change_medium(true, device, false, NULL, target, + !!arg, arg, !!read_only, read_only_mode, + &err); if (err && error_get_class(err) == ERROR_CLASS_DEVICE_ENCRYPTED) { error_free(err); @@ -1457,8 +1571,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) const char *base = qdict_get_try_str(qdict, "base"); int64_t speed = qdict_get_try_int(qdict, "speed", 0); - qmp_block_stream(false, NULL, device, base != NULL, base, false, NULL, - qdict_haskey(qdict, "speed"), speed, + qmp_block_stream(true, device, device, base != NULL, base, false, NULL, + false, NULL, qdict_haskey(qdict, "speed"), speed, true, BLOCKDEV_ON_ERROR_REPORT, &error); hmp_handle_error(mon, &error); @@ -1905,7 +2019,8 @@ void hmp_chardev_add(Monitor *mon, const QDict *qdict) if (opts == NULL) { error_setg(&err, "Parsing chardev args failed"); } else { - qemu_chr_new_from_opts(opts, NULL, &err); + qemu_chr_new_from_opts(opts, &err); + qemu_opts_del(opts); } hmp_handle_error(mon, &err); } @@ -1921,23 +2036,32 @@ void hmp_chardev_remove(Monitor *mon, const QDict *qdict) void hmp_qemu_io(Monitor *mon, const QDict *qdict) { BlockBackend *blk; + BlockBackend *local_blk = NULL; + AioContext *aio_context; const char* device = qdict_get_str(qdict, "device"); const char* command = qdict_get_str(qdict, "command"); Error *err = NULL; blk = blk_by_name(device); - if (blk) { - AioContext *aio_context = blk_get_aio_context(blk); - aio_context_acquire(aio_context); + if (!blk) { + BlockDriverState *bs = bdrv_lookup_bs(NULL, device, &err); + if (bs) { + blk = local_blk = blk_new(); + blk_insert_bs(blk, bs); + } else { + goto fail; + } + } - qemuio_command(blk, command); + aio_context = blk_get_aio_context(blk); + aio_context_acquire(aio_context); - aio_context_release(aio_context); - } else { - error_set(&err, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", device); - } + qemuio_command(blk, command); + + aio_context_release(aio_context); +fail: + blk_unref(local_blk); hmp_handle_error(mon, &err); } diff --git a/hmp.h b/hmp.h index 0876ec03a1..05daf7cd5c 100644 --- a/hmp.h +++ b/hmp.h @@ -36,6 +36,8 @@ void hmp_info_blockstats(Monitor *mon, const QDict *qdict); void hmp_info_vnc(Monitor *mon, const QDict *qdict); void hmp_info_spice(Monitor *mon, const QDict *qdict); void hmp_info_balloon(Monitor *mon, const QDict *qdict); +void hmp_info_irq(Monitor *mon, const QDict *qdict); +void hmp_info_pic(Monitor *mon, const QDict *qdict); void hmp_info_pci(Monitor *mon, const QDict *qdict); void hmp_info_block_jobs(Monitor *mon, const QDict 
*qdict); void hmp_info_tpm(Monitor *mon, const QDict *qdict); @@ -70,6 +72,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict); void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict); void hmp_client_migrate_info(Monitor *mon, const QDict *qdict); void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict); +void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict); void hmp_set_password(Monitor *mon, const QDict *qdict); void hmp_expire_password(Monitor *mon, const QDict *qdict); void hmp_eject(Monitor *mon, const QDict *qdict); diff --git a/hw/9pfs/9p-handle.c b/hw/9pfs/9p-handle.c index 3d77594f92..1687661bc9 100644 --- a/hw/9pfs/9p-handle.c +++ b/hw/9pfs/9p-handle.c @@ -649,6 +649,14 @@ static int handle_init(FsContext *ctx) return ret; } +static void handle_cleanup(FsContext *ctx) +{ + struct handle_data *data = ctx->private; + + close(data->mountfd); + g_free(data); +} + static int handle_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) { const char *sec_model = qemu_opt_get(opts, "security_model"); @@ -671,6 +679,7 @@ static int handle_parse_opts(QemuOpts *opts, struct FsDriverEntry *fse) FileOperations handle_ops = { .parse_opts = handle_parse_opts, .init = handle_init, + .cleanup = handle_cleanup, .lstat = handle_lstat, .readlink = handle_readlink, .close = handle_close, diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 3f271fcbd2..845675e7a1 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -1060,13 +1060,10 @@ static int local_name_to_path(FsContext *ctx, V9fsPath *dir_path, const char *name, V9fsPath *target) { if (dir_path) { - v9fs_string_sprintf((V9fsString *)target, "%s/%s", - dir_path->data, name); + v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); } else { - v9fs_string_sprintf((V9fsString *)target, "%s", name); + v9fs_path_sprintf(target, "%s", name); } - /* Bump the size for including terminating NULL */ - target->size++; return 0; } diff --git a/hw/9pfs/9p-proxy.c b/hw/9pfs/9p-proxy.c index f265501eac..f4aa7a9d70 100644 --- a/hw/9pfs/9p-proxy.c +++ b/hw/9pfs/9p-proxy.c @@ -294,8 +294,7 @@ static int v9fs_receive_status(V9fsProxy *proxy, * This request read by proxy helper process * returns 0 on success and -errno on error */ -static int v9fs_request(V9fsProxy *proxy, int type, - void *response, const char *fmt, ...) +static int v9fs_request(V9fsProxy *proxy, int type, void *response, ...) 
{ dev_t rdev; va_list ap; @@ -317,7 +316,7 @@ static int v9fs_request(V9fsProxy *proxy, int type, } iovec = &proxy->out_iovec; reply = &proxy->in_iovec; - va_start(ap, fmt); + va_start(ap, response); switch (type) { case T_OPEN: path = va_arg(ap, V9fsString *); @@ -605,7 +604,7 @@ static int v9fs_request(V9fsProxy *proxy, int type, static int proxy_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat *stbuf) { int retval; - retval = v9fs_request(fs_ctx->private, T_LSTAT, stbuf, "s", fs_path); + retval = v9fs_request(fs_ctx->private, T_LSTAT, stbuf, fs_path); if (retval < 0) { errno = -retval; return -1; @@ -617,8 +616,7 @@ static ssize_t proxy_readlink(FsContext *fs_ctx, V9fsPath *fs_path, char *buf, size_t bufsz) { int retval; - retval = v9fs_request(fs_ctx->private, T_READLINK, buf, "sd", - fs_path, bufsz); + retval = v9fs_request(fs_ctx->private, T_READLINK, buf, fs_path, bufsz); if (retval < 0) { errno = -retval; return -1; @@ -639,7 +637,7 @@ static int proxy_closedir(FsContext *ctx, V9fsFidOpenState *fs) static int proxy_open(FsContext *ctx, V9fsPath *fs_path, int flags, V9fsFidOpenState *fs) { - fs->fd = v9fs_request(ctx->private, T_OPEN, NULL, "sd", fs_path, flags); + fs->fd = v9fs_request(ctx->private, T_OPEN, NULL, fs_path, flags); if (fs->fd < 0) { errno = -fs->fd; fs->fd = -1; @@ -653,7 +651,7 @@ static int proxy_opendir(FsContext *ctx, int serrno, fd; fs->dir.stream = NULL; - fd = v9fs_request(ctx->private, T_OPEN, NULL, "sd", fs_path, O_DIRECTORY); + fd = v9fs_request(ctx->private, T_OPEN, NULL, fs_path, O_DIRECTORY); if (fd < 0) { errno = -fd; return -1; @@ -735,8 +733,8 @@ static ssize_t proxy_pwritev(FsContext *ctx, V9fsFidOpenState *fs, static int proxy_chmod(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { int retval; - retval = v9fs_request(fs_ctx->private, T_CHMOD, NULL, "sd", - fs_path, credp->fc_mode); + retval = v9fs_request(fs_ctx->private, T_CHMOD, NULL, fs_path, + credp->fc_mode); if (retval < 0) { errno = -retval; } @@ -752,8 +750,8 @@ static int proxy_mknod(FsContext *fs_ctx, V9fsPath *dir_path, v9fs_string_init(&fullname); v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - retval = v9fs_request(fs_ctx->private, T_MKNOD, NULL, "sdqdd", - &fullname, credp->fc_mode, credp->fc_rdev, + retval = v9fs_request(fs_ctx->private, T_MKNOD, NULL, &fullname, + credp->fc_mode, credp->fc_rdev, credp->fc_uid, credp->fc_gid); v9fs_string_free(&fullname); if (retval < 0) { @@ -772,14 +770,13 @@ static int proxy_mkdir(FsContext *fs_ctx, V9fsPath *dir_path, v9fs_string_init(&fullname); v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - retval = v9fs_request(fs_ctx->private, T_MKDIR, NULL, "sddd", &fullname, + retval = v9fs_request(fs_ctx->private, T_MKDIR, NULL, &fullname, credp->fc_mode, credp->fc_uid, credp->fc_gid); v9fs_string_free(&fullname); if (retval < 0) { errno = -retval; retval = -1; } - v9fs_string_free(&fullname); return retval; } @@ -804,9 +801,8 @@ static int proxy_open2(FsContext *fs_ctx, V9fsPath *dir_path, const char *name, v9fs_string_init(&fullname); v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); - fs->fd = v9fs_request(fs_ctx->private, T_CREATE, NULL, "sdddd", - &fullname, flags, credp->fc_mode, - credp->fc_uid, credp->fc_gid); + fs->fd = v9fs_request(fs_ctx->private, T_CREATE, NULL, &fullname, flags, + credp->fc_mode, credp->fc_uid, credp->fc_gid); v9fs_string_free(&fullname); if (fs->fd < 0) { errno = -fs->fd; @@ -827,8 +823,8 @@ static int proxy_symlink(FsContext *fs_ctx, const char *oldpath, 
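The 9p-proxy hunks above and below drop the printf-style format string from v9fs_request(): the va_list is consumed purely according to the request type, so the "s"/"sd"/"sdqdd" strings had become dead weight. A self-contained sketch of that calling convention is below; the request() helper and its two request types are simplified stand-ins, not the proxy protocol itself.

    #include <stdarg.h>
    #include <stdio.h>

    enum { T_CHMOD, T_TRUNCATE };

    /* Pull arguments out of the va_list based on the request type alone;
     * no separate format string is needed. */
    static int request(int type, ...)
    {
        va_list ap;
        int ret = 0;

        va_start(ap, type);
        switch (type) {
        case T_CHMOD: {
            const char *path = va_arg(ap, const char *);
            int mode = va_arg(ap, int);
            printf("chmod %s to %o\n", path, mode);
            break;
        }
        case T_TRUNCATE: {
            const char *path = va_arg(ap, const char *);
            long long size = va_arg(ap, long long);
            printf("truncate %s to %lld bytes\n", path, size);
            break;
        }
        default:
            ret = -1;
            break;
        }
        va_end(ap);
        return ret;
    }

    int main(void)
    {
        request(T_CHMOD, "/tmp/f", 0644);
        request(T_TRUNCATE, "/tmp/f", 4096LL);
        return 0;
    }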
v9fs_string_sprintf(&fullname, "%s/%s", dir_path->data, name); v9fs_string_sprintf(&target, "%s", oldpath); - retval = v9fs_request(fs_ctx->private, T_SYMLINK, NULL, "ssdd", - &target, &fullname, credp->fc_uid, credp->fc_gid); + retval = v9fs_request(fs_ctx->private, T_SYMLINK, NULL, &target, &fullname, + credp->fc_uid, credp->fc_gid); v9fs_string_free(&fullname); v9fs_string_free(&target); if (retval < 0) { @@ -847,7 +843,7 @@ static int proxy_link(FsContext *ctx, V9fsPath *oldpath, v9fs_string_init(&newpath); v9fs_string_sprintf(&newpath, "%s/%s", dirpath->data, name); - retval = v9fs_request(ctx->private, T_LINK, NULL, "ss", oldpath, &newpath); + retval = v9fs_request(ctx->private, T_LINK, NULL, oldpath, &newpath); v9fs_string_free(&newpath); if (retval < 0) { errno = -retval; @@ -860,7 +856,7 @@ static int proxy_truncate(FsContext *ctx, V9fsPath *fs_path, off_t size) { int retval; - retval = v9fs_request(ctx->private, T_TRUNCATE, NULL, "sq", fs_path, size); + retval = v9fs_request(ctx->private, T_TRUNCATE, NULL, fs_path, size); if (retval < 0) { errno = -retval; return -1; @@ -879,8 +875,7 @@ static int proxy_rename(FsContext *ctx, const char *oldpath, v9fs_string_sprintf(&oldname, "%s", oldpath); v9fs_string_sprintf(&newname, "%s", newpath); - retval = v9fs_request(ctx->private, T_RENAME, NULL, "ss", - &oldname, &newname); + retval = v9fs_request(ctx->private, T_RENAME, NULL, &oldname, &newname); v9fs_string_free(&oldname); v9fs_string_free(&newname); if (retval < 0) { @@ -892,8 +887,8 @@ static int proxy_rename(FsContext *ctx, const char *oldpath, static int proxy_chown(FsContext *fs_ctx, V9fsPath *fs_path, FsCred *credp) { int retval; - retval = v9fs_request(fs_ctx->private, T_CHOWN, NULL, "sdd", - fs_path, credp->fc_uid, credp->fc_gid); + retval = v9fs_request(fs_ctx->private, T_CHOWN, NULL, fs_path, + credp->fc_uid, credp->fc_gid); if (retval < 0) { errno = -retval; } @@ -904,8 +899,7 @@ static int proxy_utimensat(FsContext *s, V9fsPath *fs_path, const struct timespec *buf) { int retval; - retval = v9fs_request(s->private, T_UTIME, NULL, "sqqqq", - fs_path, + retval = v9fs_request(s->private, T_UTIME, NULL, fs_path, buf[0].tv_sec, buf[0].tv_nsec, buf[1].tv_sec, buf[1].tv_nsec); if (retval < 0) { @@ -920,7 +914,7 @@ static int proxy_remove(FsContext *ctx, const char *path) V9fsString name; v9fs_string_init(&name); v9fs_string_sprintf(&name, "%s", path); - retval = v9fs_request(ctx->private, T_REMOVE, NULL, "s", &name); + retval = v9fs_request(ctx->private, T_REMOVE, NULL, &name); v9fs_string_free(&name); if (retval < 0) { errno = -retval; @@ -949,7 +943,7 @@ static int proxy_fsync(FsContext *ctx, int fid_type, static int proxy_statfs(FsContext *s, V9fsPath *fs_path, struct statfs *stbuf) { int retval; - retval = v9fs_request(s->private, T_STATFS, stbuf, "s", fs_path); + retval = v9fs_request(s->private, T_STATFS, stbuf, fs_path); if (retval < 0) { errno = -retval; return -1; @@ -965,8 +959,8 @@ static ssize_t proxy_lgetxattr(FsContext *ctx, V9fsPath *fs_path, v9fs_string_init(&xname); v9fs_string_sprintf(&xname, "%s", name); - retval = v9fs_request(ctx->private, T_LGETXATTR, value, "dss", size, - fs_path, &xname); + retval = v9fs_request(ctx->private, T_LGETXATTR, value, size, fs_path, + &xname); v9fs_string_free(&xname); if (retval < 0) { errno = -retval; @@ -978,8 +972,7 @@ static ssize_t proxy_llistxattr(FsContext *ctx, V9fsPath *fs_path, void *value, size_t size) { int retval; - retval = v9fs_request(ctx->private, T_LLISTXATTR, value, "ds", size, - fs_path); + retval = 
v9fs_request(ctx->private, T_LLISTXATTR, value, size, fs_path); if (retval < 0) { errno = -retval; } @@ -1000,8 +993,8 @@ static int proxy_lsetxattr(FsContext *ctx, V9fsPath *fs_path, const char *name, xvalue.data = g_malloc(size); memcpy(xvalue.data, value, size); - retval = v9fs_request(ctx->private, T_LSETXATTR, value, "sssdd", - fs_path, &xname, &xvalue, size, flags); + retval = v9fs_request(ctx->private, T_LSETXATTR, value, fs_path, &xname, + &xvalue, size, flags); v9fs_string_free(&xname); v9fs_string_free(&xvalue); if (retval < 0) { @@ -1018,8 +1011,7 @@ static int proxy_lremovexattr(FsContext *ctx, V9fsPath *fs_path, v9fs_string_init(&xname); v9fs_string_sprintf(&xname, "%s", name); - retval = v9fs_request(ctx->private, T_LREMOVEXATTR, NULL, "ss", - fs_path, &xname); + retval = v9fs_request(ctx->private, T_LREMOVEXATTR, NULL, fs_path, &xname); v9fs_string_free(&xname); if (retval < 0) { errno = -retval; @@ -1031,13 +1023,10 @@ static int proxy_name_to_path(FsContext *ctx, V9fsPath *dir_path, const char *name, V9fsPath *target) { if (dir_path) { - v9fs_string_sprintf((V9fsString *)target, "%s/%s", - dir_path->data, name); + v9fs_path_sprintf(target, "%s/%s", dir_path->data, name); } else { - v9fs_string_sprintf((V9fsString *)target, "%s", name); + v9fs_path_sprintf(target, "%s", name); } - /* Bump the size for including terminating NULL */ - target->size++; return 0; } @@ -1086,7 +1075,7 @@ static int proxy_ioc_getversion(FsContext *fs_ctx, V9fsPath *path, errno = ENOTTY; return -1; } - err = v9fs_request(fs_ctx->private, T_GETVERSION, st_gen, "s", path); + err = v9fs_request(fs_ctx->private, T_GETVERSION, st_gen, path); if (err < 0) { errno = -err; err = -1; @@ -1179,9 +1168,22 @@ static int proxy_init(FsContext *ctx) return 0; } +static void proxy_cleanup(FsContext *ctx) +{ + V9fsProxy *proxy = ctx->private; + + g_free(proxy->out_iovec.iov_base); + g_free(proxy->in_iovec.iov_base); + if (ctx->export_flags & V9FS_PROXY_SOCK_NAME) { + close(proxy->sockfd); + } + g_free(proxy); +} + FileOperations proxy_ops = { .parse_opts = proxy_parse_opts, .init = proxy_init, + .cleanup = proxy_cleanup, .lstat = proxy_lstat, .readlink = proxy_readlink, .close = proxy_close, diff --git a/hw/9pfs/9p-synth.h b/hw/9pfs/9p-synth.h index 6bcb44ace2..49c2fc7b27 100644 --- a/hw/9pfs/9p-synth.h +++ b/hw/9pfs/9p-synth.h @@ -43,10 +43,10 @@ typedef struct V9fsSynthOpenState { struct dirent dent; } V9fsSynthOpenState; -extern int qemu_v9fs_synth_mkdir(V9fsSynthNode *parent, int mode, - const char *name, V9fsSynthNode **result); -extern int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode, - const char *name, v9fs_synth_read read, - v9fs_synth_write write, void *arg); +int qemu_v9fs_synth_mkdir(V9fsSynthNode *parent, int mode, + const char *name, V9fsSynthNode **result); +int qemu_v9fs_synth_add_file(V9fsSynthNode *parent, int mode, + const char *name, v9fs_synth_read read, + v9fs_synth_write write, void *arg); #endif diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index dfe293d11d..faebd91f5f 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include #include "hw/virtio/virtio.h" #include "qapi/error.h" #include "qemu/error-report.h" @@ -179,6 +180,20 @@ void v9fs_path_free(V9fsPath *path) path->size = 0; } + +void GCC_FMT_ATTR(2, 3) +v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...) 
+{ + va_list ap; + + v9fs_path_free(path); + + va_start(ap, fmt); + /* Bump the size for including terminating NULL */ + path->size = g_vasprintf(&path->data, fmt, ap) + 1; + va_end(ap); +} + void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs) { v9fs_path_free(lhs); @@ -221,7 +236,7 @@ static size_t v9fs_string_size(V9fsString *str) /* * returns 0 if fid got re-opened, 1 if not, < 0 on error */ -static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f) +static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f) { int err = 1; if (f->fid_type == P9_FID_FILE) { @@ -240,7 +255,7 @@ static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f) return err; } -static V9fsFidState *get_fid(V9fsPDU *pdu, int32_t fid) +static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid) { int err; V9fsFidState *f; @@ -306,11 +321,11 @@ static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid) return f; } -static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) +static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) { int retval = 0; - if (fidp->fs.xattr.copied_len == -1) { + if (fidp->fs.xattr.xattrwalk_fid) { /* getxattr/listxattr fid */ goto free_value; } @@ -338,7 +353,7 @@ static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) return retval; } -static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp) +static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp) { int retval = 0; @@ -359,7 +374,7 @@ static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp) return retval; } -static int put_fid(V9fsPDU *pdu, V9fsFidState *fidp) +static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp) { BUG_ON(!fidp->ref); fidp->ref--; @@ -403,7 +418,7 @@ static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid) return fidp; } -void v9fs_reclaim_fd(V9fsPDU *pdu) +void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) { int reclaim_count = 0; V9fsState *s = pdu->s; @@ -484,7 +499,7 @@ void v9fs_reclaim_fd(V9fsPDU *pdu) } } -static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path) +static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path) { int err; V9fsState *s = pdu->s; @@ -517,10 +532,10 @@ static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path) return 0; } -static void virtfs_reset(V9fsPDU *pdu) +static void coroutine_fn virtfs_reset(V9fsPDU *pdu) { V9fsState *s = pdu->s; - V9fsFidState *fidp = NULL; + V9fsFidState *fidp; /* Free all fids */ while (s->fid_list) { @@ -533,11 +548,6 @@ static void virtfs_reset(V9fsPDU *pdu) free_fid(pdu, fidp); } } - if (fidp) { - /* One or more unclunked fids found... */ - error_report("9pfs:%s: One or more uncluncked fids " - "found during reset", __func__); - } } #define P9_QID_TYPE_DIR 0x80 @@ -583,7 +593,8 @@ static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp) } } -static int fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, V9fsQID *qidp) +static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, + V9fsQID *qidp) { struct stat stbuf; int err; @@ -610,17 +621,11 @@ V9fsPDU *pdu_alloc(V9fsState *s) void pdu_free(V9fsPDU *pdu) { - if (pdu) { - V9fsState *s = pdu->s; - /* - * Cancelled pdu are added back to the freelist - * by flush request . 
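The v9fs_path_sprintf() helper added just above centralizes the "+1 for the terminating NUL" bookkeeping that the local and proxy backends previously open-coded around v9fs_string_sprintf(). The sketch below shows the underlying g_vasprintf() pattern in isolation, assuming GLib is available; the Path type and path_sprintf() name are stand-ins, not the V9fsPath API.

    #include <stdarg.h>
    #include <glib.h>
    #include <glib/gprintf.h>

    typedef struct {
        char *data;
        size_t size;               /* length including the terminating NUL */
    } Path;

    static void G_GNUC_PRINTF(2, 3) path_sprintf(Path *path, const char *fmt, ...)
    {
        va_list ap;

        g_free(path->data);        /* discard any previous contents */
        va_start(ap, fmt);
        /* g_vasprintf() returns the byte count excluding the NUL; +1 records it */
        path->size = g_vasprintf(&path->data, fmt, ap) + 1;
        va_end(ap);
    }

    int main(void)
    {
        Path p = { NULL, 0 };

        path_sprintf(&p, "%s/%s", "/export", "file.txt");
        g_print("%s (%zu bytes)\n", p.data, p.size);
        g_free(p.data);
        return 0;
    }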
- */ - if (!pdu->cancelled) { - QLIST_REMOVE(pdu, next); - QLIST_INSERT_HEAD(&s->free_list, pdu, next); - } - } + V9fsState *s = pdu->s; + + g_assert(!pdu->cancelled); + QLIST_REMOVE(pdu, next); + QLIST_INSERT_HEAD(&s->free_list, pdu, next); } /* @@ -628,7 +633,7 @@ void pdu_free(V9fsPDU *pdu) * because we always expect to have enough space to encode * error details */ -static void pdu_complete(V9fsPDU *pdu, ssize_t len) +static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) { int8_t id = pdu->id + 1; /* Response */ V9fsState *s = pdu->s; @@ -665,9 +670,9 @@ static void pdu_complete(V9fsPDU *pdu, ssize_t len) pdu_push_and_notify(pdu); /* Now wakeup anybody waiting in flush for this request */ - qemu_co_queue_next(&pdu->complete); - - pdu_free(pdu); + if (!qemu_co_queue_next(&pdu->complete)) { + pdu_free(pdu); + } } static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) @@ -795,9 +800,9 @@ static uint32_t stat_to_v9mode(const struct stat *stbuf) return mode; } -static int stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name, - const struct stat *stbuf, - V9fsStat *v9stat) +static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name, + const struct stat *stbuf, + V9fsStat *v9stat) { int err; const char *str; @@ -810,15 +815,15 @@ static int stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name, v9stat->mtime = stbuf->st_mtime; v9stat->length = stbuf->st_size; - v9fs_string_null(&v9stat->uid); - v9fs_string_null(&v9stat->gid); - v9fs_string_null(&v9stat->muid); + v9fs_string_free(&v9stat->uid); + v9fs_string_free(&v9stat->gid); + v9fs_string_free(&v9stat->muid); v9stat->n_uid = stbuf->st_uid; v9stat->n_gid = stbuf->st_gid; v9stat->n_muid = 0; - v9fs_string_null(&v9stat->extension); + v9fs_string_free(&v9stat->extension); if (v9stat->mode & P9_STAT_MODE_SYMLINK) { err = v9fs_co_readlink(pdu, name, &v9stat->extension); @@ -917,10 +922,8 @@ static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len) V9fsPath str; v9fs_path_init(&str); v9fs_path_copy(&str, dst); - v9fs_string_sprintf((V9fsString *)dst, "%s%s", src->data, str.data+len); + v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len); v9fs_path_free(&str); - /* +1 to include terminating NULL */ - dst->size++; } static inline bool is_ro_export(FsContext *ctx) @@ -928,7 +931,7 @@ static inline bool is_ro_export(FsContext *ctx) return ctx->export_flags & V9FS_RDONLY; } -static void v9fs_version(void *opaque) +static void coroutine_fn v9fs_version(void *opaque) { ssize_t err; V9fsPDU *pdu = opaque; @@ -966,7 +969,7 @@ static void v9fs_version(void *opaque) v9fs_string_free(&version); } -static void v9fs_attach(void *opaque) +static void coroutine_fn v9fs_attach(void *opaque) { V9fsPDU *pdu = opaque; V9fsState *s = pdu->s; @@ -1032,7 +1035,7 @@ static void v9fs_attach(void *opaque) v9fs_string_free(&aname); } -static void v9fs_stat(void *opaque) +static void coroutine_fn v9fs_stat(void *opaque) { int32_t fid; V9fsStat v9stat; @@ -1076,7 +1079,7 @@ static void v9fs_stat(void *opaque) pdu_complete(pdu, err); } -static void v9fs_getattr(void *opaque) +static void coroutine_fn v9fs_getattr(void *opaque) { int32_t fid; size_t offset = 7; @@ -1152,7 +1155,7 @@ static void v9fs_getattr(void *opaque) #define P9_ATTR_MASK 127 -static void v9fs_setattr(void *opaque) +static void coroutine_fn v9fs_setattr(void *opaque) { int err = 0; int32_t fid; @@ -1270,7 +1273,7 @@ static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2) qid1->path != qid2->path; } -static void v9fs_walk(void *opaque) +static void coroutine_fn 
v9fs_walk(void *opaque) { int name_idx; V9fsQID *qids = NULL; @@ -1320,13 +1323,14 @@ static void v9fs_walk(void *opaque) goto out_nofid; } + v9fs_path_init(&dpath); + v9fs_path_init(&path); + err = fid_to_qid(pdu, fidp, &qid); if (err < 0) { goto out; } - v9fs_path_init(&dpath); - v9fs_path_init(&path); /* * Both dpath and path initially poin to fidp. * Needed to handle request with nwnames == 0 @@ -1352,7 +1356,10 @@ static void v9fs_walk(void *opaque) memcpy(&qids[name_idx], &qid, sizeof(qid)); } if (fid == newfid) { - BUG_ON(fidp->fid_type != P9_FID_NONE); + if (fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out; + } v9fs_path_copy(&fidp->path, &path); } else { newfidp = alloc_fid(s, newfid); @@ -1383,7 +1390,7 @@ static void v9fs_walk(void *opaque) } } -static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path) +static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path) { struct statfs stbuf; int32_t iounit = 0; @@ -1403,7 +1410,7 @@ static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path) return iounit; } -static void v9fs_open(void *opaque) +static void coroutine_fn v9fs_open(void *opaque) { int flags; int32_t fid; @@ -1434,7 +1441,10 @@ static void v9fs_open(void *opaque) err = -ENOENT; goto out_nofid; } - BUG_ON(fidp->fid_type != P9_FID_NONE); + if (fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out; + } err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); if (err < 0) { @@ -1493,7 +1503,7 @@ static void v9fs_open(void *opaque) pdu_complete(pdu, err); } -static void v9fs_lcreate(void *opaque) +static void coroutine_fn v9fs_lcreate(void *opaque) { int32_t dfid, flags, mode; gid_t gid; @@ -1590,7 +1600,7 @@ static void v9fs_fsync(void *opaque) pdu_complete(pdu, err); } -static void v9fs_clunk(void *opaque) +static void coroutine_fn v9fs_clunk(void *opaque) { int err; int32_t fid; @@ -1628,20 +1638,17 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, { ssize_t err; size_t offset = 7; - int read_count; - int64_t xattr_len; + uint64_t read_count; V9fsVirtioState *v = container_of(s, V9fsVirtioState, state); VirtQueueElement *elem = v->elems[pdu->idx]; - xattr_len = fidp->fs.xattr.len; - read_count = xattr_len - off; + if (fidp->fs.xattr.len < off) { + read_count = 0; + } else { + read_count = fidp->fs.xattr.len - off; + } if (read_count > max_count) { read_count = max_count; - } else if (read_count < 0) { - /* - * read beyond XATTR value - */ - read_count = 0; } err = pdu_marshal(pdu, offset, "d", read_count); if (err < 0) { @@ -1659,8 +1666,9 @@ static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, return offset; } -static int v9fs_do_readdir_with_stat(V9fsPDU *pdu, - V9fsFidState *fidp, uint32_t max_count) +static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu, + V9fsFidState *fidp, + uint32_t max_count) { V9fsPath path; V9fsStat v9stat; @@ -1750,7 +1758,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, qemu_iovec_concat(qiov, &elem, skip, size); } -static void v9fs_read(void *opaque) +static void coroutine_fn v9fs_read(void *opaque) { int32_t fid; uint64_t off; @@ -1812,14 +1820,15 @@ static void v9fs_read(void *opaque) if (len < 0) { /* IO error return the error */ err = len; - goto out; + goto out_free_iovec; } } while (count < max_count && len > 0); err = pdu_marshal(pdu, offset, "d", count); if (err < 0) { - goto out; + goto out_free_iovec; } err += offset + count; +out_free_iovec: qemu_iovec_destroy(&qiov); qemu_iovec_destroy(&qiov_full); } else if (fidp->fid_type == 
P9_FID_XATTR) { @@ -1843,8 +1852,8 @@ static size_t v9fs_readdir_data_size(V9fsString *name) return 24 + v9fs_string_size(name); } -static int v9fs_do_readdir(V9fsPDU *pdu, - V9fsFidState *fidp, int32_t max_count) +static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, + int32_t max_count) { size_t size; V9fsQID qid; @@ -1913,7 +1922,7 @@ static int v9fs_do_readdir(V9fsPDU *pdu, return count; } -static void v9fs_readdir(void *opaque) +static void coroutine_fn v9fs_readdir(void *opaque) { int32_t fid; V9fsFidState *fidp; @@ -1968,23 +1977,18 @@ static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, { int i, to_copy; ssize_t err = 0; - int write_count; - int64_t xattr_len; + uint64_t write_count; size_t offset = 7; - xattr_len = fidp->fs.xattr.len; - write_count = xattr_len - off; - if (write_count > count) { - write_count = count; - } else if (write_count < 0) { - /* - * write beyond XATTR value len specified in - * xattrcreate - */ + if (fidp->fs.xattr.len < off) { err = -ENOSPC; goto out; } + write_count = fidp->fs.xattr.len - off; + if (write_count > count) { + write_count = count; + } err = pdu_marshal(pdu, offset, "d", write_count); if (err < 0) { return err; @@ -2009,7 +2013,7 @@ static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, return err; } -static void v9fs_write(void *opaque) +static void coroutine_fn v9fs_write(void *opaque) { ssize_t err; int32_t fid; @@ -2079,7 +2083,7 @@ static void v9fs_write(void *opaque) offset = 7; err = pdu_marshal(pdu, offset, "d", total); if (err < 0) { - goto out; + goto out_qiov; } err += offset; trace_v9fs_write_return(pdu->tag, pdu->id, total, err); @@ -2092,7 +2096,7 @@ static void v9fs_write(void *opaque) pdu_complete(pdu, err); } -static void v9fs_create(void *opaque) +static void coroutine_fn v9fs_create(void *opaque) { int32_t fid; int err = 0; @@ -2272,7 +2276,7 @@ static void v9fs_create(void *opaque) v9fs_path_free(&path); } -static void v9fs_symlink(void *opaque) +static void coroutine_fn v9fs_symlink(void *opaque) { V9fsPDU *pdu = opaque; V9fsString name; @@ -2361,7 +2365,7 @@ static void v9fs_flush(void *opaque) pdu_complete(pdu, 7); } -static void v9fs_link(void *opaque) +static void coroutine_fn v9fs_link(void *opaque) { V9fsPDU *pdu = opaque; int32_t dfid, oldfid; @@ -2402,6 +2406,7 @@ static void v9fs_link(void *opaque) if (!err) { err = offset; } + put_fid(pdu, oldfidp); out: put_fid(pdu, dfidp); out_nofid: @@ -2410,7 +2415,7 @@ static void v9fs_link(void *opaque) } /* Only works with path name based fid */ -static void v9fs_remove(void *opaque) +static void coroutine_fn v9fs_remove(void *opaque) { int32_t fid; int err = 0; @@ -2454,7 +2459,7 @@ static void v9fs_remove(void *opaque) pdu_complete(pdu, err); } -static void v9fs_unlinkat(void *opaque) +static void coroutine_fn v9fs_unlinkat(void *opaque) { int err = 0; V9fsString name; @@ -2517,8 +2522,9 @@ static void v9fs_unlinkat(void *opaque) /* Only works with path name based fid */ -static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, - int32_t newdirfid, V9fsString *name) +static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, + int32_t newdirfid, + V9fsString *name) { char *end; int err = 0; @@ -2535,7 +2541,10 @@ static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, err = -ENOENT; goto out_nofid; } - BUG_ON(dirfidp->fid_type != P9_FID_NONE); + if (fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out; + } v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, 
&new_path); } else { old_name = fidp->path.data; @@ -2575,7 +2584,7 @@ static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, } /* Only works with path name based fid */ -static void v9fs_rename(void *opaque) +static void coroutine_fn v9fs_rename(void *opaque) { int32_t fid; ssize_t err = 0; @@ -2607,7 +2616,10 @@ static void v9fs_rename(void *opaque) err = -ENOENT; goto out_nofid; } - BUG_ON(fidp->fid_type != P9_FID_NONE); + if (fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out; + } /* if fs driver is not path based, return EOPNOTSUPP */ if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) { err = -EOPNOTSUPP; @@ -2626,9 +2638,10 @@ static void v9fs_rename(void *opaque) v9fs_string_free(&name); } -static void v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, - V9fsString *old_name, V9fsPath *newdir, - V9fsString *new_name) +static void coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, + V9fsString *old_name, + V9fsPath *newdir, + V9fsString *new_name) { V9fsFidState *tfidp; V9fsPath oldpath, newpath; @@ -2654,9 +2667,10 @@ static void v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, v9fs_path_free(&newpath); } -static int v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, - V9fsString *old_name, int32_t newdirfid, - V9fsString *new_name) +static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, + V9fsString *old_name, + int32_t newdirfid, + V9fsString *new_name) { int err = 0; V9fsState *s = pdu->s; @@ -2697,7 +2711,7 @@ static int v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, return err; } -static void v9fs_renameat(void *opaque) +static void coroutine_fn v9fs_renameat(void *opaque) { ssize_t err = 0; size_t offset = 7; @@ -2739,7 +2753,7 @@ static void v9fs_renameat(void *opaque) v9fs_string_free(&new_name); } -static void v9fs_wstat(void *opaque) +static void coroutine_fn v9fs_wstat(void *opaque) { int32_t fid; int err = 0; @@ -2878,7 +2892,7 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf) fsid_val, f_namelen); } -static void v9fs_statfs(void *opaque) +static void coroutine_fn v9fs_statfs(void *opaque) { int32_t fid; ssize_t retval = 0; @@ -2912,7 +2926,7 @@ static void v9fs_statfs(void *opaque) pdu_complete(pdu, retval); } -static void v9fs_mknod(void *opaque) +static void coroutine_fn v9fs_mknod(void *opaque) { int mode; @@ -2978,7 +2992,7 @@ static void v9fs_mknod(void *opaque) * do any thing in * qemu 9p server side lock code path. * So when a TLOCK request comes, always return success */ -static void v9fs_lock(void *opaque) +static void coroutine_fn v9fs_lock(void *opaque) { int8_t status; V9fsFlock flock; @@ -3031,7 +3045,7 @@ static void v9fs_lock(void *opaque) * When a TGETLOCK request comes, always return success because all lock * handling is done by client's VFS layer. 
*/ -static void v9fs_getlock(void *opaque) +static void coroutine_fn v9fs_getlock(void *opaque) { size_t offset = 7; struct stat stbuf; @@ -3076,7 +3090,7 @@ static void v9fs_getlock(void *opaque) v9fs_string_free(&glock.client_id); } -static void v9fs_mkdir(void *opaque) +static void coroutine_fn v9fs_mkdir(void *opaque) { V9fsPDU *pdu = opaque; size_t offset = 7; @@ -3130,7 +3144,7 @@ static void v9fs_mkdir(void *opaque) v9fs_string_free(&name); } -static void v9fs_xattrwalk(void *opaque) +static void coroutine_fn v9fs_xattrwalk(void *opaque) { int64_t size; V9fsString name; @@ -3160,7 +3174,7 @@ static void v9fs_xattrwalk(void *opaque) goto out; } v9fs_path_copy(&xattr_fidp->path, &file_fidp->path); - if (name.data == NULL) { + if (!v9fs_string_size(&name)) { /* * listxattr request. Get the size first */ @@ -3175,7 +3189,7 @@ static void v9fs_xattrwalk(void *opaque) */ xattr_fidp->fs.xattr.len = size; xattr_fidp->fid_type = P9_FID_XATTR; - xattr_fidp->fs.xattr.copied_len = -1; + xattr_fidp->fs.xattr.xattrwalk_fid = true; if (size) { xattr_fidp->fs.xattr.value = g_malloc(size); err = v9fs_co_llistxattr(pdu, &xattr_fidp->path, @@ -3208,7 +3222,7 @@ static void v9fs_xattrwalk(void *opaque) */ xattr_fidp->fs.xattr.len = size; xattr_fidp->fid_type = P9_FID_XATTR; - xattr_fidp->fs.xattr.copied_len = -1; + xattr_fidp->fs.xattr.xattrwalk_fid = true; if (size) { xattr_fidp->fs.xattr.value = g_malloc(size); err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path, @@ -3236,11 +3250,11 @@ static void v9fs_xattrwalk(void *opaque) v9fs_string_free(&name); } -static void v9fs_xattrcreate(void *opaque) +static void coroutine_fn v9fs_xattrcreate(void *opaque) { int flags; int32_t fid; - int64_t size; + uint64_t size; ssize_t err = 0; V9fsString name; size_t offset = 7; @@ -3255,28 +3269,40 @@ static void v9fs_xattrcreate(void *opaque) } trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags); + if (size > XATTR_SIZE_MAX) { + err = -E2BIG; + goto out_nofid; + } + file_fidp = get_fid(pdu, fid); if (file_fidp == NULL) { err = -EINVAL; goto out_nofid; } + if (file_fidp->fid_type != P9_FID_NONE) { + err = -EINVAL; + goto out_put_fid; + } + /* Make the file fid point to xattr */ xattr_fidp = file_fidp; xattr_fidp->fid_type = P9_FID_XATTR; xattr_fidp->fs.xattr.copied_len = 0; + xattr_fidp->fs.xattr.xattrwalk_fid = false; xattr_fidp->fs.xattr.len = size; xattr_fidp->fs.xattr.flags = flags; v9fs_string_init(&xattr_fidp->fs.xattr.name); v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name); - xattr_fidp->fs.xattr.value = g_malloc(size); + xattr_fidp->fs.xattr.value = g_malloc0(size); err = offset; +out_put_fid: put_fid(pdu, file_fidp); out_nofid: pdu_complete(pdu, err); v9fs_string_free(&name); } -static void v9fs_readlink(void *opaque) +static void coroutine_fn v9fs_readlink(void *opaque) { V9fsPDU *pdu = opaque; size_t offset = 7; @@ -3352,13 +3378,13 @@ static CoroutineEntry *pdu_co_handlers[] = { [P9_TREMOVE] = v9fs_remove, }; -static void v9fs_op_not_supp(void *opaque) +static void coroutine_fn v9fs_op_not_supp(void *opaque) { V9fsPDU *pdu = opaque; pdu_complete(pdu, -EOPNOTSUPP); } -static void v9fs_fs_ro(void *opaque) +static void coroutine_fn v9fs_fs_ro(void *opaque) { V9fsPDU *pdu = opaque; pdu_complete(pdu, -EROFS); @@ -3495,8 +3521,11 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) rc = 0; out: if (rc) { - g_free(s->ctx.fs_root); + if (s->ops->cleanup && s->ctx.private) { + s->ops->cleanup(&s->ctx); + } g_free(s->tag); + g_free(s->ctx.fs_root); v9fs_path_free(&path); } return rc; @@ 
-3504,8 +3533,41 @@ int v9fs_device_realize_common(V9fsState *s, Error **errp) void v9fs_device_unrealize_common(V9fsState *s, Error **errp) { - g_free(s->ctx.fs_root); + if (s->ops->cleanup) { + s->ops->cleanup(&s->ctx); + } g_free(s->tag); + g_free(s->ctx.fs_root); +} + +typedef struct VirtfsCoResetData { + V9fsPDU pdu; + bool done; +} VirtfsCoResetData; + +static void coroutine_fn virtfs_co_reset(void *opaque) +{ + VirtfsCoResetData *data = opaque; + + virtfs_reset(&data->pdu); + data->done = true; +} + +void v9fs_reset(V9fsState *s) +{ + VirtfsCoResetData data = { .pdu = { .s = s }, .done = false }; + Coroutine *co; + + while (!QLIST_EMPTY(&s->active_list)) { + aio_poll(qemu_get_aio_context(), true); + } + + co = qemu_coroutine_create(virtfs_co_reset, &data); + qemu_coroutine_enter(co); + + while (!data.done) { + aio_poll(qemu_get_aio_context(), true); + } } static void __attribute__((__constructor__)) v9fs_set_fd_limit(void) diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index a38603398e..3976b7fe3d 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -159,11 +159,12 @@ typedef struct V9fsConf typedef struct V9fsXattr { - int64_t copied_len; - int64_t len; + uint64_t copied_len; + uint64_t len; void *value; V9fsString name; int flags; + bool xattrwalk_fid; } V9fsXattr; typedef struct V9fsDir { @@ -324,19 +325,21 @@ static inline uint8_t v9fs_request_cancelled(V9fsPDU *pdu) return pdu->cancelled; } -extern void v9fs_reclaim_fd(V9fsPDU *pdu); -extern void v9fs_path_init(V9fsPath *path); -extern void v9fs_path_free(V9fsPath *path); -extern void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs); -extern int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath, - const char *name, V9fsPath *path); -extern int v9fs_device_realize_common(V9fsState *s, Error **errp); -extern void v9fs_device_unrealize_common(V9fsState *s, Error **errp); +void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu); +void v9fs_path_init(V9fsPath *path); +void v9fs_path_free(V9fsPath *path); +void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...); +void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs); +int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath, + const char *name, V9fsPath *path); +int v9fs_device_realize_common(V9fsState *s, Error **errp); +void v9fs_device_unrealize_common(V9fsState *s, Error **errp); ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...); ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...); V9fsPDU *pdu_alloc(V9fsState *s); void pdu_free(V9fsPDU *pdu); void pdu_submit(V9fsPDU *pdu); +void v9fs_reset(V9fsState *s); #endif diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c index d91f9ad6eb..7cd6fce1ad 100644 --- a/hw/9pfs/codir.c +++ b/hw/9pfs/codir.c @@ -17,7 +17,8 @@ #include "qemu/coroutine.h" #include "coth.h" -int v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp, struct dirent **dent) +int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState *fidp, + struct dirent **dent) { int err; V9fsState *s = pdu->s; @@ -59,7 +60,8 @@ off_t v9fs_co_telldir(V9fsPDU *pdu, V9fsFidState *fidp) return err; } -void v9fs_co_seekdir(V9fsPDU *pdu, V9fsFidState *fidp, off_t offset) +void coroutine_fn v9fs_co_seekdir(V9fsPDU *pdu, V9fsFidState *fidp, + off_t offset) { V9fsState *s = pdu->s; if (v9fs_request_cancelled(pdu)) { @@ -71,7 +73,7 @@ void v9fs_co_seekdir(V9fsPDU *pdu, V9fsFidState *fidp, off_t offset) }); } -void v9fs_co_rewinddir(V9fsPDU *pdu, V9fsFidState *fidp) +void coroutine_fn v9fs_co_rewinddir(V9fsPDU *pdu, V9fsFidState *fidp) { V9fsState *s = pdu->s; if 
(v9fs_request_cancelled(pdu)) { @@ -83,8 +85,9 @@ void v9fs_co_rewinddir(V9fsPDU *pdu, V9fsFidState *fidp) }); } -int v9fs_co_mkdir(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, - mode_t mode, uid_t uid, gid_t gid, struct stat *stbuf) +int coroutine_fn v9fs_co_mkdir(V9fsPDU *pdu, V9fsFidState *fidp, + V9fsString *name, mode_t mode, uid_t uid, + gid_t gid, struct stat *stbuf) { int err; FsCred cred; @@ -120,7 +123,7 @@ int v9fs_co_mkdir(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, return err; } -int v9fs_co_opendir(V9fsPDU *pdu, V9fsFidState *fidp) +int coroutine_fn v9fs_co_opendir(V9fsPDU *pdu, V9fsFidState *fidp) { int err; V9fsState *s = pdu->s; @@ -148,7 +151,7 @@ int v9fs_co_opendir(V9fsPDU *pdu, V9fsFidState *fidp) return err; } -int v9fs_co_closedir(V9fsPDU *pdu, V9fsFidOpenState *fs) +int coroutine_fn v9fs_co_closedir(V9fsPDU *pdu, V9fsFidOpenState *fs) { int err; V9fsState *s = pdu->s; diff --git a/hw/9pfs/cofile.c b/hw/9pfs/cofile.c index 10343c0a93..120e267108 100644 --- a/hw/9pfs/cofile.c +++ b/hw/9pfs/cofile.c @@ -17,8 +17,8 @@ #include "qemu/coroutine.h" #include "coth.h" -int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode, - V9fsStatDotl *v9stat) +int coroutine_fn v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode, + V9fsStatDotl *v9stat) { int err = 0; V9fsState *s = pdu->s; @@ -41,7 +41,7 @@ int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t st_mode, return err; } -int v9fs_co_lstat(V9fsPDU *pdu, V9fsPath *path, struct stat *stbuf) +int coroutine_fn v9fs_co_lstat(V9fsPDU *pdu, V9fsPath *path, struct stat *stbuf) { int err; V9fsState *s = pdu->s; @@ -61,7 +61,8 @@ int v9fs_co_lstat(V9fsPDU *pdu, V9fsPath *path, struct stat *stbuf) return err; } -int v9fs_co_fstat(V9fsPDU *pdu, V9fsFidState *fidp, struct stat *stbuf) +int coroutine_fn v9fs_co_fstat(V9fsPDU *pdu, V9fsFidState *fidp, + struct stat *stbuf) { int err; V9fsState *s = pdu->s; @@ -93,7 +94,7 @@ int v9fs_co_fstat(V9fsPDU *pdu, V9fsFidState *fidp, struct stat *stbuf) return err; } -int v9fs_co_open(V9fsPDU *pdu, V9fsFidState *fidp, int flags) +int coroutine_fn v9fs_co_open(V9fsPDU *pdu, V9fsFidState *fidp, int flags) { int err; V9fsState *s = pdu->s; @@ -121,8 +122,9 @@ int v9fs_co_open(V9fsPDU *pdu, V9fsFidState *fidp, int flags) return err; } -int v9fs_co_open2(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, gid_t gid, - int flags, int mode, struct stat *stbuf) +int coroutine_fn v9fs_co_open2(V9fsPDU *pdu, V9fsFidState *fidp, + V9fsString *name, gid_t gid, int flags, int mode, + struct stat *stbuf) { int err; FsCred cred; @@ -175,7 +177,7 @@ int v9fs_co_open2(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, gid_t gid, return err; } -int v9fs_co_close(V9fsPDU *pdu, V9fsFidOpenState *fs) +int coroutine_fn v9fs_co_close(V9fsPDU *pdu, V9fsFidOpenState *fs) { int err; V9fsState *s = pdu->s; @@ -196,7 +198,7 @@ int v9fs_co_close(V9fsPDU *pdu, V9fsFidOpenState *fs) return err; } -int v9fs_co_fsync(V9fsPDU *pdu, V9fsFidState *fidp, int datasync) +int coroutine_fn v9fs_co_fsync(V9fsPDU *pdu, V9fsFidState *fidp, int datasync) { int err; V9fsState *s = pdu->s; @@ -214,8 +216,8 @@ int v9fs_co_fsync(V9fsPDU *pdu, V9fsFidState *fidp, int datasync) return err; } -int v9fs_co_link(V9fsPDU *pdu, V9fsFidState *oldfid, - V9fsFidState *newdirfid, V9fsString *name) +int coroutine_fn v9fs_co_link(V9fsPDU *pdu, V9fsFidState *oldfid, + V9fsFidState *newdirfid, V9fsString *name) { int err; V9fsState *s = pdu->s; @@ -236,8 +238,8 @@ int v9fs_co_link(V9fsPDU *pdu, V9fsFidState *oldfid, return 
err; } -int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, - struct iovec *iov, int iovcnt, int64_t offset) +int coroutine_fn v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, + struct iovec *iov, int iovcnt, int64_t offset) { int err; V9fsState *s = pdu->s; @@ -255,8 +257,8 @@ int v9fs_co_pwritev(V9fsPDU *pdu, V9fsFidState *fidp, return err; } -int v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp, - struct iovec *iov, int iovcnt, int64_t offset) +int coroutine_fn v9fs_co_preadv(V9fsPDU *pdu, V9fsFidState *fidp, + struct iovec *iov, int iovcnt, int64_t offset) { int err; V9fsState *s = pdu->s; diff --git a/hw/9pfs/cofs.c b/hw/9pfs/cofs.c index 70f584fcbd..c62103221d 100644 --- a/hw/9pfs/cofs.c +++ b/hw/9pfs/cofs.c @@ -49,7 +49,7 @@ static ssize_t __readlink(V9fsState *s, V9fsPath *path, V9fsString *buf) return len; } -int v9fs_co_readlink(V9fsPDU *pdu, V9fsPath *path, V9fsString *buf) +int coroutine_fn v9fs_co_readlink(V9fsPDU *pdu, V9fsPath *path, V9fsString *buf) { int err; V9fsState *s = pdu->s; @@ -69,7 +69,8 @@ int v9fs_co_readlink(V9fsPDU *pdu, V9fsPath *path, V9fsString *buf) return err; } -int v9fs_co_statfs(V9fsPDU *pdu, V9fsPath *path, struct statfs *stbuf) +int coroutine_fn v9fs_co_statfs(V9fsPDU *pdu, V9fsPath *path, + struct statfs *stbuf) { int err; V9fsState *s = pdu->s; @@ -89,7 +90,7 @@ int v9fs_co_statfs(V9fsPDU *pdu, V9fsPath *path, struct statfs *stbuf) return err; } -int v9fs_co_chmod(V9fsPDU *pdu, V9fsPath *path, mode_t mode) +int coroutine_fn v9fs_co_chmod(V9fsPDU *pdu, V9fsPath *path, mode_t mode) { int err; FsCred cred; @@ -112,8 +113,8 @@ int v9fs_co_chmod(V9fsPDU *pdu, V9fsPath *path, mode_t mode) return err; } -int v9fs_co_utimensat(V9fsPDU *pdu, V9fsPath *path, - struct timespec times[2]) +int coroutine_fn v9fs_co_utimensat(V9fsPDU *pdu, V9fsPath *path, + struct timespec times[2]) { int err; V9fsState *s = pdu->s; @@ -133,7 +134,8 @@ int v9fs_co_utimensat(V9fsPDU *pdu, V9fsPath *path, return err; } -int v9fs_co_chown(V9fsPDU *pdu, V9fsPath *path, uid_t uid, gid_t gid) +int coroutine_fn v9fs_co_chown(V9fsPDU *pdu, V9fsPath *path, uid_t uid, + gid_t gid) { int err; FsCred cred; @@ -157,7 +159,7 @@ int v9fs_co_chown(V9fsPDU *pdu, V9fsPath *path, uid_t uid, gid_t gid) return err; } -int v9fs_co_truncate(V9fsPDU *pdu, V9fsPath *path, off_t size) +int coroutine_fn v9fs_co_truncate(V9fsPDU *pdu, V9fsPath *path, off_t size) { int err; V9fsState *s = pdu->s; @@ -177,8 +179,9 @@ int v9fs_co_truncate(V9fsPDU *pdu, V9fsPath *path, off_t size) return err; } -int v9fs_co_mknod(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, uid_t uid, - gid_t gid, dev_t dev, mode_t mode, struct stat *stbuf) +int coroutine_fn v9fs_co_mknod(V9fsPDU *pdu, V9fsFidState *fidp, + V9fsString *name, uid_t uid, gid_t gid, + dev_t dev, mode_t mode, struct stat *stbuf) { int err; V9fsPath path; @@ -216,7 +219,7 @@ int v9fs_co_mknod(V9fsPDU *pdu, V9fsFidState *fidp, V9fsString *name, uid_t uid, } /* Only works with path name based fid */ -int v9fs_co_remove(V9fsPDU *pdu, V9fsPath *path) +int coroutine_fn v9fs_co_remove(V9fsPDU *pdu, V9fsPath *path) { int err; V9fsState *s = pdu->s; @@ -236,7 +239,8 @@ int v9fs_co_remove(V9fsPDU *pdu, V9fsPath *path) return err; } -int v9fs_co_unlinkat(V9fsPDU *pdu, V9fsPath *path, V9fsString *name, int flags) +int coroutine_fn v9fs_co_unlinkat(V9fsPDU *pdu, V9fsPath *path, + V9fsString *name, int flags) { int err; V9fsState *s = pdu->s; @@ -257,7 +261,8 @@ int v9fs_co_unlinkat(V9fsPDU *pdu, V9fsPath *path, V9fsString *name, int flags) } /* Only work with path name 
based fid */ -int v9fs_co_rename(V9fsPDU *pdu, V9fsPath *oldpath, V9fsPath *newpath) +int coroutine_fn v9fs_co_rename(V9fsPDU *pdu, V9fsPath *oldpath, + V9fsPath *newpath) { int err; V9fsState *s = pdu->s; @@ -275,8 +280,9 @@ int v9fs_co_rename(V9fsPDU *pdu, V9fsPath *oldpath, V9fsPath *newpath) return err; } -int v9fs_co_renameat(V9fsPDU *pdu, V9fsPath *olddirpath, V9fsString *oldname, - V9fsPath *newdirpath, V9fsString *newname) +int coroutine_fn v9fs_co_renameat(V9fsPDU *pdu, V9fsPath *olddirpath, + V9fsString *oldname, V9fsPath *newdirpath, + V9fsString *newname) { int err; V9fsState *s = pdu->s; @@ -295,8 +301,9 @@ int v9fs_co_renameat(V9fsPDU *pdu, V9fsPath *olddirpath, V9fsString *oldname, return err; } -int v9fs_co_symlink(V9fsPDU *pdu, V9fsFidState *dfidp, V9fsString *name, - const char *oldpath, gid_t gid, struct stat *stbuf) +int coroutine_fn v9fs_co_symlink(V9fsPDU *pdu, V9fsFidState *dfidp, + V9fsString *name, const char *oldpath, + gid_t gid, struct stat *stbuf) { int err; FsCred cred; @@ -337,8 +344,8 @@ int v9fs_co_symlink(V9fsPDU *pdu, V9fsFidState *dfidp, V9fsString *name, * For path name based fid we don't block. So we can * directly call the fs driver ops. */ -int v9fs_co_name_to_path(V9fsPDU *pdu, V9fsPath *dirpath, - const char *name, V9fsPath *path) +int coroutine_fn v9fs_co_name_to_path(V9fsPDU *pdu, V9fsPath *dirpath, + const char *name, V9fsPath *path) { int err; V9fsState *s = pdu->s; diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h index 3c7424e423..19e4d9287e 100644 --- a/hw/9pfs/coth.h +++ b/hw/9pfs/coth.h @@ -47,52 +47,53 @@ qemu_coroutine_yield(); \ } while (0) -extern void co_run_in_worker_bh(void *); -extern int v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *); -extern int v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **); -extern off_t v9fs_co_telldir(V9fsPDU *, V9fsFidState *); -extern void v9fs_co_seekdir(V9fsPDU *, V9fsFidState *, off_t); -extern void v9fs_co_rewinddir(V9fsPDU *, V9fsFidState *); -extern int v9fs_co_statfs(V9fsPDU *, V9fsPath *, struct statfs *); -extern int v9fs_co_lstat(V9fsPDU *, V9fsPath *, struct stat *); -extern int v9fs_co_chmod(V9fsPDU *, V9fsPath *, mode_t); -extern int v9fs_co_utimensat(V9fsPDU *, V9fsPath *, struct timespec [2]); -extern int v9fs_co_chown(V9fsPDU *, V9fsPath *, uid_t, gid_t); -extern int v9fs_co_truncate(V9fsPDU *, V9fsPath *, off_t); -extern int v9fs_co_llistxattr(V9fsPDU *, V9fsPath *, void *, size_t); -extern int v9fs_co_lgetxattr(V9fsPDU *, V9fsPath *, - V9fsString *, void *, size_t); -extern int v9fs_co_mknod(V9fsPDU *, V9fsFidState *, V9fsString *, uid_t, - gid_t, dev_t, mode_t, struct stat *); -extern int v9fs_co_mkdir(V9fsPDU *, V9fsFidState *, V9fsString *, - mode_t, uid_t, gid_t, struct stat *); -extern int v9fs_co_remove(V9fsPDU *, V9fsPath *); -extern int v9fs_co_rename(V9fsPDU *, V9fsPath *, V9fsPath *); -extern int v9fs_co_unlinkat(V9fsPDU *, V9fsPath *, V9fsString *, int flags); -extern int v9fs_co_renameat(V9fsPDU *, V9fsPath *, V9fsString *, - V9fsPath *, V9fsString *); -extern int v9fs_co_fstat(V9fsPDU *, V9fsFidState *, struct stat *); -extern int v9fs_co_opendir(V9fsPDU *, V9fsFidState *); -extern int v9fs_co_open(V9fsPDU *, V9fsFidState *, int); -extern int v9fs_co_open2(V9fsPDU *, V9fsFidState *, V9fsString *, - gid_t, int, int, struct stat *); -extern int v9fs_co_lsetxattr(V9fsPDU *, V9fsPath *, V9fsString *, - void *, size_t, int); -extern int v9fs_co_lremovexattr(V9fsPDU *, V9fsPath *, V9fsString *); -extern int v9fs_co_closedir(V9fsPDU *, V9fsFidOpenState *); -extern int 
v9fs_co_close(V9fsPDU *, V9fsFidOpenState *); -extern int v9fs_co_fsync(V9fsPDU *, V9fsFidState *, int); -extern int v9fs_co_symlink(V9fsPDU *, V9fsFidState *, V9fsString *, - const char *, gid_t, struct stat *); -extern int v9fs_co_link(V9fsPDU *, V9fsFidState *, - V9fsFidState *, V9fsString *); -extern int v9fs_co_pwritev(V9fsPDU *, V9fsFidState *, - struct iovec *, int, int64_t); -extern int v9fs_co_preadv(V9fsPDU *, V9fsFidState *, - struct iovec *, int, int64_t); -extern int v9fs_co_name_to_path(V9fsPDU *, V9fsPath *, - const char *, V9fsPath *); -extern int v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t, - V9fsStatDotl *v9stat); +void co_run_in_worker_bh(void *); +int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *); +int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **); +off_t coroutine_fn v9fs_co_telldir(V9fsPDU *, V9fsFidState *); +void coroutine_fn v9fs_co_seekdir(V9fsPDU *, V9fsFidState *, off_t); +void coroutine_fn v9fs_co_rewinddir(V9fsPDU *, V9fsFidState *); +int coroutine_fn v9fs_co_statfs(V9fsPDU *, V9fsPath *, struct statfs *); +int coroutine_fn v9fs_co_lstat(V9fsPDU *, V9fsPath *, struct stat *); +int coroutine_fn v9fs_co_chmod(V9fsPDU *, V9fsPath *, mode_t); +int coroutine_fn v9fs_co_utimensat(V9fsPDU *, V9fsPath *, struct timespec [2]); +int coroutine_fn v9fs_co_chown(V9fsPDU *, V9fsPath *, uid_t, gid_t); +int coroutine_fn v9fs_co_truncate(V9fsPDU *, V9fsPath *, off_t); +int coroutine_fn v9fs_co_llistxattr(V9fsPDU *, V9fsPath *, void *, size_t); +int coroutine_fn v9fs_co_lgetxattr(V9fsPDU *, V9fsPath *, + V9fsString *, void *, size_t); +int coroutine_fn v9fs_co_mknod(V9fsPDU *, V9fsFidState *, V9fsString *, uid_t, + gid_t, dev_t, mode_t, struct stat *); +int coroutine_fn v9fs_co_mkdir(V9fsPDU *, V9fsFidState *, V9fsString *, + mode_t, uid_t, gid_t, struct stat *); +int coroutine_fn v9fs_co_remove(V9fsPDU *, V9fsPath *); +int coroutine_fn v9fs_co_rename(V9fsPDU *, V9fsPath *, V9fsPath *); +int coroutine_fn v9fs_co_unlinkat(V9fsPDU *, V9fsPath *, V9fsString *, + int flags); +int coroutine_fn v9fs_co_renameat(V9fsPDU *, V9fsPath *, V9fsString *, + V9fsPath *, V9fsString *); +int coroutine_fn v9fs_co_fstat(V9fsPDU *, V9fsFidState *, struct stat *); +int coroutine_fn v9fs_co_opendir(V9fsPDU *, V9fsFidState *); +int coroutine_fn v9fs_co_open(V9fsPDU *, V9fsFidState *, int); +int coroutine_fn v9fs_co_open2(V9fsPDU *, V9fsFidState *, V9fsString *, + gid_t, int, int, struct stat *); +int coroutine_fn v9fs_co_lsetxattr(V9fsPDU *, V9fsPath *, V9fsString *, + void *, size_t, int); +int coroutine_fn v9fs_co_lremovexattr(V9fsPDU *, V9fsPath *, V9fsString *); +int coroutine_fn v9fs_co_closedir(V9fsPDU *, V9fsFidOpenState *); +int coroutine_fn v9fs_co_close(V9fsPDU *, V9fsFidOpenState *); +int coroutine_fn v9fs_co_fsync(V9fsPDU *, V9fsFidState *, int); +int coroutine_fn v9fs_co_symlink(V9fsPDU *, V9fsFidState *, V9fsString *, + const char *, gid_t, struct stat *); +int coroutine_fn v9fs_co_link(V9fsPDU *, V9fsFidState *, + V9fsFidState *, V9fsString *); +int coroutine_fn v9fs_co_pwritev(V9fsPDU *, V9fsFidState *, + struct iovec *, int, int64_t); +int coroutine_fn v9fs_co_preadv(V9fsPDU *, V9fsFidState *, + struct iovec *, int, int64_t); +int coroutine_fn v9fs_co_name_to_path(V9fsPDU *, V9fsPath *, + const char *, V9fsPath *); +int coroutine_fn v9fs_co_st_gen(V9fsPDU *pdu, V9fsPath *path, mode_t, + V9fsStatDotl *v9stat); #endif diff --git a/hw/9pfs/coxattr.c b/hw/9pfs/coxattr.c index 133c4ead37..154392eade 100644 --- a/hw/9pfs/coxattr.c 
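
The coth.h hunk above drops the redundant extern keywords and marks every prototype coroutine_fn. The marker does not change code generation; its value is that callers and checkers can see from the declaration alone which helpers may yield and therefore must only be entered from coroutine context. A rough illustration, with the marker modelled as an empty macro purely as an assumption for the example (QEMU defines the real one in its coroutine headers):

#include <stdio.h>

#define example_coroutine_fn   /* documentary marker, expands to nothing */

/* Stands in for a v9fs_co_* style helper: anything that might suspend
 * belongs behind the marker so the constraint is visible at the
 * declaration, not only buried in the implementation. */
static example_coroutine_fn int slow_lookup(const char *name)
{
    return (name && name[0]) ? 0 : -1;
}

int main(void)
{
    printf("slow_lookup: %d\n", slow_lookup("fid0"));
    return 0;
}
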
+++ b/hw/9pfs/coxattr.c @@ -17,7 +17,8 @@ #include "qemu/coroutine.h" #include "coth.h" -int v9fs_co_llistxattr(V9fsPDU *pdu, V9fsPath *path, void *value, size_t size) +int coroutine_fn v9fs_co_llistxattr(V9fsPDU *pdu, V9fsPath *path, void *value, + size_t size) { int err; V9fsState *s = pdu->s; @@ -37,9 +38,9 @@ int v9fs_co_llistxattr(V9fsPDU *pdu, V9fsPath *path, void *value, size_t size) return err; } -int v9fs_co_lgetxattr(V9fsPDU *pdu, V9fsPath *path, - V9fsString *xattr_name, - void *value, size_t size) +int coroutine_fn v9fs_co_lgetxattr(V9fsPDU *pdu, V9fsPath *path, + V9fsString *xattr_name, void *value, + size_t size) { int err; V9fsState *s = pdu->s; @@ -61,9 +62,9 @@ int v9fs_co_lgetxattr(V9fsPDU *pdu, V9fsPath *path, return err; } -int v9fs_co_lsetxattr(V9fsPDU *pdu, V9fsPath *path, - V9fsString *xattr_name, void *value, - size_t size, int flags) +int coroutine_fn v9fs_co_lsetxattr(V9fsPDU *pdu, V9fsPath *path, + V9fsString *xattr_name, void *value, + size_t size, int flags) { int err; V9fsState *s = pdu->s; @@ -85,8 +86,8 @@ int v9fs_co_lsetxattr(V9fsPDU *pdu, V9fsPath *path, return err; } -int v9fs_co_lremovexattr(V9fsPDU *pdu, V9fsPath *path, - V9fsString *xattr_name) +int coroutine_fn v9fs_co_lremovexattr(V9fsPDU *pdu, V9fsPath *path, + V9fsString *xattr_name) { int err; V9fsState *s = pdu->s; diff --git a/hw/9pfs/trace-events b/hw/9pfs/trace-events index 48d3d8abed..fb4de3d465 100644 --- a/hw/9pfs/trace-events +++ b/hw/9pfs/trace-events @@ -42,6 +42,6 @@ v9fs_mkdir(uint16_t tag, uint8_t id, int32_t fid, char* name, int mode, uint32_t v9fs_mkdir_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int err) "tag %u id %u qid={type %d version %d path %"PRId64"} err %d" v9fs_xattrwalk(uint16_t tag, uint8_t id, int32_t fid, int32_t newfid, char* name) "tag %d id %d fid %d newfid %d name %s" v9fs_xattrwalk_return(uint16_t tag, uint8_t id, int64_t size) "tag %d id %d size %"PRId64 -v9fs_xattrcreate(uint16_t tag, uint8_t id, int32_t fid, char* name, int64_t size, int flags) "tag %d id %d fid %d name %s size %"PRId64" flags %d" +v9fs_xattrcreate(uint16_t tag, uint8_t id, int32_t fid, char* name, uint64_t size, int flags) "tag %d id %d fid %d name %s size %"PRIu64" flags %d" v9fs_readlink(uint16_t tag, uint8_t id, int32_t fid) "tag %d id %d fid %d" v9fs_readlink_return(uint16_t tag, uint8_t id, char* target) "tag %d id %d name %s" diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c index 009b43f6d0..1782e4a227 100644 --- a/hw/9pfs/virtio-9p-device.c +++ b/hw/9pfs/virtio-9p-device.c @@ -41,6 +41,7 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) V9fsState *s = &v->state; V9fsPDU *pdu; ssize_t len; + VirtQueueElement *elem; while ((pdu = pdu_alloc(s))) { struct { @@ -48,21 +49,28 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) uint8_t id; uint16_t tag_le; } QEMU_PACKED out; - VirtQueueElement *elem; elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); if (!elem) { - pdu_free(pdu); - break; + goto out_free_pdu; } - BUG_ON(elem->out_num == 0 || elem->in_num == 0); - QEMU_BUILD_BUG_ON(sizeof out != 7); + if (elem->in_num == 0) { + virtio_error(vdev, + "The guest sent a VirtFS request without space for " + "the reply"); + goto out_free_req; + } + QEMU_BUILD_BUG_ON(sizeof(out) != 7); v->elems[pdu->idx] = elem; len = iov_to_buf(elem->out_sg, elem->out_num, 0, - &out, sizeof out); - BUG_ON(len != sizeof out); + &out, sizeof(out)); + if (len != sizeof(out)) { + virtio_error(vdev, "The guest sent a malformed VirtFS 
request: " + "header size is %zd, should be 7", len); + goto out_free_req; + } pdu->size = le32_to_cpu(out.size_le); @@ -72,6 +80,14 @@ static void handle_9p_output(VirtIODevice *vdev, VirtQueue *vq) qemu_co_queue_init(&pdu->complete); pdu_submit(pdu); } + + return; + +out_free_req: + virtqueue_detach_element(vq, elem, 0); + g_free(elem); +out_free_pdu: + pdu_free(pdu); } static uint64_t virtio_9p_get_features(VirtIODevice *vdev, uint64_t features, @@ -97,11 +113,6 @@ static void virtio_9p_get_config(VirtIODevice *vdev, uint8_t *config) g_free(cfg); } -static int virtio_9p_load(QEMUFile *f, void *opaque, size_t size) -{ - return virtio_load(VIRTIO_DEVICE(opaque), f, 1); -} - static void virtio_9p_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -130,6 +141,13 @@ static void virtio_9p_device_unrealize(DeviceState *dev, Error **errp) v9fs_device_unrealize_common(s, errp); } +static void virtio_9p_reset(VirtIODevice *vdev) +{ + V9fsVirtioState *v = (V9fsVirtioState *)vdev; + + v9fs_reset(&v->state); +} + ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, va_list ap) { @@ -168,7 +186,15 @@ void virtio_init_iov_from_pdu(V9fsPDU *pdu, struct iovec **piov, /* virtio-9p device */ -VMSTATE_VIRTIO_DEVICE(9p, 1, virtio_9p_load, virtio_vmstate_save); +static const VMStateDescription vmstate_virtio_9p = { + .name = "virtio-9p", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; static Property virtio_9p_properties[] = { DEFINE_PROP_STRING("mount_tag", V9fsVirtioState, state.fsconf.tag), @@ -188,6 +214,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data) vdc->unrealize = virtio_9p_device_unrealize; vdc->get_features = virtio_9p_get_features; vdc->get_config = virtio_9p_get_config; + vdc->reset = virtio_9p_reset; } static const TypeInfo virtio_device_info = { diff --git a/hw/9pfs/virtio-9p.h b/hw/9pfs/virtio-9p.h index 7586b792d6..25c47c7cb6 100644 --- a/hw/9pfs/virtio-9p.h +++ b/hw/9pfs/virtio-9p.h @@ -15,7 +15,7 @@ typedef struct V9fsVirtioState V9fsState state; } V9fsVirtioState; -extern void virtio_9p_push_and_notify(V9fsPDU *pdu); +void virtio_9p_push_and_notify(V9fsPDU *pdu); ssize_t virtio_pdu_vmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, va_list ap); diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 96ea5a1f6b..a64d8b4eee 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -2,6 +2,7 @@ devices-dirs-$(call land, $(CONFIG_VIRTIO),$(call land,$(CONFIG_VIRTFS),$(CONFIG devices-dirs-$(CONFIG_ACPI) += acpi/ # [GNU ARM Eclipse] ifeq ($(CONFIG_GNU_ARM_ECLIPSE),n) +devices-dirs-$(CONFIG_SOFTMMU) += adc/ devices-dirs-$(CONFIG_SOFTMMU) += audio/ devices-dirs-$(CONFIG_SOFTMMU) += block/ devices-dirs-$(CONFIG_SOFTMMU) += bt/ diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs index 4b7da6639f..489e63bb75 100644 --- a/hw/acpi/Makefile.objs +++ b/hw/acpi/Makefile.objs @@ -3,7 +3,7 @@ common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o memory_hotplug_acpi_table.o common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o -obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o +common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o common-obj-$(CONFIG_ACPI) += acpi_interface.o common-obj-$(CONFIG_ACPI) += bios-linker-loader.o common-obj-$(CONFIG_ACPI) += aml-build.o diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index db3e914fb4..b2a1e4033b 
100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -226,7 +226,7 @@ static void build_extop_package(GArray *package, uint8_t op) build_prepend_byte(package, 0x5B); /* ExtOpPrefix */ } -static void build_append_int_noprefix(GArray *table, uint64_t value, int size) +void build_append_int_noprefix(GArray *table, uint64_t value, int size) { int i; diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index c13b65c2c9..5ac89fefaf 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -4,6 +4,7 @@ #include "qapi/error.h" #include "qapi-event.h" #include "trace.h" +#include "sysemu/numa.h" #define ACPI_CPU_HOTPLUG_REG_LEN 12 #define ACPI_CPU_SELECTOR_OFFSET_WR 0 @@ -503,6 +504,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, /* build Processor object for each processor */ for (i = 0; i < arch_ids->len; i++) { + int j; Aml *dev; Aml *uid = aml_int(i); GArray *madt_buf = g_array_new(0, 1, 1); @@ -529,6 +531,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, apic->flags = cpu_to_le32(1); break; } + case ACPI_APIC_LOCAL_X2APIC: { + AcpiMadtProcessorX2Apic *apic = (void *)madt_buf->data; + apic->flags = cpu_to_le32(1); + break; + } default: assert(0); } @@ -546,6 +553,16 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_arg(1), aml_arg(2)) ); aml_append(dev, method); + + /* Linux guests discard SRAT info for non-present CPUs + * as a result _PXM is required for all CPUs which might + * be hot-plugged. For simplicity, add it for all CPUs. + */ + j = numa_get_node_for_cpu(i); + if (j < nb_numa_nodes) { + aml_append(dev, aml_name_decl("_PXM", aml_int(j))); + } + aml_append(cpus_dev, dev); } } diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index e19d902063..f15a2402fc 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -15,6 +15,7 @@ #include "qapi/error.h" #include "qom/cpu.h" #include "hw/i386/pc.h" +#include "qemu/error-report.h" #define CPU_EJECT_METHOD "CPEJ" #define CPU_MAT_METHOD "CPMA" @@ -63,7 +64,8 @@ static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu, cpu_id = k->get_arch_id(cpu); if ((cpu_id / 8) >= ACPI_GPE_PROC_LEN) { - error_setg(errp, "acpi: invalid cpu id: %" PRIi64, cpu_id); + object_property_set_bool(g->device, false, "cpu-hotplug-legacy", + &error_abort); return; } @@ -85,13 +87,14 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, { CPUState *cpu; - CPU_FOREACH(cpu) { - acpi_set_cpu_present_bit(gpe_cpu, cpu, &error_abort); - } memory_region_init_io(&gpe_cpu->io, owner, &AcpiCpuHotplug_ops, gpe_cpu, "acpi-cpu-hotplug", ACPI_GPE_PROC_LEN); memory_region_add_subregion(parent, base, &gpe_cpu->io); gpe_cpu->device = owner; + + CPU_FOREACH(cpu) { + acpi_set_cpu_present_bit(gpe_cpu, cpu, &error_abort); + } } void acpi_switch_to_modern_cphp(AcpiCpuHotplug *gpe_cpu, @@ -234,7 +237,11 @@ void build_legacy_cpu_hotplug_aml(Aml *ctx, MachineState *machine, /* The current AML generator can cover the APIC ID range [0..255], * inclusive, for VCPU hotplug. */ QEMU_BUILD_BUG_ON(ACPI_CPU_HOTPLUG_ID_LIMIT > 256); - g_assert(pcms->apic_id_limit <= ACPI_CPU_HOTPLUG_ID_LIMIT); + if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { + error_report("max_cpus is too large. APIC ID of last CPU is %u", + pcms->apic_id_limit - 1); + exit(1); + } /* create PCI0.PRES device and its _CRS to reserve CPU hotplug MMIO */ dev = aml_device("PCI0." 
stringify(CPU_HOTPLUG_RESOURCE_DEVICE)); diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index e5a3c18e52..830c475127 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -490,8 +490,12 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, if (lpc->pm.acpi_memory_hotplug.is_enabled && object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - acpi_memory_plug_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug, - dev, errp); + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + nvdimm_acpi_plug_cb(hotplug_dev, dev); + } else { + acpi_memory_plug_cb(hotplug_dev, &lpc->pm.acpi_memory_hotplug, + dev, errp); + } } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { if (lpc->pm.cpu_hotplug_legacy) { legacy_acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.gpe_cpu, dev, errp); diff --git a/hw/acpi/ipmi.c b/hw/acpi/ipmi.c index 7e74ce4460..651e2e94ea 100644 --- a/hw/acpi/ipmi.c +++ b/hw/acpi/ipmi.c @@ -99,6 +99,7 @@ void build_acpi_ipmi_devices(Aml *scope, BusState *bus) ii = IPMI_INTERFACE(obj); iic = IPMI_INTERFACE_GET_CLASS(obj); + memset(&info, 0, sizeof(info)); iic->get_fwinfo(ii, &info); aml_append(scope, aml_ipmi_device(&info)); } diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index e486128aa1..8e7d6ec034 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -33,35 +33,30 @@ #include "hw/nvram/fw_cfg.h" #include "hw/mem/nvdimm.h" -static int nvdimm_plugged_device_list(Object *obj, void *opaque) +static int nvdimm_device_list(Object *obj, void *opaque) { GSList **list = opaque; if (object_dynamic_cast(obj, TYPE_NVDIMM)) { - DeviceState *dev = DEVICE(obj); - - if (dev->realized) { /* only realized NVDIMMs matter */ - *list = g_slist_append(*list, DEVICE(obj)); - } + *list = g_slist_append(*list, DEVICE(obj)); } - object_child_foreach(obj, nvdimm_plugged_device_list, opaque); + object_child_foreach(obj, nvdimm_device_list, opaque); return 0; } /* - * inquire plugged NVDIMM devices and link them into the list which is + * inquire NVDIMM devices and link them into the list which is * returned to the caller. * * Note: it is the caller's responsibility to free the list to avoid * memory leak. */ -static GSList *nvdimm_get_plugged_device_list(void) +static GSList *nvdimm_get_device_list(void) { GSList *list = NULL; - object_child_foreach(qdev_get_machine(), nvdimm_plugged_device_list, - &list); + object_child_foreach(qdev_get_machine(), nvdimm_device_list, &list); return list; } @@ -219,7 +214,7 @@ static uint32_t nvdimm_slot_to_dcr_index(int slot) static NVDIMMDevice *nvdimm_get_device_by_handle(uint32_t handle) { NVDIMMDevice *nvdimm = NULL; - GSList *list, *device_list = nvdimm_get_plugged_device_list(); + GSList *list, *device_list = nvdimm_get_device_list(); for (list = device_list; list; list = list->next) { NVDIMMDevice *nvd = list->data; @@ -289,8 +284,6 @@ static void nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev) { NvdimmNfitMemDev *nfit_memdev; - uint64_t addr = object_property_get_int(OBJECT(dev), PC_DIMM_ADDR_PROP, - NULL); uint64_t size = object_property_get_int(OBJECT(dev), PC_DIMM_SIZE_PROP, NULL); int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, @@ -314,7 +307,8 @@ nvdimm_build_structure_memdev(GArray *structures, DeviceState *dev) /* The memory region on the device. */ nfit_memdev->region_len = cpu_to_le64(size); - nfit_memdev->region_dpa = cpu_to_le64(addr); + /* The device address starts from 0. */ + nfit_memdev->region_dpa = cpu_to_le64(0); /* Only one interleave for PMEM. 
*/ nfit_memdev->interleave_ways = cpu_to_le16(1); @@ -349,8 +343,9 @@ static void nvdimm_build_structure_dcr(GArray *structures, DeviceState *dev) (DSM) in DSM Spec Rev1.*/); } -static GArray *nvdimm_build_device_structure(GSList *device_list) +static GArray *nvdimm_build_device_structure(void) { + GSList *device_list = nvdimm_get_device_list(); GArray *structures = g_array_new(false, true /* clear */, 1); for (; device_list; device_list = device_list->next) { @@ -368,14 +363,32 @@ static GArray *nvdimm_build_device_structure(GSList *device_list) /* build NVDIMM Control Region Structure. */ nvdimm_build_structure_dcr(structures, dev); } + g_slist_free(device_list); return structures; } -static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets, +static void nvdimm_init_fit_buffer(NvdimmFitBuffer *fit_buf) +{ + fit_buf->fit = g_array_new(false, true /* clear */, 1); +} + +static void nvdimm_build_fit_buffer(NvdimmFitBuffer *fit_buf) +{ + g_array_free(fit_buf->fit, true); + fit_buf->fit = nvdimm_build_device_structure(); + fit_buf->dirty = true; +} + +void nvdimm_plug(AcpiNVDIMMState *state) +{ + nvdimm_build_fit_buffer(&state->fit_buf); +} + +static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets, GArray *table_data, BIOSLinker *linker) { - GArray *structures = nvdimm_build_device_structure(device_list); + NvdimmFitBuffer *fit_buf = &state->fit_buf; unsigned int header; acpi_add_table(table_offsets, table_data); @@ -384,14 +397,15 @@ static void nvdimm_build_nfit(GSList *device_list, GArray *table_offsets, header = table_data->len; acpi_data_push(table_data, sizeof(NvdimmNfitHeader)); /* NVDIMM device structures. */ - g_array_append_vals(table_data, structures->data, structures->len); + g_array_append_vals(table_data, fit_buf->fit->data, fit_buf->fit->len); build_header(linker, table_data, (void *)(table_data->data + header), "NFIT", - sizeof(NvdimmNfitHeader) + structures->len, 1, NULL, NULL); - g_array_free(structures, true); + sizeof(NvdimmNfitHeader) + fit_buf->fit->len, 1, NULL, NULL); } +#define NVDIMM_DSM_MEMORY_SIZE 4096 + struct NvdimmDsmIn { uint32_t handle; uint32_t revision; @@ -402,7 +416,7 @@ struct NvdimmDsmIn { }; } QEMU_PACKED; typedef struct NvdimmDsmIn NvdimmDsmIn; -QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmIn) != 4096); +QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmIn) != NVDIMM_DSM_MEMORY_SIZE); struct NvdimmDsmOut { /* the size of buffer filled by QEMU. */ @@ -410,7 +424,7 @@ struct NvdimmDsmOut { uint8_t data[4092]; } QEMU_PACKED; typedef struct NvdimmDsmOut NvdimmDsmOut; -QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmOut) != 4096); +QEMU_BUILD_BUG_ON(sizeof(NvdimmDsmOut) != NVDIMM_DSM_MEMORY_SIZE); struct NvdimmDsmFunc0Out { /* the size of buffer filled by QEMU. */ @@ -438,7 +452,7 @@ struct NvdimmFuncGetLabelSizeOut { uint32_t max_xfer; } QEMU_PACKED; typedef struct NvdimmFuncGetLabelSizeOut NvdimmFuncGetLabelSizeOut; -QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelSizeOut) > 4096); +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelSizeOut) > NVDIMM_DSM_MEMORY_SIZE); struct NvdimmFuncGetLabelDataIn { uint32_t offset; /* the offset in the namespace label data area. */ @@ -446,7 +460,7 @@ struct NvdimmFuncGetLabelDataIn { } QEMU_PACKED; typedef struct NvdimmFuncGetLabelDataIn NvdimmFuncGetLabelDataIn; QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataIn) + - offsetof(NvdimmDsmIn, arg3) > 4096); + offsetof(NvdimmDsmIn, arg3) > NVDIMM_DSM_MEMORY_SIZE); struct NvdimmFuncGetLabelDataOut { /* the size of buffer filled by QEMU. 
*/ @@ -455,7 +469,7 @@ struct NvdimmFuncGetLabelDataOut { uint8_t out_buf[0]; /* the data got via Get Namesapce Label function. */ } QEMU_PACKED; typedef struct NvdimmFuncGetLabelDataOut NvdimmFuncGetLabelDataOut; -QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataOut) > 4096); +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataOut) > NVDIMM_DSM_MEMORY_SIZE); struct NvdimmFuncSetLabelDataIn { uint32_t offset; /* the offset in the namespace label data area. */ @@ -464,7 +478,23 @@ struct NvdimmFuncSetLabelDataIn { } QEMU_PACKED; typedef struct NvdimmFuncSetLabelDataIn NvdimmFuncSetLabelDataIn; QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncSetLabelDataIn) + - offsetof(NvdimmDsmIn, arg3) > 4096); + offsetof(NvdimmDsmIn, arg3) > NVDIMM_DSM_MEMORY_SIZE); + +struct NvdimmFuncReadFITIn { + uint32_t offset; /* the offset into FIT buffer. */ +} QEMU_PACKED; +typedef struct NvdimmFuncReadFITIn NvdimmFuncReadFITIn; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITIn) + + offsetof(NvdimmDsmIn, arg3) > NVDIMM_DSM_MEMORY_SIZE); + +struct NvdimmFuncReadFITOut { + /* the size of buffer filled by QEMU. */ + uint32_t len; + uint32_t func_ret_status; /* return status code. */ + uint8_t fit[0]; /* the FIT data. */ +} QEMU_PACKED; +typedef struct NvdimmFuncReadFITOut NvdimmFuncReadFITOut; +QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncReadFITOut) > NVDIMM_DSM_MEMORY_SIZE); static void nvdimm_dsm_function0(uint32_t supported_func, hwaddr dsm_mem_addr) @@ -486,6 +516,79 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr) cpu_physical_memory_write(dsm_mem_addr, &out, sizeof(out)); } +#define NVDIMM_DSM_RET_STATUS_SUCCESS 0 /* Success */ +#define NVDIMM_DSM_RET_STATUS_UNSUPPORT 1 /* Not Supported */ +#define NVDIMM_DSM_RET_STATUS_NOMEMDEV 2 /* Non-Existing Memory Device */ +#define NVDIMM_DSM_RET_STATUS_INVALID 3 /* Invalid Input Parameters */ +#define NVDIMM_DSM_RET_STATUS_FIT_CHANGED 0x100 /* FIT Changed */ + +#define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000 + +/* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */ +static void nvdimm_dsm_func_read_fit(AcpiNVDIMMState *state, NvdimmDsmIn *in, + hwaddr dsm_mem_addr) +{ + NvdimmFitBuffer *fit_buf = &state->fit_buf; + NvdimmFuncReadFITIn *read_fit; + NvdimmFuncReadFITOut *read_fit_out; + GArray *fit; + uint32_t read_len = 0, func_ret_status; + int size; + + read_fit = (NvdimmFuncReadFITIn *)in->arg3; + le32_to_cpus(&read_fit->offset); + + fit = fit_buf->fit; + + nvdimm_debug("Read FIT: offset %#x FIT size %#x Dirty %s.\n", + read_fit->offset, fit->len, fit_buf->dirty ? "Yes" : "No"); + + if (read_fit->offset > fit->len) { + func_ret_status = NVDIMM_DSM_RET_STATUS_INVALID; + goto exit; + } + + /* It is the first time to read FIT. */ + if (!read_fit->offset) { + fit_buf->dirty = false; + } else if (fit_buf->dirty) { /* FIT has been changed during RFIT. 
*/ + func_ret_status = NVDIMM_DSM_RET_STATUS_FIT_CHANGED; + goto exit; + } + + func_ret_status = NVDIMM_DSM_RET_STATUS_SUCCESS; + read_len = MIN(fit->len - read_fit->offset, + NVDIMM_DSM_MEMORY_SIZE - sizeof(NvdimmFuncReadFITOut)); + +exit: + size = sizeof(NvdimmFuncReadFITOut) + read_len; + read_fit_out = g_malloc(size); + + read_fit_out->len = cpu_to_le32(size); + read_fit_out->func_ret_status = cpu_to_le32(func_ret_status); + memcpy(read_fit_out->fit, fit->data + read_fit->offset, read_len); + + cpu_physical_memory_write(dsm_mem_addr, read_fit_out, size); + + g_free(read_fit_out); +} + +static void +nvdimm_dsm_handle_reserved_root_method(AcpiNVDIMMState *state, + NvdimmDsmIn *in, hwaddr dsm_mem_addr) +{ + switch (in->function) { + case 0x0: + nvdimm_dsm_function0(0x1 | 1 << 1 /* Read FIT */, dsm_mem_addr); + return; + case 0x1 /* Read FIT */: + nvdimm_dsm_func_read_fit(state, in, dsm_mem_addr); + return; + } + + nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr); +} + static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr) { /* @@ -499,7 +602,7 @@ static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr) } /* No function except function 0 is supported yet. */ - nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); + nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr); } /* @@ -509,7 +612,9 @@ static void nvdimm_dsm_root(NvdimmDsmIn *in, hwaddr dsm_mem_addr) */ static uint32_t nvdimm_get_max_xfer_label_size(void) { - uint32_t max_get_size, max_set_size, dsm_memory_size = 4096; + uint32_t max_get_size, max_set_size, dsm_memory_size; + + dsm_memory_size = NVDIMM_DSM_MEMORY_SIZE; /* * the max data ACPI can read one time which is transferred by @@ -545,7 +650,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr) nvdimm_debug("label_size %#x, max_xfer %#x.\n", label_size, mxfer); - label_size_out.func_ret_status = cpu_to_le32(0 /* Success */); + label_size_out.func_ret_status = cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS); label_size_out.label_size = cpu_to_le32(label_size); label_size_out.max_xfer = cpu_to_le32(mxfer); @@ -556,7 +661,7 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr) static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm, uint32_t offset, uint32_t length) { - uint32_t ret = 3 /* Invalid Input Parameters */; + uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID; if (offset + length < offset) { nvdimm_debug("offset %#x + length %#x is overflow.\n", offset, @@ -576,7 +681,7 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm, return ret; } - return 0 /* Success */; + return NVDIMM_DSM_RET_STATUS_SUCCESS; } /* @@ -600,17 +705,18 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset, get_label_data->length); - if (status != 0 /* Success */) { + if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) { nvdimm_dsm_no_payload(status, dsm_mem_addr); return; } size = sizeof(*get_label_data_out) + get_label_data->length; - assert(size <= 4096); + assert(size <= NVDIMM_DSM_MEMORY_SIZE); get_label_data_out = g_malloc(size); get_label_data_out->len = cpu_to_le32(size); - get_label_data_out->func_ret_status = cpu_to_le32(0 /* Success */); + get_label_data_out->func_ret_status = + cpu_to_le32(NVDIMM_DSM_RET_STATUS_SUCCESS); nvc->read_label_data(nvdimm, get_label_data_out->out_buf, get_label_data->length, get_label_data->offset); @@ -638,17 +744,17 @@ static void 
nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in, status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset, set_label_data->length); - if (status != 0 /* Success */) { + if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) { nvdimm_dsm_no_payload(status, dsm_mem_addr); return; } - assert(sizeof(*in) + sizeof(*set_label_data) + set_label_data->length <= - 4096); + assert(offsetof(NvdimmDsmIn, arg3) + sizeof(*set_label_data) + + set_label_data->length <= NVDIMM_DSM_MEMORY_SIZE); nvc->write_label_data(nvdimm, set_label_data->in_buf, set_label_data->length, set_label_data->offset); - nvdimm_dsm_no_payload(0 /* Success */, dsm_mem_addr); + nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_SUCCESS, dsm_mem_addr); } static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) @@ -672,7 +778,7 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) } if (!nvdimm) { - nvdimm_dsm_no_payload(2 /* Non-Existing Memory Device */, + nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_NOMEMDEV, dsm_mem_addr); return; } @@ -699,7 +805,7 @@ static void nvdimm_dsm_device(NvdimmDsmIn *in, hwaddr dsm_mem_addr) break; } - nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); + nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr); } static uint64_t @@ -712,6 +818,7 @@ nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size) static void nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { + AcpiNVDIMMState *state = opaque; NvdimmDsmIn *in; hwaddr dsm_mem_addr = val; @@ -735,7 +842,12 @@ nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) if (in->revision != 0x1 /* Currently we only support DSM Spec Rev1. */) { nvdimm_debug("Revision %#x is not supported, expect %#x.\n", in->revision, 0x1); - nvdimm_dsm_no_payload(1 /* Not Supported */, dsm_mem_addr); + nvdimm_dsm_no_payload(NVDIMM_DSM_RET_STATUS_UNSUPPORT, dsm_mem_addr); + goto exit; + } + + if (in->handle == NVDIMM_QEMU_RSVD_HANDLE_ROOT) { + nvdimm_dsm_handle_reserved_root_method(state, in, dsm_mem_addr); goto exit; } @@ -761,6 +873,13 @@ static const MemoryRegionOps nvdimm_dsm_ops = { }, }; +void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + if (dev->hotplugged) { + acpi_send_event(DEVICE(hotplug_dev), ACPI_NVDIMM_HOTPLUG_STATUS); + } +} + void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, FWCfgState *fw_cfg, Object *owner) { @@ -772,23 +891,105 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, acpi_data_push(state->dsm_mem, sizeof(NvdimmDsmIn)); fw_cfg_add_file(fw_cfg, NVDIMM_DSM_MEM_FILE, state->dsm_mem->data, state->dsm_mem->len); + + nvdimm_init_fit_buffer(&state->fit_buf); } -#define NVDIMM_COMMON_DSM "NCAL" -#define NVDIMM_ACPI_MEM_ADDR "MEMA" +#define NVDIMM_COMMON_DSM "NCAL" +#define NVDIMM_ACPI_MEM_ADDR "MEMA" + +#define NVDIMM_DSM_MEMORY "NRAM" +#define NVDIMM_DSM_IOPORT "NPIO" + +#define NVDIMM_DSM_NOTIFY "NTFI" +#define NVDIMM_DSM_HANDLE "HDLE" +#define NVDIMM_DSM_REVISION "REVS" +#define NVDIMM_DSM_FUNCTION "FUNC" +#define NVDIMM_DSM_ARG3 "FARG" + +#define NVDIMM_DSM_OUT_BUF_SIZE "RLEN" +#define NVDIMM_DSM_OUT_BUF "ODAT" + +#define NVDIMM_DSM_RFIT_STATUS "RSTA" + +#define NVDIMM_QEMU_RSVD_UUID "648B9CF2-CDA1-4312-8AD9-49C4AF32BD62" static void nvdimm_build_common_dsm(Aml *dev) { - Aml *method, *ifctx, *function, *handle, *uuid, *dsm_mem, *result_size; + Aml *method, *ifctx, *function, *handle, *uuid, *dsm_mem, *elsectx2; Aml *elsectx, *unsupport, *unpatched, *expected_uuid, *uuid_invalid; - 
Aml *pckg, *pckg_index, *pckg_buf; + Aml *pckg, *pckg_index, *pckg_buf, *field, *dsm_out_buf, *dsm_out_buf_size; uint8_t byte_list[1]; method = aml_method(NVDIMM_COMMON_DSM, 5, AML_SERIALIZED); uuid = aml_arg(0); function = aml_arg(2); handle = aml_arg(4); - dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR); + dsm_mem = aml_local(6); + dsm_out_buf = aml_local(7); + + aml_append(method, aml_store(aml_name(NVDIMM_ACPI_MEM_ADDR), dsm_mem)); + + /* map DSM memory and IO into ACPI namespace. */ + aml_append(method, aml_operation_region(NVDIMM_DSM_IOPORT, AML_SYSTEM_IO, + aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN)); + aml_append(method, aml_operation_region(NVDIMM_DSM_MEMORY, + AML_SYSTEM_MEMORY, dsm_mem, sizeof(NvdimmDsmIn))); + + /* + * DSM notifier: + * NVDIMM_DSM_NOTIFY: write the address of DSM memory and notify QEMU to + * emulate the access. + * + * It is the IO port so that accessing them will cause VM-exit, the + * control will be transferred to QEMU. + */ + field = aml_field(NVDIMM_DSM_IOPORT, AML_DWORD_ACC, AML_NOLOCK, + AML_PRESERVE); + aml_append(field, aml_named_field(NVDIMM_DSM_NOTIFY, + sizeof(uint32_t) * BITS_PER_BYTE)); + aml_append(method, field); + + /* + * DSM input: + * NVDIMM_DSM_HANDLE: store device's handle, it's zero if the _DSM call + * happens on NVDIMM Root Device. + * NVDIMM_DSM_REVISION: store the Arg1 of _DSM call. + * NVDIMM_DSM_FUNCTION: store the Arg2 of _DSM call. + * NVDIMM_DSM_ARG3: store the Arg3 of _DSM call which is a Package + * containing function-specific arguments. + * + * They are RAM mapping on host so that these accesses never cause + * VM-EXIT. + */ + field = aml_field(NVDIMM_DSM_MEMORY, AML_DWORD_ACC, AML_NOLOCK, + AML_PRESERVE); + aml_append(field, aml_named_field(NVDIMM_DSM_HANDLE, + sizeof(typeof_field(NvdimmDsmIn, handle)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_REVISION, + sizeof(typeof_field(NvdimmDsmIn, revision)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_FUNCTION, + sizeof(typeof_field(NvdimmDsmIn, function)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_ARG3, + (sizeof(NvdimmDsmIn) - offsetof(NvdimmDsmIn, arg3)) * BITS_PER_BYTE)); + aml_append(method, field); + + /* + * DSM output: + * NVDIMM_DSM_OUT_BUF_SIZE: the size of the buffer filled by QEMU. + * NVDIMM_DSM_OUT_BUF: the buffer QEMU uses to store the result. + * + * Since the page is reused by both input and out, the input data + * will be lost after storing new result into ODAT so we should fetch + * all the input data before writing the result. 
+ */ + field = aml_field(NVDIMM_DSM_MEMORY, AML_DWORD_ACC, AML_NOLOCK, + AML_PRESERVE); + aml_append(field, aml_named_field(NVDIMM_DSM_OUT_BUF_SIZE, + sizeof(typeof_field(NvdimmDsmOut, len)) * BITS_PER_BYTE)); + aml_append(field, aml_named_field(NVDIMM_DSM_OUT_BUF, + (sizeof(NvdimmDsmOut) - offsetof(NvdimmDsmOut, data)) * BITS_PER_BYTE)); + aml_append(method, field); /* * do not support any method if DSM memory address has not been @@ -804,9 +1005,15 @@ static void nvdimm_build_common_dsm(Aml *dev) /* UUID for NVDIMM Root Device */, expected_uuid)); aml_append(method, ifctx); elsectx = aml_else(); - aml_append(elsectx, aml_store( + ifctx = aml_if(aml_equal(handle, aml_int(NVDIMM_QEMU_RSVD_HANDLE_ROOT))); + aml_append(ifctx, aml_store(aml_touuid(NVDIMM_QEMU_RSVD_UUID + /* UUID for QEMU internal use */), expected_uuid)); + aml_append(elsectx, ifctx); + elsectx2 = aml_else(); + aml_append(elsectx2, aml_store( aml_touuid("4309AC30-0D11-11E4-9191-0800200C9A66") /* UUID for NVDIMM Devices */, expected_uuid)); + aml_append(elsectx, elsectx2); aml_append(method, elsectx); uuid_invalid = aml_lnot(aml_equal(uuid, expected_uuid)); @@ -823,7 +1030,7 @@ static void nvdimm_build_common_dsm(Aml *dev) aml_append(unsupport, ifctx); /* No function is supported yet. */ - byte_list[0] = 1 /* Not Supported */; + byte_list[0] = NVDIMM_DSM_RET_STATUS_UNSUPPORT; aml_append(unsupport, aml_return(aml_buffer(1, byte_list))); aml_append(method, unsupport); @@ -832,9 +1039,9 @@ static void nvdimm_build_common_dsm(Aml *dev) * it reserves 0 for root device and is the handle for NVDIMM devices. * See the comments in nvdimm_slot_to_handle(). */ - aml_append(method, aml_store(handle, aml_name("HDLE"))); - aml_append(method, aml_store(aml_arg(1), aml_name("REVS"))); - aml_append(method, aml_store(aml_arg(2), aml_name("FUNC"))); + aml_append(method, aml_store(handle, aml_name(NVDIMM_DSM_HANDLE))); + aml_append(method, aml_store(aml_arg(1), aml_name(NVDIMM_DSM_REVISION))); + aml_append(method, aml_store(aml_arg(2), aml_name(NVDIMM_DSM_FUNCTION))); /* * The fourth parameter (Arg3) of _DSM is a package which contains @@ -852,24 +1059,26 @@ static void nvdimm_build_common_dsm(Aml *dev) pckg_buf = aml_local(3); aml_append(ifctx, aml_store(aml_index(pckg, aml_int(0)), pckg_index)); aml_append(ifctx, aml_store(aml_derefof(pckg_index), pckg_buf)); - aml_append(ifctx, aml_store(pckg_buf, aml_name("ARG3"))); + aml_append(ifctx, aml_store(pckg_buf, aml_name(NVDIMM_DSM_ARG3))); aml_append(method, ifctx); /* * tell QEMU about the real address of DSM memory, then QEMU * gets the control and fills the result in DSM memory. */ - aml_append(method, aml_store(dsm_mem, aml_name("NTFI"))); - - result_size = aml_local(1); - aml_append(method, aml_store(aml_name("RLEN"), result_size)); - aml_append(method, aml_store(aml_shiftleft(result_size, aml_int(3)), - result_size)); - aml_append(method, aml_create_field(aml_name("ODAT"), aml_int(0), - result_size, "OBUF")); + aml_append(method, aml_store(dsm_mem, aml_name(NVDIMM_DSM_NOTIFY))); + + dsm_out_buf_size = aml_local(1); + /* RLEN is not included in the payload returned to guest. 
*/ + aml_append(method, aml_subtract(aml_name(NVDIMM_DSM_OUT_BUF_SIZE), + aml_int(4), dsm_out_buf_size)); + aml_append(method, aml_store(aml_shiftleft(dsm_out_buf_size, aml_int(3)), + dsm_out_buf_size)); + aml_append(method, aml_create_field(aml_name(NVDIMM_DSM_OUT_BUF), + aml_int(0), dsm_out_buf_size, "OBUF")); aml_append(method, aml_concatenate(aml_buffer(0, NULL), aml_name("OBUF"), - aml_arg(6))); - aml_append(method, aml_return(aml_arg(6))); + dsm_out_buf)); + aml_append(method, aml_return(dsm_out_buf)); aml_append(dev, method); } @@ -884,12 +1093,107 @@ static void nvdimm_build_device_dsm(Aml *dev, uint32_t handle) aml_append(dev, method); } -static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev) +static void nvdimm_build_fit(Aml *dev) { - for (; device_list; device_list = device_list->next) { - DeviceState *dev = device_list->data; - int slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, - NULL); + Aml *method, *pkg, *buf, *buf_size, *offset, *call_result; + Aml *whilectx, *ifcond, *ifctx, *elsectx, *fit; + + buf = aml_local(0); + buf_size = aml_local(1); + fit = aml_local(2); + + aml_append(dev, aml_name_decl(NVDIMM_DSM_RFIT_STATUS, aml_int(0))); + + /* build helper function, RFIT. */ + method = aml_method("RFIT", 1, AML_SERIALIZED); + aml_append(method, aml_name_decl("OFST", aml_int(0))); + + /* prepare input package. */ + pkg = aml_package(1); + aml_append(method, aml_store(aml_arg(0), aml_name("OFST"))); + aml_append(pkg, aml_name("OFST")); + + /* call Read_FIT function. */ + call_result = aml_call5(NVDIMM_COMMON_DSM, + aml_touuid(NVDIMM_QEMU_RSVD_UUID), + aml_int(1) /* Revision 1 */, + aml_int(0x1) /* Read FIT */, + pkg, aml_int(NVDIMM_QEMU_RSVD_HANDLE_ROOT)); + aml_append(method, aml_store(call_result, buf)); + + /* handle _DSM result. */ + aml_append(method, aml_create_dword_field(buf, + aml_int(0) /* offset at byte 0 */, "STAU")); + + aml_append(method, aml_store(aml_name("STAU"), + aml_name(NVDIMM_DSM_RFIT_STATUS))); + + /* if something is wrong during _DSM. */ + ifcond = aml_equal(aml_int(NVDIMM_DSM_RET_STATUS_SUCCESS), + aml_name("STAU")); + ifctx = aml_if(aml_lnot(ifcond)); + aml_append(ifctx, aml_return(aml_buffer(0, NULL))); + aml_append(method, ifctx); + + aml_append(method, aml_store(aml_sizeof(buf), buf_size)); + aml_append(method, aml_subtract(buf_size, + aml_int(4) /* the size of "STAU" */, + buf_size)); + + /* if we read the end of fit. */ + ifctx = aml_if(aml_equal(buf_size, aml_int(0))); + aml_append(ifctx, aml_return(aml_buffer(0, NULL))); + aml_append(method, ifctx); + + aml_append(method, aml_create_field(buf, + aml_int(4 * BITS_PER_BYTE), /* offset at byte 4.*/ + aml_shiftleft(buf_size, aml_int(3)), "BUFF")); + aml_append(method, aml_return(aml_name("BUFF"))); + aml_append(dev, method); + + /* build _FIT. */ + method = aml_method("_FIT", 0, AML_SERIALIZED); + offset = aml_local(3); + + aml_append(method, aml_store(aml_buffer(0, NULL), fit)); + aml_append(method, aml_store(aml_int(0), offset)); + + whilectx = aml_while(aml_int(1)); + aml_append(whilectx, aml_store(aml_call1("RFIT", offset), buf)); + aml_append(whilectx, aml_store(aml_sizeof(buf), buf_size)); + + /* + * if fit buffer was changed during RFIT, read from the beginning + * again. 
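
The _FIT method being generated here loops over the RFIT helper, concatenates each chunk onto the result, and restarts from offset 0 whenever the status says the FIT changed while it was being read. The same control flow in plain C, as a rough model only: the chunk size, fake backing data and read_chunk() helper are invented for the example, and the status values mirror the NVDIMM_DSM_RET_STATUS_* constants defined earlier in the hunk.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define STATUS_SUCCESS      0u
#define STATUS_FIT_CHANGED  0x100u
#define CHUNK_MAX           16u      /* deliberately tiny, just for the demo */

static const uint8_t fake_fit[40] = { 1, 2, 3 };   /* pretend NFIT structures */

/* Stand-in for the RFIT firmware call: copy out one chunk, report status. */
static uint32_t read_chunk(uint32_t offset, uint8_t *buf, uint32_t *len)
{
    if (offset >= sizeof(fake_fit)) {
        *len = 0;
        return STATUS_SUCCESS;              /* nothing left to read */
    }
    *len = sizeof(fake_fit) - offset;
    if (*len > CHUNK_MAX) {
        *len = CHUNK_MAX;
    }
    memcpy(buf, fake_fit + offset, *len);
    return STATUS_SUCCESS;
}

int main(void)
{
    uint8_t fit[sizeof(fake_fit)];
    uint8_t chunk[CHUNK_MAX];
    uint32_t offset = 0, len, status;

    for (;;) {
        status = read_chunk(offset, chunk, &len);
        if (status == STATUS_FIT_CHANGED) {
            offset = 0;                     /* FIT changed under us: restart */
            continue;
        }
        if (status != STATUS_SUCCESS || len == 0) {
            break;                          /* done, or a hard error */
        }
        memcpy(fit + offset, chunk, len);
        offset += len;
    }
    printf("read %u FIT bytes\n", offset);
    return 0;
}
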
+ */ + ifctx = aml_if(aml_equal(aml_name(NVDIMM_DSM_RFIT_STATUS), + aml_int(NVDIMM_DSM_RET_STATUS_FIT_CHANGED))); + aml_append(ifctx, aml_store(aml_buffer(0, NULL), fit)); + aml_append(ifctx, aml_store(aml_int(0), offset)); + aml_append(whilectx, ifctx); + + elsectx = aml_else(); + + /* finish fit read if no data is read out. */ + ifctx = aml_if(aml_equal(buf_size, aml_int(0))); + aml_append(ifctx, aml_return(fit)); + aml_append(elsectx, ifctx); + + /* update the offset. */ + aml_append(elsectx, aml_add(offset, buf_size, offset)); + /* append the data we read out to the fit buffer. */ + aml_append(elsectx, aml_concatenate(fit, buf, fit)); + aml_append(whilectx, elsectx); + aml_append(method, whilectx); + + aml_append(dev, method); +} + +static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots) +{ + uint32_t slot; + + for (slot = 0; slot < ram_slots; slot++) { uint32_t handle = nvdimm_slot_to_handle(slot); Aml *nvdimm_dev; @@ -910,11 +1214,11 @@ static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev) } } -static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, - GArray *table_data, BIOSLinker *linker, - GArray *dsm_dma_arrea) +static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, + BIOSLinker *linker, GArray *dsm_dma_arrea, + uint32_t ram_slots) { - Aml *ssdt, *sb_scope, *dev, *field; + Aml *ssdt, *sb_scope, *dev; int mem_addr_offset, nvdimm_ssdt; acpi_add_table(table_offsets, table_data); @@ -939,69 +1243,13 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, */ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012"))); - /* map DSM memory and IO into ACPI namespace. */ - aml_append(dev, aml_operation_region("NPIO", AML_SYSTEM_IO, - aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN)); - aml_append(dev, aml_operation_region("NRAM", AML_SYSTEM_MEMORY, - aml_name(NVDIMM_ACPI_MEM_ADDR), sizeof(NvdimmDsmIn))); - - /* - * DSM notifier: - * NTFI: write the address of DSM memory and notify QEMU to emulate - * the access. - * - * It is the IO port so that accessing them will cause VM-exit, the - * control will be transferred to QEMU. - */ - field = aml_field("NPIO", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("NTFI", - sizeof(uint32_t) * BITS_PER_BYTE)); - aml_append(dev, field); - - /* - * DSM input: - * HDLE: store device's handle, it's zero if the _DSM call happens - * on NVDIMM Root Device. - * REVS: store the Arg1 of _DSM call. - * FUNC: store the Arg2 of _DSM call. - * ARG3: store the Arg3 of _DSM call. - * - * They are RAM mapping on host so that these accesses never cause - * VM-EXIT. - */ - field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("HDLE", - sizeof(typeof_field(NvdimmDsmIn, handle)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("REVS", - sizeof(typeof_field(NvdimmDsmIn, revision)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("FUNC", - sizeof(typeof_field(NvdimmDsmIn, function)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("ARG3", - (sizeof(NvdimmDsmIn) - offsetof(NvdimmDsmIn, arg3)) * BITS_PER_BYTE)); - aml_append(dev, field); - - /* - * DSM output: - * RLEN: the size of the buffer filled by QEMU. - * ODAT: the buffer QEMU uses to store the result. - * - * Since the page is reused by both input and out, the input data - * will be lost after storing new result into ODAT so we should fetch - * all the input data before writing the result. 
- */ - field = aml_field("NRAM", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("RLEN", - sizeof(typeof_field(NvdimmDsmOut, len)) * BITS_PER_BYTE)); - aml_append(field, aml_named_field("ODAT", - (sizeof(NvdimmDsmOut) - offsetof(NvdimmDsmOut, data)) * BITS_PER_BYTE)); - aml_append(dev, field); - nvdimm_build_common_dsm(dev); /* 0 is reserved for root device. */ nvdimm_build_device_dsm(dev, 0); + nvdimm_build_fit(dev); - nvdimm_build_nvdimm_devices(device_list, dev); + nvdimm_build_nvdimm_devices(dev, ram_slots); aml_append(sb_scope, dev); aml_append(ssdt, sb_scope); @@ -1026,17 +1274,25 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets, } void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, GArray *dsm_dma_arrea) + BIOSLinker *linker, AcpiNVDIMMState *state, + uint32_t ram_slots) { GSList *device_list; + /* no nvdimm device can be plugged. */ + if (!ram_slots) { + return; + } + + nvdimm_build_ssdt(table_offsets, table_data, linker, state->dsm_mem, + ram_slots); + + device_list = nvdimm_get_device_list(); /* no NVDIMM device is plugged. */ - device_list = nvdimm_get_plugged_device_list(); if (!device_list) { return; } - nvdimm_build_nfit(device_list, table_offsets, table_data, linker); - nvdimm_build_ssdt(device_list, table_offsets, table_data, linker, - dsm_dma_arrea); + + nvdimm_build_nfit(state, table_offsets, table_data, linker); g_slist_free(device_list); } diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 2adc246b00..17d36bd595 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -378,7 +378,12 @@ static void piix4_device_plug_cb(HotplugHandler *hotplug_dev, if (s->acpi_memory_hotplug.is_enabled && object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - acpi_memory_plug_cb(hotplug_dev, &s->acpi_memory_hotplug, dev, errp); + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + nvdimm_acpi_plug_cb(hotplug_dev, dev); + } else { + acpi_memory_plug_cb(hotplug_dev, &s->acpi_memory_hotplug, + dev, errp); + } } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { acpi_pcihp_device_plug_cb(hotplug_dev, &s->acpi_pci_hotplug, dev, errp); } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { diff --git a/hw/adc/Makefile.objs b/hw/adc/Makefile.objs new file mode 100644 index 0000000000..3f6dfdedae --- /dev/null +++ b/hw/adc/Makefile.objs @@ -0,0 +1 @@ +obj-$(CONFIG_STM32F2XX_ADC) += stm32f2xx_adc.o diff --git a/hw/adc/stm32f2xx_adc.c b/hw/adc/stm32f2xx_adc.c new file mode 100644 index 0000000000..90fe9de299 --- /dev/null +++ b/hw/adc/stm32f2xx_adc.c @@ -0,0 +1,306 @@ +/* + * STM32F2XX ADC + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "hw/adc/stm32f2xx_adc.h" + +#ifndef STM_ADC_ERR_DEBUG +#define STM_ADC_ERR_DEBUG 0 +#endif + +#define DB_PRINT_L(lvl, fmt, args...) do { \ + if (STM_ADC_ERR_DEBUG >= lvl) { \ + qemu_log("%s: " fmt, __func__, ## args); \ + } \ +} while (0); + +#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args) + +static void stm32f2xx_adc_reset(DeviceState *dev) +{ + STM32F2XXADCState *s = STM32F2XX_ADC(dev); + + s->adc_sr = 0x00000000; + s->adc_cr1 = 0x00000000; + s->adc_cr2 = 0x00000000; + s->adc_smpr1 = 0x00000000; + s->adc_smpr2 = 0x00000000; + s->adc_jofr[0] = 0x00000000; + s->adc_jofr[1] = 0x00000000; + s->adc_jofr[2] = 0x00000000; + s->adc_jofr[3] = 0x00000000; + s->adc_htr = 0x00000FFF; + s->adc_ltr = 0x00000000; + s->adc_sqr1 = 0x00000000; + s->adc_sqr2 = 0x00000000; + s->adc_sqr3 = 0x00000000; + s->adc_jsqr = 0x00000000; + s->adc_jdr[0] = 0x00000000; + s->adc_jdr[1] = 0x00000000; + s->adc_jdr[2] = 0x00000000; + s->adc_jdr[3] = 0x00000000; + s->adc_dr = 0x00000000; +} + +static uint32_t stm32f2xx_adc_generate_value(STM32F2XXADCState *s) +{ + /* Attempts to fake some ADC values */ + s->adc_dr = s->adc_dr + 7; + + switch ((s->adc_cr1 & ADC_CR1_RES) >> 24) { + case 0: + /* 12-bit */ + s->adc_dr &= 0xFFF; + break; + case 1: + /* 10-bit */ + s->adc_dr &= 0x3FF; + break; + case 2: + /* 8-bit */ + s->adc_dr &= 0xFF; + break; + default: + /* 6-bit */ + s->adc_dr &= 0x3F; + } + + if (s->adc_cr2 & ADC_CR2_ALIGN) { + return (s->adc_dr << 1) & 0xFFF0; + } else { + return s->adc_dr; + } +} + +static uint64_t stm32f2xx_adc_read(void *opaque, hwaddr addr, + unsigned int size) +{ + STM32F2XXADCState *s = opaque; + + DB_PRINT("Address: 0x%" HWADDR_PRIx "\n", addr); + + if (addr >= ADC_COMMON_ADDRESS) { + qemu_log_mask(LOG_UNIMP, + "%s: ADC Common Register Unsupported\n", __func__); + } + + switch (addr) { + case ADC_SR: + return s->adc_sr; + case ADC_CR1: + return s->adc_cr1; + case ADC_CR2: + return s->adc_cr2 & 0xFFFFFFF; + case ADC_SMPR1: + return s->adc_smpr1; + case ADC_SMPR2: + return s->adc_smpr2; + case ADC_JOFR1: + case ADC_JOFR2: + case ADC_JOFR3: + case ADC_JOFR4: + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Injection ADC is not implemented, the registers are " \ + "included for compatibility\n", __func__); + return s->adc_jofr[(addr - ADC_JOFR1) / 4]; + case ADC_HTR: + return s->adc_htr; + case ADC_LTR: + return s->adc_ltr; + case ADC_SQR1: + return s->adc_sqr1; + case ADC_SQR2: + return s->adc_sqr2; + case ADC_SQR3: + return s->adc_sqr3; + case ADC_JSQR: + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Injection ADC is not implemented, the registers are " \ + "included for compatibility\n", __func__); + return s->adc_jsqr; + case ADC_JDR1: + case ADC_JDR2: + case ADC_JDR3: + case ADC_JDR4: + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Injection ADC is not implemented, the registers are " \ + "included for compatibility\n", __func__); + return s->adc_jdr[(addr - ADC_JDR1) / 4] - + s->adc_jofr[(addr - ADC_JDR1) / 4]; + case ADC_DR: + if ((s->adc_cr2 & ADC_CR2_ADON) && (s->adc_cr2 & ADC_CR2_SWSTART)) { + s->adc_cr2 ^= ADC_CR2_SWSTART; + return stm32f2xx_adc_generate_value(s); + } else { + return 0; + } + default: + 
qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, addr); + } + + return 0; +} + +static void stm32f2xx_adc_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + STM32F2XXADCState *s = opaque; + uint32_t value = (uint32_t) val64; + + DB_PRINT("Address: 0x%" HWADDR_PRIx ", Value: 0x%x\n", + addr, value); + + if (addr >= 0x100) { + qemu_log_mask(LOG_UNIMP, + "%s: ADC Common Register Unsupported\n", __func__); + } + + switch (addr) { + case ADC_SR: + s->adc_sr &= (value & 0x3F); + break; + case ADC_CR1: + s->adc_cr1 = value; + break; + case ADC_CR2: + s->adc_cr2 = value; + break; + case ADC_SMPR1: + s->adc_smpr1 = value; + break; + case ADC_SMPR2: + s->adc_smpr2 = value; + break; + case ADC_JOFR1: + case ADC_JOFR2: + case ADC_JOFR3: + case ADC_JOFR4: + s->adc_jofr[(addr - ADC_JOFR1) / 4] = (value & 0xFFF); + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Injection ADC is not implemented, the registers are " \ + "included for compatibility\n", __func__); + break; + case ADC_HTR: + s->adc_htr = value; + break; + case ADC_LTR: + s->adc_ltr = value; + break; + case ADC_SQR1: + s->adc_sqr1 = value; + break; + case ADC_SQR2: + s->adc_sqr2 = value; + break; + case ADC_SQR3: + s->adc_sqr3 = value; + break; + case ADC_JSQR: + s->adc_jsqr = value; + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Injection ADC is not implemented, the registers are " \ + "included for compatibility\n", __func__); + break; + case ADC_JDR1: + case ADC_JDR2: + case ADC_JDR3: + case ADC_JDR4: + s->adc_jdr[(addr - ADC_JDR1) / 4] = value; + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Injection ADC is not implemented, the registers are " \ + "included for compatibility\n", __func__); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, addr); + } +} + +static const MemoryRegionOps stm32f2xx_adc_ops = { + .read = stm32f2xx_adc_read, + .write = stm32f2xx_adc_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_stm32f2xx_adc = { + .name = TYPE_STM32F2XX_ADC, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(adc_sr, STM32F2XXADCState), + VMSTATE_UINT32(adc_cr1, STM32F2XXADCState), + VMSTATE_UINT32(adc_cr2, STM32F2XXADCState), + VMSTATE_UINT32(adc_smpr1, STM32F2XXADCState), + VMSTATE_UINT32(adc_smpr2, STM32F2XXADCState), + VMSTATE_UINT32_ARRAY(adc_jofr, STM32F2XXADCState, 4), + VMSTATE_UINT32(adc_htr, STM32F2XXADCState), + VMSTATE_UINT32(adc_ltr, STM32F2XXADCState), + VMSTATE_UINT32(adc_sqr1, STM32F2XXADCState), + VMSTATE_UINT32(adc_sqr2, STM32F2XXADCState), + VMSTATE_UINT32(adc_sqr3, STM32F2XXADCState), + VMSTATE_UINT32(adc_jsqr, STM32F2XXADCState), + VMSTATE_UINT32_ARRAY(adc_jdr, STM32F2XXADCState, 4), + VMSTATE_UINT32(adc_dr, STM32F2XXADCState), + VMSTATE_END_OF_LIST() + } +}; + +static void stm32f2xx_adc_init(Object *obj) +{ + STM32F2XXADCState *s = STM32F2XX_ADC(obj); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + memory_region_init_io(&s->mmio, obj, &stm32f2xx_adc_ops, s, + TYPE_STM32F2XX_ADC, 0xFF); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); +} + +static void stm32f2xx_adc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = stm32f2xx_adc_reset; + dc->vmsd = &vmstate_stm32f2xx_adc; +} + +static const TypeInfo stm32f2xx_adc_info = { + .name = TYPE_STM32F2XX_ADC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(STM32F2XXADCState), + .instance_init = stm32f2xx_adc_init, + .class_init = 
stm32f2xx_adc_class_init, +}; + +static void stm32f2xx_adc_register_types(void) +{ + type_register_static(&stm32f2xx_adc_info); +} + +type_init(stm32f2xx_adc_register_types) diff --git a/hw/alpha/dp264.c b/hw/alpha/dp264.c index f1267b5441..d6431fd586 100644 --- a/hw/alpha/dp264.c +++ b/hw/alpha/dp264.c @@ -88,7 +88,7 @@ static void clipper_init(MachineState *machine) pci_vga_init(pci_bus); /* Serial code setup. */ - serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); + serial_hds_isa_init(isa_bus, 0, MAX_SERIAL_PORTS); /* Network setup. e1000 is good enough, failing Tulip support. */ for (i = 0; i < nb_nics; i++) { diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index 883db13f96..f50f5cf186 100644 --- a/hw/alpha/typhoon.c +++ b/hw/alpha/typhoon.c @@ -376,7 +376,7 @@ static void cchip_write(void *opaque, hwaddr addr, break; case 0x0240: /* DIM1 */ /* DIM: Device Interrupt Mask Register, CPU1. */ - s->cchip.dim[0] = val; + s->cchip.dim[1] = val; cpu_irq_change(s->cchip.cpu[1], val & s->cchip.drir); break; diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs index d98cee32fe..dbe6f819ab 100644 --- a/hw/arm/Makefile.objs +++ b/hw/arm/Makefile.objs @@ -29,5 +29,5 @@ obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-zynqmp.o xlnx-ep108.o obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o obj-$(CONFIG_FSL_IMX6) += fsl-imx6.o sabrelite.o -obj-$(CONFIG_ASPEED_SOC) += ast2400.o palmetto-bmc.o +obj-$(CONFIG_ASPEED_SOC) += aspeed_soc.o aspeed.o endif diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c new file mode 100644 index 0000000000..c7206fda6d --- /dev/null +++ b/hw/arm/aspeed.c @@ -0,0 +1,197 @@ +/* + * OpenPOWER Palmetto BMC + * + * Andrew Jeffery + * + * Copyright 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "cpu.h" +#include "exec/address-spaces.h" +#include "hw/arm/arm.h" +#include "hw/arm/aspeed_soc.h" +#include "hw/boards.h" +#include "qemu/log.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" + +static struct arm_boot_info aspeed_board_binfo = { + .board_id = -1, /* device-tree-only board */ + .nb_cpus = 1, +}; + +typedef struct AspeedBoardState { + AspeedSoCState soc; + MemoryRegion ram; +} AspeedBoardState; + +typedef struct AspeedBoardConfig { + const char *soc_name; + uint32_t hw_strap1; +} AspeedBoardConfig; + +enum { + PALMETTO_BMC, + AST2500_EVB, +}; + +#define PALMETTO_BMC_HW_STRAP1 ( \ + SCU_AST2400_HW_STRAP_DRAM_SIZE(DRAM_SIZE_256MB) | \ + SCU_AST2400_HW_STRAP_DRAM_CONFIG(2 /* DDR3 with CL=6, CWL=5 */) | \ + SCU_AST2400_HW_STRAP_ACPI_DIS | \ + SCU_AST2400_HW_STRAP_SET_CLK_SOURCE(AST2400_CLK_48M_IN) | \ + SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_LPC_RESET_PIN | \ + SCU_HW_STRAP_SPI_MODE(SCU_HW_STRAP_SPI_M_S_EN) | \ + SCU_AST2400_HW_STRAP_SET_CPU_AHB_RATIO(AST2400_CPU_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_SPI_WIDTH | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \ + SCU_AST2400_HW_STRAP_BOOT_MODE(AST2400_SPI_BOOT)) + +#define AST2500_EVB_HW_STRAP1 (( \ + AST2500_HW_STRAP1_DEFAULTS | \ + SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE | \ + SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE | \ + SCU_AST2500_HW_STRAP_UART_DEBUG | \ + SCU_AST2500_HW_STRAP_DDR4_ENABLE | \ + SCU_HW_STRAP_MAC1_RGMII | \ + SCU_HW_STRAP_MAC0_RGMII) & \ + ~SCU_HW_STRAP_2ND_BOOT_WDT) + +static const AspeedBoardConfig aspeed_boards[] = { + [PALMETTO_BMC] = { "ast2400-a0", PALMETTO_BMC_HW_STRAP1 }, + [AST2500_EVB] = { "ast2500-a1", AST2500_EVB_HW_STRAP1 }, +}; + +static void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype, + Error **errp) +{ + int i ; + + for (i = 0; i < s->num_cs; ++i) { + AspeedSMCFlash *fl = &s->flashes[i]; + DriveInfo *dinfo = drive_get_next(IF_MTD); + qemu_irq cs_line; + + /* + * FIXME: check that we are not using a flash module exceeding + * the controller segment size + */ + fl->flash = ssi_create_slave_no_init(s->spi, flashtype); + if (dinfo) { + qdev_prop_set_drive(fl->flash, "drive", blk_by_legacy_dinfo(dinfo), + errp); + } + qdev_init_nofail(fl->flash); + + cs_line = qdev_get_gpio_in_named(fl->flash, SSI_GPIO_CS, 0); + sysbus_connect_irq(SYS_BUS_DEVICE(s), i + 1, cs_line); + } +} + +static void aspeed_board_init(MachineState *machine, + const AspeedBoardConfig *cfg) +{ + AspeedBoardState *bmc; + AspeedSoCClass *sc; + + bmc = g_new0(AspeedBoardState, 1); + object_initialize(&bmc->soc, (sizeof(bmc->soc)), cfg->soc_name); + object_property_add_child(OBJECT(machine), "soc", OBJECT(&bmc->soc), + &error_abort); + + sc = ASPEED_SOC_GET_CLASS(&bmc->soc); + + object_property_set_int(OBJECT(&bmc->soc), ram_size, "ram-size", + &error_abort); + object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap1, "hw-strap1", + &error_abort); + object_property_set_bool(OBJECT(&bmc->soc), true, "realized", + &error_abort); + + /* + * Allocate RAM after the memory controller has checked the size + * was valid. If not, a default value is used. 
+ */ + ram_size = object_property_get_int(OBJECT(&bmc->soc), "ram-size", + &error_abort); + + memory_region_allocate_system_memory(&bmc->ram, NULL, "ram", ram_size); + memory_region_add_subregion(get_system_memory(), sc->info->sdram_base, + &bmc->ram); + object_property_add_const_link(OBJECT(&bmc->soc), "ram", OBJECT(&bmc->ram), + &error_abort); + + aspeed_board_init_flashes(&bmc->soc.fmc, "n25q256a", &error_abort); + aspeed_board_init_flashes(&bmc->soc.spi[0], "mx25l25635e", &error_abort); + + aspeed_board_binfo.kernel_filename = machine->kernel_filename; + aspeed_board_binfo.initrd_filename = machine->initrd_filename; + aspeed_board_binfo.kernel_cmdline = machine->kernel_cmdline; + aspeed_board_binfo.ram_size = ram_size; + aspeed_board_binfo.loader_start = sc->info->sdram_base; + + arm_load_kernel(ARM_CPU(first_cpu), &aspeed_board_binfo); +} + +static void palmetto_bmc_init(MachineState *machine) +{ + aspeed_board_init(machine, &aspeed_boards[PALMETTO_BMC]); +} + +static void palmetto_bmc_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "OpenPOWER Palmetto BMC (ARM926EJ-S)"; + mc->init = palmetto_bmc_init; + mc->max_cpus = 1; + mc->no_sdcard = 1; + mc->no_floppy = 1; + mc->no_cdrom = 1; + mc->no_parallel = 1; +} + +static const TypeInfo palmetto_bmc_type = { + .name = MACHINE_TYPE_NAME("palmetto-bmc"), + .parent = TYPE_MACHINE, + .class_init = palmetto_bmc_class_init, +}; + +static void ast2500_evb_init(MachineState *machine) +{ + aspeed_board_init(machine, &aspeed_boards[AST2500_EVB]); +} + +static void ast2500_evb_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "Aspeed AST2500 EVB (ARM1176)"; + mc->init = ast2500_evb_init; + mc->max_cpus = 1; + mc->no_sdcard = 1; + mc->no_floppy = 1; + mc->no_cdrom = 1; + mc->no_parallel = 1; +} + +static const TypeInfo ast2500_evb_type = { + .name = MACHINE_TYPE_NAME("ast2500-evb"), + .parent = TYPE_MACHINE, + .class_init = ast2500_evb_class_init, +}; + +static void aspeed_machine_init(void) +{ + type_register_static(&palmetto_bmc_type); + type_register_static(&ast2500_evb_type); +} + +type_init(aspeed_machine_init) diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c new file mode 100644 index 0000000000..e14f5c217e --- /dev/null +++ b/hw/arm/aspeed_soc.c @@ -0,0 +1,276 @@ +/* + * ASPEED SoC family + * + * Andrew Jeffery + * Jeremy Kerr + * + * Copyright 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "cpu.h" +#include "exec/address-spaces.h" +#include "hw/arm/aspeed_soc.h" +#include "hw/char/serial.h" +#include "qemu/log.h" +#include "hw/i2c/aspeed_i2c.h" + +#define ASPEED_SOC_UART_5_BASE 0x00184000 +#define ASPEED_SOC_IOMEM_SIZE 0x00200000 +#define ASPEED_SOC_IOMEM_BASE 0x1E600000 +#define ASPEED_SOC_FMC_BASE 0x1E620000 +#define ASPEED_SOC_SPI_BASE 0x1E630000 +#define ASPEED_SOC_SPI2_BASE 0x1E631000 +#define ASPEED_SOC_VIC_BASE 0x1E6C0000 +#define ASPEED_SOC_SDMC_BASE 0x1E6E0000 +#define ASPEED_SOC_SCU_BASE 0x1E6E2000 +#define ASPEED_SOC_TIMER_BASE 0x1E782000 +#define ASPEED_SOC_I2C_BASE 0x1E78A000 + +static const int uart_irqs[] = { 9, 32, 33, 34, 10 }; +static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, }; + +#define AST2400_SDRAM_BASE 0x40000000 +#define AST2500_SDRAM_BASE 0x80000000 + +static const hwaddr aspeed_soc_ast2400_spi_bases[] = { ASPEED_SOC_SPI_BASE }; +static const char *aspeed_soc_ast2400_typenames[] = { "aspeed.smc.spi" }; + +static const hwaddr aspeed_soc_ast2500_spi_bases[] = { ASPEED_SOC_SPI_BASE, + ASPEED_SOC_SPI2_BASE}; +static const char *aspeed_soc_ast2500_typenames[] = { + "aspeed.smc.ast2500-spi1", "aspeed.smc.ast2500-spi2" }; + +static const AspeedSoCInfo aspeed_socs[] = { + { "ast2400-a0", "arm926", AST2400_A0_SILICON_REV, AST2400_SDRAM_BASE, + 1, aspeed_soc_ast2400_spi_bases, + "aspeed.smc.fmc", aspeed_soc_ast2400_typenames }, + { "ast2400", "arm926", AST2400_A0_SILICON_REV, AST2400_SDRAM_BASE, + 1, aspeed_soc_ast2400_spi_bases, + "aspeed.smc.fmc", aspeed_soc_ast2400_typenames }, + { "ast2500-a1", "arm1176", AST2500_A1_SILICON_REV, AST2500_SDRAM_BASE, + 2, aspeed_soc_ast2500_spi_bases, + "aspeed.smc.ast2500-fmc", aspeed_soc_ast2500_typenames }, +}; + +/* + * IO handlers: simply catch any reads/writes to IO addresses that aren't + * handled by a device mapping. 
+ */ + +static uint64_t aspeed_soc_io_read(void *p, hwaddr offset, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", + __func__, offset, size); + return 0; +} + +static void aspeed_soc_io_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", + __func__, offset, value, size); +} + +static const MemoryRegionOps aspeed_soc_io_ops = { + .read = aspeed_soc_io_read, + .write = aspeed_soc_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void aspeed_soc_init(Object *obj) +{ + AspeedSoCState *s = ASPEED_SOC(obj); + AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s); + int i; + + s->cpu = cpu_arm_init(sc->info->cpu_model); + + object_initialize(&s->vic, sizeof(s->vic), TYPE_ASPEED_VIC); + object_property_add_child(obj, "vic", OBJECT(&s->vic), NULL); + qdev_set_parent_bus(DEVICE(&s->vic), sysbus_get_default()); + + object_initialize(&s->timerctrl, sizeof(s->timerctrl), TYPE_ASPEED_TIMER); + object_property_add_child(obj, "timerctrl", OBJECT(&s->timerctrl), NULL); + qdev_set_parent_bus(DEVICE(&s->timerctrl), sysbus_get_default()); + + object_initialize(&s->i2c, sizeof(s->i2c), TYPE_ASPEED_I2C); + object_property_add_child(obj, "i2c", OBJECT(&s->i2c), NULL); + qdev_set_parent_bus(DEVICE(&s->i2c), sysbus_get_default()); + + object_initialize(&s->scu, sizeof(s->scu), TYPE_ASPEED_SCU); + object_property_add_child(obj, "scu", OBJECT(&s->scu), NULL); + qdev_set_parent_bus(DEVICE(&s->scu), sysbus_get_default()); + qdev_prop_set_uint32(DEVICE(&s->scu), "silicon-rev", + sc->info->silicon_rev); + object_property_add_alias(obj, "hw-strap1", OBJECT(&s->scu), + "hw-strap1", &error_abort); + object_property_add_alias(obj, "hw-strap2", OBJECT(&s->scu), + "hw-strap2", &error_abort); + + object_initialize(&s->fmc, sizeof(s->fmc), sc->info->fmc_typename); + object_property_add_child(obj, "fmc", OBJECT(&s->fmc), NULL); + qdev_set_parent_bus(DEVICE(&s->fmc), sysbus_get_default()); + + for (i = 0; i < sc->info->spis_num; i++) { + object_initialize(&s->spi[i], sizeof(s->spi[i]), + sc->info->spi_typename[i]); + object_property_add_child(obj, "spi", OBJECT(&s->spi[i]), NULL); + qdev_set_parent_bus(DEVICE(&s->spi[i]), sysbus_get_default()); + } + + object_initialize(&s->sdmc, sizeof(s->sdmc), TYPE_ASPEED_SDMC); + object_property_add_child(obj, "sdmc", OBJECT(&s->sdmc), NULL); + qdev_set_parent_bus(DEVICE(&s->sdmc), sysbus_get_default()); + qdev_prop_set_uint32(DEVICE(&s->sdmc), "silicon-rev", + sc->info->silicon_rev); + object_property_add_alias(obj, "ram-size", OBJECT(&s->sdmc), + "ram-size", &error_abort); +} + +static void aspeed_soc_realize(DeviceState *dev, Error **errp) +{ + int i; + AspeedSoCState *s = ASPEED_SOC(dev); + AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s); + Error *err = NULL, *local_err = NULL; + + /* IO space */ + memory_region_init_io(&s->iomem, NULL, &aspeed_soc_io_ops, NULL, + "aspeed_soc.io", ASPEED_SOC_IOMEM_SIZE); + memory_region_add_subregion_overlap(get_system_memory(), + ASPEED_SOC_IOMEM_BASE, &s->iomem, -1); + + /* VIC */ + object_property_set_bool(OBJECT(&s->vic), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->vic), 0, ASPEED_SOC_VIC_BASE); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 0, + qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 1, + qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_FIQ)); + + /* Timer */ + object_property_set_bool(OBJECT(&s->timerctrl), 
true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->timerctrl), 0, ASPEED_SOC_TIMER_BASE); + for (i = 0; i < ARRAY_SIZE(timer_irqs); i++) { + qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->vic), timer_irqs[i]); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq); + } + + /* SCU */ + object_property_set_bool(OBJECT(&s->scu), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->scu), 0, ASPEED_SOC_SCU_BASE); + + /* UART - attach an 8250 to the IO space as our UART5 */ + if (serial_hds[0]) { + qemu_irq uart5 = qdev_get_gpio_in(DEVICE(&s->vic), uart_irqs[4]); + serial_mm_init(&s->iomem, ASPEED_SOC_UART_5_BASE, 2, + uart5, 38400, serial_hds[0], DEVICE_LITTLE_ENDIAN); + } + + /* I2C */ + object_property_set_bool(OBJECT(&s->i2c), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c), 0, ASPEED_SOC_I2C_BASE); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c), 0, + qdev_get_gpio_in(DEVICE(&s->vic), 12)); + + /* FMC */ + object_property_set_int(OBJECT(&s->fmc), 1, "num-cs", &err); + object_property_set_bool(OBJECT(&s->fmc), true, "realized", &local_err); + error_propagate(&err, local_err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 0, ASPEED_SOC_FMC_BASE); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->fmc), 1, + s->fmc.ctrl->flash_window_base); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0, + qdev_get_gpio_in(DEVICE(&s->vic), 19)); + + /* SPI */ + for (i = 0; i < sc->info->spis_num; i++) { + object_property_set_int(OBJECT(&s->spi[i]), 1, "num-cs", &err); + object_property_set_bool(OBJECT(&s->spi[i]), true, "realized", + &local_err); + error_propagate(&err, local_err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 0, sc->info->spi_bases[i]); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi[i]), 1, + s->spi[i].ctrl->flash_window_base); + } + + /* SDMC - SDRAM Memory Controller */ + object_property_set_bool(OBJECT(&s->sdmc), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->sdmc), 0, ASPEED_SOC_SDMC_BASE); +} + +static void aspeed_soc_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + AspeedSoCClass *sc = ASPEED_SOC_CLASS(oc); + + sc->info = (AspeedSoCInfo *) data; + dc->realize = aspeed_soc_realize; + + /* + * Reason: creates an ARM CPU, thus use after free(), see + * arm_cpu_class_init() + */ + dc->cannot_destroy_with_object_finalize_yet = true; +} + +static const TypeInfo aspeed_soc_type_info = { + .name = TYPE_ASPEED_SOC, + .parent = TYPE_DEVICE, + .instance_init = aspeed_soc_init, + .instance_size = sizeof(AspeedSoCState), + .class_size = sizeof(AspeedSoCClass), + .abstract = true, +}; + +static void aspeed_soc_register_types(void) +{ + int i; + + type_register_static(&aspeed_soc_type_info); + for (i = 0; i < ARRAY_SIZE(aspeed_socs); ++i) { + TypeInfo ti = { + .name = aspeed_socs[i].name, + .parent = TYPE_ASPEED_SOC, + .class_init = aspeed_soc_class_init, + .class_data = (void *) &aspeed_socs[i], + }; + type_register(&ti); + } +} + +type_init(aspeed_soc_register_types) diff --git a/hw/arm/ast2400.c b/hw/arm/ast2400.c deleted file mode 100644 index 326fdb36ee..0000000000 --- a/hw/arm/ast2400.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * AST2400 SoC - * - * Andrew Jeffery - * Jeremy Kerr - * - * Copyright 
2016 IBM Corp. - * - * This code is licensed under the GPL version 2 or later. See - * the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qemu-common.h" -#include "cpu.h" -#include "exec/address-spaces.h" -#include "hw/arm/ast2400.h" -#include "hw/char/serial.h" -#include "qemu/log.h" -#include "hw/i2c/aspeed_i2c.h" - -#define AST2400_UART_5_BASE 0x00184000 -#define AST2400_IOMEM_SIZE 0x00200000 -#define AST2400_IOMEM_BASE 0x1E600000 -#define AST2400_SMC_BASE AST2400_IOMEM_BASE /* Legacy SMC */ -#define AST2400_FMC_BASE 0X1E620000 -#define AST2400_SPI_BASE 0X1E630000 -#define AST2400_VIC_BASE 0x1E6C0000 -#define AST2400_SCU_BASE 0x1E6E2000 -#define AST2400_TIMER_BASE 0x1E782000 -#define AST2400_I2C_BASE 0x1E78A000 - -#define AST2400_FMC_FLASH_BASE 0x20000000 -#define AST2400_SPI_FLASH_BASE 0x30000000 - -static const int uart_irqs[] = { 9, 32, 33, 34, 10 }; -static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, }; - -/* - * IO handlers: simply catch any reads/writes to IO addresses that aren't - * handled by a device mapping. - */ - -static uint64_t ast2400_io_read(void *p, hwaddr offset, unsigned size) -{ - qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n", - __func__, offset, size); - return 0; -} - -static void ast2400_io_write(void *opaque, hwaddr offset, uint64_t value, - unsigned size) -{ - qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n", - __func__, offset, value, size); -} - -static const MemoryRegionOps ast2400_io_ops = { - .read = ast2400_io_read, - .write = ast2400_io_write, - .endianness = DEVICE_LITTLE_ENDIAN, -}; - -static void ast2400_init(Object *obj) -{ - AST2400State *s = AST2400(obj); - - s->cpu = cpu_arm_init("arm926"); - - object_initialize(&s->vic, sizeof(s->vic), TYPE_ASPEED_VIC); - object_property_add_child(obj, "vic", OBJECT(&s->vic), NULL); - qdev_set_parent_bus(DEVICE(&s->vic), sysbus_get_default()); - - object_initialize(&s->timerctrl, sizeof(s->timerctrl), TYPE_ASPEED_TIMER); - object_property_add_child(obj, "timerctrl", OBJECT(&s->timerctrl), NULL); - qdev_set_parent_bus(DEVICE(&s->timerctrl), sysbus_get_default()); - - object_initialize(&s->i2c, sizeof(s->i2c), TYPE_ASPEED_I2C); - object_property_add_child(obj, "i2c", OBJECT(&s->i2c), NULL); - qdev_set_parent_bus(DEVICE(&s->i2c), sysbus_get_default()); - - object_initialize(&s->scu, sizeof(s->scu), TYPE_ASPEED_SCU); - object_property_add_child(obj, "scu", OBJECT(&s->scu), NULL); - qdev_set_parent_bus(DEVICE(&s->scu), sysbus_get_default()); - qdev_prop_set_uint32(DEVICE(&s->scu), "silicon-rev", - AST2400_A0_SILICON_REV); - object_property_add_alias(obj, "hw-strap1", OBJECT(&s->scu), - "hw-strap1", &error_abort); - object_property_add_alias(obj, "hw-strap2", OBJECT(&s->scu), - "hw-strap2", &error_abort); - - object_initialize(&s->smc, sizeof(s->smc), "aspeed.smc.fmc"); - object_property_add_child(obj, "smc", OBJECT(&s->smc), NULL); - qdev_set_parent_bus(DEVICE(&s->smc), sysbus_get_default()); - - object_initialize(&s->spi, sizeof(s->spi), "aspeed.smc.spi"); - object_property_add_child(obj, "spi", OBJECT(&s->spi), NULL); - qdev_set_parent_bus(DEVICE(&s->spi), sysbus_get_default()); -} - -static void ast2400_realize(DeviceState *dev, Error **errp) -{ - int i; - AST2400State *s = AST2400(dev); - Error *err = NULL, *local_err = NULL; - - /* IO space */ - memory_region_init_io(&s->iomem, NULL, &ast2400_io_ops, NULL, - "ast2400.io", AST2400_IOMEM_SIZE); - memory_region_add_subregion_overlap(get_system_memory(), 
AST2400_IOMEM_BASE, - &s->iomem, -1); - - /* VIC */ - object_property_set_bool(OBJECT(&s->vic), true, "realized", &err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->vic), 0, AST2400_VIC_BASE); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 0, - qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ)); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 1, - qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_FIQ)); - - /* Timer */ - object_property_set_bool(OBJECT(&s->timerctrl), true, "realized", &err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->timerctrl), 0, AST2400_TIMER_BASE); - for (i = 0; i < ARRAY_SIZE(timer_irqs); i++) { - qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->vic), timer_irqs[i]); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq); - } - - /* SCU */ - object_property_set_bool(OBJECT(&s->scu), true, "realized", &err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->scu), 0, AST2400_SCU_BASE); - - /* UART - attach an 8250 to the IO space as our UART5 */ - if (serial_hds[0]) { - qemu_irq uart5 = qdev_get_gpio_in(DEVICE(&s->vic), uart_irqs[4]); - serial_mm_init(&s->iomem, AST2400_UART_5_BASE, 2, - uart5, 38400, serial_hds[0], DEVICE_LITTLE_ENDIAN); - } - - /* I2C */ - object_property_set_bool(OBJECT(&s->i2c), true, "realized", &err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->i2c), 0, AST2400_I2C_BASE); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c), 0, - qdev_get_gpio_in(DEVICE(&s->vic), 12)); - - /* SMC */ - object_property_set_int(OBJECT(&s->smc), 1, "num-cs", &err); - object_property_set_bool(OBJECT(&s->smc), true, "realized", &local_err); - error_propagate(&err, local_err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->smc), 0, AST2400_FMC_BASE); - sysbus_mmio_map(SYS_BUS_DEVICE(&s->smc), 1, AST2400_FMC_FLASH_BASE); - sysbus_connect_irq(SYS_BUS_DEVICE(&s->smc), 0, - qdev_get_gpio_in(DEVICE(&s->vic), 19)); - - /* SPI */ - object_property_set_int(OBJECT(&s->spi), 1, "num-cs", &err); - object_property_set_bool(OBJECT(&s->spi), true, "realized", &local_err); - error_propagate(&err, local_err); - if (err) { - error_propagate(errp, err); - return; - } - sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi), 0, AST2400_SPI_BASE); - sysbus_mmio_map(SYS_BUS_DEVICE(&s->spi), 1, AST2400_SPI_FLASH_BASE); -} - -static void ast2400_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - - dc->realize = ast2400_realize; - - /* - * Reason: creates an ARM CPU, thus use after free(), see - * arm_cpu_class_init() - */ - dc->cannot_destroy_with_object_finalize_yet = true; -} - -static const TypeInfo ast2400_type_info = { - .name = TYPE_AST2400, - .parent = TYPE_SYS_BUS_DEVICE, - .instance_size = sizeof(AST2400State), - .instance_init = ast2400_init, - .class_init = ast2400_class_init, -}; - -static void ast2400_register_types(void) -{ - type_register_static(&ast2400_type_info); -} - -type_init(ast2400_register_types) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 1b913a43ca..ff621e4b6a 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -9,6 +9,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include #include "hw/hw.h" #include "hw/arm/arm.h" #include "hw/arm/linux-boot-if.h" @@ -486,6 +487,17 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo, g_free(nodename); } } else { + Error *err = NULL; + + rc = fdt_path_offset(fdt, "/memory"); 
+ if (rc < 0) { + qemu_fdt_add_subnode(fdt, "/memory"); + } + + if (!qemu_fdt_getprop(fdt, "/memory", "device_type", NULL, &err)) { + qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory"); + } + rc = qemu_fdt_setprop_sized_cells(fdt, "/memory", "reg", acells, binfo->loader_start, scells, binfo->ram_size); @@ -495,6 +507,11 @@ static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo, } } + rc = fdt_path_offset(fdt, "/chosen"); + if (rc < 0) { + qemu_fdt_add_subnode(fdt, "/chosen"); + } + if (binfo->kernel_cmdline && *binfo->kernel_cmdline) { rc = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", binfo->kernel_cmdline); @@ -773,6 +790,8 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) */ assert(!(info->secure_board_setup && kvm_enabled())); + info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb"); + /* Load the kernel. */ if (!info->kernel_filename || info->firmware_loaded) { @@ -833,8 +852,6 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data) elf_machine = EM_ARM; } - info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb"); - if (!info->secondary_cpu_reset_hook) { info->secondary_cpu_reset_hook = default_reset_secondary; } diff --git a/hw/arm/cubieboard.c b/hw/arm/cubieboard.c index fbd78ed01c..dd19ba3c99 100644 --- a/hw/arm/cubieboard.c +++ b/hw/arm/cubieboard.c @@ -74,6 +74,7 @@ static void cubieboard_init(MachineState *machine) cubieboard_binfo.ram_size = machine->ram_size; cubieboard_binfo.kernel_filename = machine->kernel_filename; cubieboard_binfo.kernel_cmdline = machine->kernel_cmdline; + cubieboard_binfo.initrd_filename = machine->initrd_filename; arm_load_kernel(&s->a10->cpu, &cubieboard_binfo); } diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index b4e358db65..7bb7be76b6 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -125,7 +125,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp) if (!chr) { char label[20]; snprintf(label, sizeof(label), "imx31.uart%d", i); - chr = qemu_chr_new(label, "null", NULL); + chr = qemu_chr_new(label, "null"); } qdev_prop_set_chr(DEVICE(&s->uart[i]), "chardev", chr); diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c index fe204ace62..f23672b222 100644 --- a/hw/arm/fsl-imx31.c +++ b/hw/arm/fsl-imx31.c @@ -114,7 +114,7 @@ static void fsl_imx31_realize(DeviceState *dev, Error **errp) if (!chr) { char label[20]; snprintf(label, sizeof(label), "imx31.uart%d", i); - chr = qemu_chr_new(label, "null", NULL); + chr = qemu_chr_new(label, "null"); } qdev_prop_set_chr(DEVICE(&s->uart[i]), "chardev", chr); diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index 6a1bf263a5..e93532fb57 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -193,7 +193,7 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp) if (!chr) { char *label = g_strdup_printf("imx6.uart%d", i + 1); - chr = qemu_chr_new(label, "null", NULL); + chr = qemu_chr_new(label, "null"); g_free(label); serial_hds[i] = chr; } diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c index 96dc150025..039812a3fd 100644 --- a/hw/arm/integratorcp.c +++ b/hw/arm/integratorcp.c @@ -252,6 +252,26 @@ static void integratorcm_init(Object *obj) /* ??? What should the high bits of this value be? 
*/ s->cm_auxosc = 0x0007feff; s->cm_sdram = 0x00011122; + memcpy(integrator_spd + 73, "QEMU-MEMORY", 11); + s->cm_init = 0x00000112; + s->cm_refcnt_offset = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 24, + 1000); + memory_region_init_ram(&s->flash, obj, "integrator.flash", 0x100000, + &error_fatal); + vmstate_register_ram_global(&s->flash); + + memory_region_init_io(&s->iomem, obj, &integratorcm_ops, s, + "integratorcm", 0x00800000); + sysbus_init_mmio(dev, &s->iomem); + + integratorcm_do_remap(s); + /* ??? Save/restore. */ +} + +static void integratorcm_realize(DeviceState *d, Error **errp) +{ + IntegratorCMState *s = INTEGRATOR_CM(d); + if (s->memsz >= 256) { integrator_spd[31] = 64; s->cm_sdram |= 0x10; @@ -267,20 +287,6 @@ static void integratorcm_init(Object *obj) } else { integrator_spd[31] = 2; } - memcpy(integrator_spd + 73, "QEMU-MEMORY", 11); - s->cm_init = 0x00000112; - s->cm_refcnt_offset = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 24, - 1000); - memory_region_init_ram(&s->flash, obj, "integrator.flash", 0x100000, - &error_fatal); - vmstate_register_ram_global(&s->flash); - - memory_region_init_io(&s->iomem, obj, &integratorcm_ops, s, - "integratorcm", 0x00800000); - sysbus_init_mmio(dev, &s->iomem); - - integratorcm_do_remap(s); - /* ??? Save/restore. */ } /* Integrator/CP hardware emulation. */ @@ -633,6 +639,7 @@ static void core_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->props = core_properties; + dc->realize = integratorcm_realize; } static const TypeInfo core_info = { diff --git a/hw/arm/mainstone.c b/hw/arm/mainstone.c index 454acc5d2b..f962236cf4 100644 --- a/hw/arm/mainstone.c +++ b/hw/arm/mainstone.c @@ -73,8 +73,10 @@ static const struct keymap map[0xE0] = { [0x2f] = {3,3}, /* v */ [0x11] = {3,4}, /* w */ [0x2d] = {3,5}, /* x */ + [0x34] = {4,0}, /* . */ [0x15] = {4,2}, /* y */ [0x2c] = {4,3}, /* z */ + [0x35] = {4,4}, /* / */ [0xc7] = {5,0}, /* Home */ [0x2a] = {5,1}, /* shift */ /* @@ -88,7 +90,8 @@ static const struct keymap map[0xE0] = { * Matrix position {5,4} and other keys are missing here. * TODO: Compare with Linux code and test real hardware. 
*/ - [0x1c] = {5,5}, /* enter (TODO: might be wrong) */ + [0x1c] = {5,4}, /* enter */ + [0x0e] = {5,5}, /* backspace */ [0xc8] = {6,0}, /* up */ [0xd0] = {6,1}, /* down */ [0xcb] = {6,2}, /* left */ diff --git a/hw/arm/musicpal.c b/hw/arm/musicpal.c index cc50ace13d..cbbca4e17a 100644 --- a/hw/arm/musicpal.c +++ b/hw/arm/musicpal.c @@ -384,18 +384,24 @@ static NetClientInfo net_mv88w8618_info = { .cleanup = eth_cleanup, }; -static int mv88w8618_eth_init(SysBusDevice *sbd) +static void mv88w8618_eth_init(Object *obj) { + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); DeviceState *dev = DEVICE(sbd); mv88w8618_eth_state *s = MV88W8618_ETH(dev); sysbus_init_irq(sbd, &s->irq); - s->nic = qemu_new_nic(&net_mv88w8618_info, &s->conf, - object_get_typename(OBJECT(dev)), dev->id, s); - memory_region_init_io(&s->iomem, OBJECT(s), &mv88w8618_eth_ops, s, + memory_region_init_io(&s->iomem, obj, &mv88w8618_eth_ops, s, "mv88w8618-eth", MP_ETH_SIZE); sysbus_init_mmio(sbd, &s->iomem); - return 0; +} + +static void mv88w8618_eth_realize(DeviceState *dev, Error **errp) +{ + mv88w8618_eth_state *s = MV88W8618_ETH(dev); + + s->nic = qemu_new_nic(&net_mv88w8618_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); } static const VMStateDescription mv88w8618_eth_vmsd = { @@ -423,17 +429,17 @@ static Property mv88w8618_eth_properties[] = { static void mv88w8618_eth_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = mv88w8618_eth_init; dc->vmsd = &mv88w8618_eth_vmsd; dc->props = mv88w8618_eth_properties; + dc->realize = mv88w8618_eth_realize; } static const TypeInfo mv88w8618_eth_info = { .name = TYPE_MV88W8618_ETH, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(mv88w8618_eth_state), + .instance_init = mv88w8618_eth_init, .class_init = mv88w8618_eth_class_init, }; @@ -615,23 +621,26 @@ static const GraphicHwOps musicpal_gfx_ops = { .gfx_update = lcd_refresh, }; -static int musicpal_lcd_init(SysBusDevice *sbd) +static void musicpal_lcd_realize(DeviceState *dev, Error **errp) +{ + musicpal_lcd_state *s = MUSICPAL_LCD(dev); + s->con = graphic_console_init(dev, 0, &musicpal_gfx_ops, s); + qemu_console_resize(s->con, 128 * 3, 64 * 3); +} + +static void musicpal_lcd_init(Object *obj) { + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); DeviceState *dev = DEVICE(sbd); musicpal_lcd_state *s = MUSICPAL_LCD(dev); s->brightness = 7; - memory_region_init_io(&s->iomem, OBJECT(s), &musicpal_lcd_ops, s, + memory_region_init_io(&s->iomem, obj, &musicpal_lcd_ops, s, "musicpal-lcd", MP_LCD_SIZE); sysbus_init_mmio(sbd, &s->iomem); - s->con = graphic_console_init(dev, 0, &musicpal_gfx_ops, s); - qemu_console_resize(s->con, 128*3, 64*3); - qdev_init_gpio_in(dev, musicpal_lcd_gpio_brightness_in, 3); - - return 0; } static const VMStateDescription musicpal_lcd_vmsd = { @@ -652,16 +661,16 @@ static const VMStateDescription musicpal_lcd_vmsd = { static void musicpal_lcd_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = musicpal_lcd_init; dc->vmsd = &musicpal_lcd_vmsd; + dc->realize = musicpal_lcd_realize; } static const TypeInfo musicpal_lcd_info = { .name = TYPE_MUSICPAL_LCD, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(musicpal_lcd_state), + .instance_init = musicpal_lcd_init, .class_init = musicpal_lcd_class_init, }; @@ -748,16 +757,16 @@ static const MemoryRegionOps mv88w8618_pic_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; 
-static int mv88w8618_pic_init(SysBusDevice *dev) +static void mv88w8618_pic_init(Object *obj) { + SysBusDevice *dev = SYS_BUS_DEVICE(obj); mv88w8618_pic_state *s = MV88W8618_PIC(dev); qdev_init_gpio_in(DEVICE(dev), mv88w8618_pic_set_irq, 32); sysbus_init_irq(dev, &s->parent_irq); - memory_region_init_io(&s->iomem, OBJECT(s), &mv88w8618_pic_ops, s, + memory_region_init_io(&s->iomem, obj, &mv88w8618_pic_ops, s, "musicpal-pic", MP_PIC_SIZE); sysbus_init_mmio(dev, &s->iomem); - return 0; } static const VMStateDescription mv88w8618_pic_vmsd = { @@ -774,9 +783,7 @@ static const VMStateDescription mv88w8618_pic_vmsd = { static void mv88w8618_pic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = mv88w8618_pic_init; dc->reset = mv88w8618_pic_reset; dc->vmsd = &mv88w8618_pic_vmsd; } @@ -785,6 +792,7 @@ static const TypeInfo mv88w8618_pic_info = { .name = TYPE_MV88W8618_PIC, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(mv88w8618_pic_state), + .instance_init = mv88w8618_pic_init, .class_init = mv88w8618_pic_class_init, }; @@ -837,7 +845,7 @@ static void mv88w8618_timer_init(SysBusDevice *dev, mv88w8618_timer_state *s, s->freq = freq; bh = qemu_bh_new(mv88w8618_timer_tick, s); - s->ptimer = ptimer_init(bh); + s->ptimer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); } static uint64_t mv88w8618_pit_read(void *opaque, hwaddr offset, @@ -913,8 +921,9 @@ static const MemoryRegionOps mv88w8618_pit_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static int mv88w8618_pit_init(SysBusDevice *dev) +static void mv88w8618_pit_init(Object *obj) { + SysBusDevice *dev = SYS_BUS_DEVICE(obj); mv88w8618_pit_state *s = MV88W8618_PIT(dev); int i; @@ -924,10 +933,9 @@ static int mv88w8618_pit_init(SysBusDevice *dev) mv88w8618_timer_init(dev, &s->timer[i], 1000000); } - memory_region_init_io(&s->iomem, OBJECT(s), &mv88w8618_pit_ops, s, + memory_region_init_io(&s->iomem, obj, &mv88w8618_pit_ops, s, "musicpal-pit", MP_PIT_SIZE); sysbus_init_mmio(dev, &s->iomem); - return 0; } static const VMStateDescription mv88w8618_timer_vmsd = { @@ -955,9 +963,7 @@ static const VMStateDescription mv88w8618_pit_vmsd = { static void mv88w8618_pit_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = mv88w8618_pit_init; dc->reset = mv88w8618_pit_reset; dc->vmsd = &mv88w8618_pit_vmsd; } @@ -966,6 +972,7 @@ static const TypeInfo mv88w8618_pit_info = { .name = TYPE_MV88W8618_PIT, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(mv88w8618_pit_state), + .instance_init = mv88w8618_pit_init, .class_init = mv88w8618_pit_class_init, }; @@ -1018,15 +1025,15 @@ static const MemoryRegionOps mv88w8618_flashcfg_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; -static int mv88w8618_flashcfg_init(SysBusDevice *dev) +static void mv88w8618_flashcfg_init(Object *obj) { + SysBusDevice *dev = SYS_BUS_DEVICE(obj); mv88w8618_flashcfg_state *s = MV88W8618_FLASHCFG(dev); s->cfgr0 = 0xfffe4285; /* Default as set by U-Boot for 8 MB flash */ - memory_region_init_io(&s->iomem, OBJECT(s), &mv88w8618_flashcfg_ops, s, + memory_region_init_io(&s->iomem, obj, &mv88w8618_flashcfg_ops, s, "musicpal-flashcfg", MP_FLASHCFG_SIZE); sysbus_init_mmio(dev, &s->iomem); - return 0; } static const VMStateDescription mv88w8618_flashcfg_vmsd = { @@ -1042,9 +1049,7 @@ static const VMStateDescription mv88w8618_flashcfg_vmsd = { static void mv88w8618_flashcfg_class_init(ObjectClass *klass, 
void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = mv88w8618_flashcfg_init; dc->vmsd = &mv88w8618_flashcfg_vmsd; } @@ -1052,6 +1057,7 @@ static const TypeInfo mv88w8618_flashcfg_info = { .name = TYPE_MV88W8618_FLASHCFG, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(mv88w8618_flashcfg_state), + .instance_init = mv88w8618_flashcfg_init, .class_init = mv88w8618_flashcfg_class_init, }; @@ -1350,22 +1356,21 @@ static void musicpal_gpio_reset(DeviceState *d) s->isr = 0; } -static int musicpal_gpio_init(SysBusDevice *sbd) +static void musicpal_gpio_init(Object *obj) { + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); DeviceState *dev = DEVICE(sbd); musicpal_gpio_state *s = MUSICPAL_GPIO(dev); sysbus_init_irq(sbd, &s->irq); - memory_region_init_io(&s->iomem, OBJECT(s), &musicpal_gpio_ops, s, + memory_region_init_io(&s->iomem, obj, &musicpal_gpio_ops, s, "musicpal-gpio", MP_GPIO_SIZE); sysbus_init_mmio(sbd, &s->iomem); qdev_init_gpio_out(dev, s->out, ARRAY_SIZE(s->out)); qdev_init_gpio_in(dev, musicpal_gpio_pin_event, 32); - - return 0; } static const VMStateDescription musicpal_gpio_vmsd = { @@ -1386,9 +1391,7 @@ static const VMStateDescription musicpal_gpio_vmsd = { static void musicpal_gpio_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = musicpal_gpio_init; dc->reset = musicpal_gpio_reset; dc->vmsd = &musicpal_gpio_vmsd; } @@ -1397,6 +1400,7 @@ static const TypeInfo musicpal_gpio_info = { .name = TYPE_MUSICPAL_GPIO, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(musicpal_gpio_state), + .instance_init = musicpal_gpio_init, .class_init = musicpal_gpio_class_init, }; @@ -1516,12 +1520,13 @@ static void musicpal_key_event(void *opaque, int keycode) s->kbd_extended = 0; } -static int musicpal_key_init(SysBusDevice *sbd) +static void musicpal_key_init(Object *obj) { + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); DeviceState *dev = DEVICE(sbd); musicpal_key_state *s = MUSICPAL_KEY(dev); - memory_region_init(&s->iomem, OBJECT(s), "dummy", 0); + memory_region_init(&s->iomem, obj, "dummy", 0); sysbus_init_mmio(sbd, &s->iomem); s->kbd_extended = 0; @@ -1530,8 +1535,6 @@ static int musicpal_key_init(SysBusDevice *sbd) qdev_init_gpio_out(dev, s->out, ARRAY_SIZE(s->out)); qemu_add_kbd_event_handler(musicpal_key_event, s); - - return 0; } static const VMStateDescription musicpal_key_vmsd = { @@ -1548,9 +1551,7 @@ static const VMStateDescription musicpal_key_vmsd = { static void musicpal_key_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = musicpal_key_init; dc->vmsd = &musicpal_key_vmsd; } @@ -1558,6 +1559,7 @@ static const TypeInfo musicpal_key_info = { .name = TYPE_MUSICPAL_KEY, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(musicpal_key_state), + .instance_init = musicpal_key_init, .class_init = musicpal_key_class_init, }; diff --git a/hw/arm/nseries.c b/hw/arm/nseries.c index fea911e3e3..c86cf80514 100644 --- a/hw/arm/nseries.c +++ b/hw/arm/nseries.c @@ -786,8 +786,7 @@ static void n8x0_cbus_setup(struct n800_s *s) static void n8x0_uart_setup(struct n800_s *s) { - CharDriverState *radio = uart_hci_init( - qdev_get_gpio_in(s->mpu->gpio, N8X0_BT_HOST_WKUP_GPIO)); + CharDriverState *radio = uart_hci_init(); qdev_connect_gpio_out(s->mpu->gpio, N8X0_BT_RESET_GPIO, csrhci_pins_get(radio)[csrhci_pin_reset]); diff --git 
a/hw/arm/omap2.c b/hw/arm/omap2.c index 3a0d77714a..6f05c98d3e 100644 --- a/hw/arm/omap2.c +++ b/hw/arm/omap2.c @@ -621,7 +621,7 @@ struct omap_sti_s { qemu_irq irq; MemoryRegion iomem; MemoryRegion iomem_fifo; - CharDriverState *chr; + CharBackend chr; uint32_t sysconfig; uint32_t systest; @@ -769,14 +769,17 @@ static void omap_sti_fifo_write(void *opaque, hwaddr addr, if (ch == STI_TRACE_CONTROL_CHANNEL) { /* Flush channel value. */ - qemu_chr_fe_write(s->chr, (const uint8_t *) "\r", 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, (const uint8_t *) "\r", 1); } else if (ch == STI_TRACE_CONSOLE_CHANNEL || 1) { if (value == 0xc0 || value == 0xc3) { /* Open channel ch. */ - } else if (value == 0x00) - qemu_chr_fe_write(s->chr, (const uint8_t *) "\n", 1); - else - qemu_chr_fe_write(s->chr, &byte, 1); + } else if (value == 0x00) { + qemu_chr_fe_write_all(&s->chr, (const uint8_t *) "\n", 1); + } else { + qemu_chr_fe_write_all(&s->chr, &byte, 1); + } } } @@ -796,7 +799,8 @@ static struct omap_sti_s *omap_sti_init(struct omap_target_agent_s *ta, s->irq = irq; omap_sti_reset(s); - s->chr = chr ?: qemu_chr_new("null", "null", NULL); + qemu_chr_fe_init(&s->chr, chr ?: qemu_chr_new("null", "null"), + &error_abort); memory_region_init_io(&s->iomem, NULL, &omap_sti_ops, s, "omap.sti", omap_l4_region_size(ta, 0)); diff --git a/hw/arm/palmetto-bmc.c b/hw/arm/palmetto-bmc.c deleted file mode 100644 index 54e29a865d..0000000000 --- a/hw/arm/palmetto-bmc.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * OpenPOWER Palmetto BMC - * - * Andrew Jeffery - * - * Copyright 2016 IBM Corp. - * - * This code is licensed under the GPL version 2 or later. See - * the COPYING file in the top-level directory. 
- */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include "qemu-common.h" -#include "cpu.h" -#include "exec/address-spaces.h" -#include "hw/arm/arm.h" -#include "hw/arm/ast2400.h" -#include "hw/boards.h" -#include "qemu/log.h" -#include "sysemu/block-backend.h" -#include "sysemu/blockdev.h" - -static struct arm_boot_info palmetto_bmc_binfo = { - .loader_start = AST2400_SDRAM_BASE, - .board_id = 0, - .nb_cpus = 1, -}; - -typedef struct PalmettoBMCState { - AST2400State soc; - MemoryRegion ram; -} PalmettoBMCState; - -static void palmetto_bmc_init_flashes(AspeedSMCState *s, const char *flashtype, - Error **errp) -{ - int i ; - - for (i = 0; i < s->num_cs; ++i) { - AspeedSMCFlash *fl = &s->flashes[i]; - DriveInfo *dinfo = drive_get_next(IF_MTD); - qemu_irq cs_line; - - /* - * FIXME: check that we are not using a flash module exceeding - * the controller segment size - */ - fl->flash = ssi_create_slave_no_init(s->spi, flashtype); - if (dinfo) { - qdev_prop_set_drive(fl->flash, "drive", blk_by_legacy_dinfo(dinfo), - errp); - } - qdev_init_nofail(fl->flash); - - cs_line = qdev_get_gpio_in_named(fl->flash, SSI_GPIO_CS, 0); - sysbus_connect_irq(SYS_BUS_DEVICE(s), i + 1, cs_line); - } -} - -static void palmetto_bmc_init(MachineState *machine) -{ - PalmettoBMCState *bmc; - - bmc = g_new0(PalmettoBMCState, 1); - object_initialize(&bmc->soc, (sizeof(bmc->soc)), TYPE_AST2400); - object_property_add_child(OBJECT(machine), "soc", OBJECT(&bmc->soc), - &error_abort); - - memory_region_allocate_system_memory(&bmc->ram, NULL, "ram", ram_size); - memory_region_add_subregion(get_system_memory(), AST2400_SDRAM_BASE, - &bmc->ram); - object_property_add_const_link(OBJECT(&bmc->soc), "ram", OBJECT(&bmc->ram), - &error_abort); - object_property_set_int(OBJECT(&bmc->soc), 0x120CE416, "hw-strap1", - &error_abort); - object_property_set_bool(OBJECT(&bmc->soc), true, "realized", - &error_abort); - - palmetto_bmc_init_flashes(&bmc->soc.smc, "n25q256a", &error_abort); - palmetto_bmc_init_flashes(&bmc->soc.spi, "mx25l25635e", &error_abort); - - palmetto_bmc_binfo.kernel_filename = machine->kernel_filename; - palmetto_bmc_binfo.initrd_filename = machine->initrd_filename; - palmetto_bmc_binfo.kernel_cmdline = machine->kernel_cmdline; - palmetto_bmc_binfo.ram_size = ram_size; - arm_load_kernel(ARM_CPU(first_cpu), &palmetto_bmc_binfo); -} - -static void palmetto_bmc_machine_init(MachineClass *mc) -{ - mc->desc = "OpenPOWER Palmetto BMC"; - mc->init = palmetto_bmc_init; - mc->max_cpus = 1; - mc->no_sdcard = 1; - mc->no_floppy = 1; - mc->no_cdrom = 1; - mc->no_sdcard = 1; - mc->no_parallel = 1; -} - -DEFINE_MACHINE("palmetto-bmc", palmetto_bmc_machine_init); diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c index cb55704687..21ea1d6210 100644 --- a/hw/arm/pxa2xx.c +++ b/hw/arm/pxa2xx.c @@ -1505,7 +1505,7 @@ static void pxa2xx_i2c_initfn(Object *obj) PXA2xxI2CState *s = PXA2XX_I2C(obj); SysBusDevice *sbd = SYS_BUS_DEVICE(obj); - s->bus = i2c_init_bus(dev, "i2c"); + s->bus = i2c_init_bus(dev, NULL); memory_region_init_io(&s->iomem, obj, &pxa2xx_i2c_ops, s, "pxa2xx-i2c", s->region_size); @@ -1764,7 +1764,7 @@ struct PXA2xxFIrState { qemu_irq rx_dma; qemu_irq tx_dma; uint32_t enable; - CharDriverState *chr; + CharBackend chr; uint8_t control[3]; uint8_t status[2]; @@ -1898,12 +1898,16 @@ static void pxa2xx_fir_write(void *opaque, hwaddr addr, pxa2xx_fir_update(s); break; case ICDR: - if (s->control[2] & (1 << 2)) /* TXP */ + if (s->control[2] & (1 << 2)) { /* TXP */ ch = value; - else + } else { ch = ~value; - if (s->chr && 
s->enable && (s->control[0] & (1 << 3))) /* TXE */ - qemu_chr_fe_write(s->chr, &ch, 1); + } + if (s->enable && (s->control[0] & (1 << 3))) { /* TXE */ + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); + } break; case ICSR0: s->status[0] &= ~(value & 0x66); @@ -1971,11 +1975,8 @@ static void pxa2xx_fir_realize(DeviceState *dev, Error **errp) { PXA2xxFIrState *s = PXA2XX_FIR(dev); - if (s->chr) { - qemu_chr_fe_claim_no_fail(s->chr); - qemu_chr_add_handlers(s->chr, pxa2xx_fir_is_empty, - pxa2xx_fir_rx, pxa2xx_fir_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, pxa2xx_fir_is_empty, + pxa2xx_fir_rx, pxa2xx_fir_event, s, NULL, true); } static bool pxa2xx_fir_vmstate_validate(void *opaque, int version_id) @@ -2266,7 +2267,9 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size) qdev_get_gpio_in(s->pic, PXA2XX_PIC_LCD)); s->cm_base = 0x41300000; - s->cm_regs[CCCR >> 2] = 0x02000210; /* 416.0 MHz */ + s->cm_regs[CCCR >> 2] = 0x00000121; /* from datasheet */ + s->cm_regs[CKEN >> 2] = 0x00017def; /* from datasheet */ + s->clkcfg = 0x00000009; /* Turbo mode active */ memory_region_init_io(&s->cm_iomem, NULL, &pxa2xx_cm_ops, s, "pxa2xx-cm", 0x1000); memory_region_add_subregion(address_space, s->cm_base, &s->cm_iomem); diff --git a/hw/arm/pxa2xx_gpio.c b/hw/arm/pxa2xx_gpio.c index 576a8eb91f..521dbad039 100644 --- a/hw/arm/pxa2xx_gpio.c +++ b/hw/arm/pxa2xx_gpio.c @@ -280,23 +280,28 @@ DeviceState *pxa2xx_gpio_init(hwaddr base, return dev; } -static int pxa2xx_gpio_initfn(SysBusDevice *sbd) +static void pxa2xx_gpio_initfn(Object *obj) { + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); DeviceState *dev = DEVICE(sbd); PXA2xxGPIOInfo *s = PXA2XX_GPIO(dev); - s->cpu = ARM_CPU(qemu_get_cpu(s->ncpu)); - - qdev_init_gpio_in(dev, pxa2xx_gpio_set, s->lines); - qdev_init_gpio_out(dev, s->handler, s->lines); - - memory_region_init_io(&s->iomem, OBJECT(s), &pxa_gpio_ops, s, "pxa2xx-gpio", 0x1000); + memory_region_init_io(&s->iomem, obj, &pxa_gpio_ops, + s, "pxa2xx-gpio", 0x1000); sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq0); sysbus_init_irq(sbd, &s->irq1); sysbus_init_irq(sbd, &s->irqX); +} - return 0; +static void pxa2xx_gpio_realize(DeviceState *dev, Error **errp) +{ + PXA2xxGPIOInfo *s = PXA2XX_GPIO(dev); + + s->cpu = ARM_CPU(qemu_get_cpu(s->ncpu)); + + qdev_init_gpio_in(dev, pxa2xx_gpio_set, s->lines); + qdev_init_gpio_out(dev, s->handler, s->lines); } /* @@ -336,18 +341,18 @@ static Property pxa2xx_gpio_properties[] = { static void pxa2xx_gpio_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = pxa2xx_gpio_initfn; dc->desc = "PXA2xx GPIO controller"; dc->props = pxa2xx_gpio_properties; dc->vmsd = &vmstate_pxa2xx_gpio_regs; + dc->realize = pxa2xx_gpio_realize; } static const TypeInfo pxa2xx_gpio_info = { .name = TYPE_PXA2XX_GPIO, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(PXA2xxGPIOInfo), + .instance_init = pxa2xx_gpio_initfn, .class_init = pxa2xx_gpio_class_init, }; diff --git a/hw/arm/spitz.c b/hw/arm/spitz.c index 41cc2eeeb1..949a15ae64 100644 --- a/hw/arm/spitz.c +++ b/hw/arm/spitz.c @@ -29,6 +29,7 @@ #include "sysemu/block-backend.h" #include "hw/sysbus.h" #include "exec/address-spaces.h" +#include "sysemu/sysemu.h" #undef REG_FMT #define REG_FMT "0x%02lx" @@ -844,9 +845,18 @@ static void spitz_lcd_hsync_handler(void *opaque, int line, int level) spitz_hsync ^= 
1; } +static void spitz_reset(void *opaque, int line, int level) +{ + if (level) { + qemu_system_reset_request(); + } +} + static void spitz_gpio_setup(PXA2xxState *cpu, int slots) { qemu_irq lcd_hsync; + qemu_irq reset; + /* * Bad hack: We toggle the LCD hsync GPIO on every GPIO status * read to satisfy broken guests that poll-wait for hsync. @@ -867,7 +877,8 @@ static void spitz_gpio_setup(PXA2xxState *cpu, int slots) qemu_irq_raise(qdev_get_gpio_in(cpu->gpio, SPITZ_GPIO_BAT_COVER)); /* Handle reset */ - qdev_connect_gpio_out(cpu->gpio, SPITZ_GPIO_ON_RESET, cpu->reset); + reset = qemu_allocate_irq(spitz_reset, cpu, 0); + qdev_connect_gpio_out(cpu->gpio, SPITZ_GPIO_ON_RESET, reset); /* PCMCIA signals: card's IRQ and Card-Detect */ if (slots >= 1) diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c index de26b8caff..38425bda6c 100644 --- a/hw/arm/stm32f205_soc.c +++ b/hw/arm/stm32f205_soc.c @@ -34,9 +34,15 @@ static const uint32_t timer_addr[STM_NUM_TIMERS] = { 0x40000000, 0x40000400, 0x40000800, 0x40000C00 }; static const uint32_t usart_addr[STM_NUM_USARTS] = { 0x40011000, 0x40004400, 0x40004800, 0x40004C00, 0x40005000, 0x40011400 }; +static const uint32_t adc_addr[STM_NUM_ADCS] = { 0x40012000, 0x40012100, + 0x40012200 }; +static const uint32_t spi_addr[STM_NUM_SPIS] = { 0x40013000, 0x40003800, + 0x40003C00 }; static const int timer_irq[STM_NUM_TIMERS] = {28, 29, 30, 50}; static const int usart_irq[STM_NUM_USARTS] = {37, 38, 39, 52, 53, 71}; +#define ADC_IRQ 18 +static const int spi_irq[STM_NUM_SPIS] = {35, 36, 51}; static void stm32f205_soc_initfn(Object *obj) { @@ -57,13 +63,27 @@ static void stm32f205_soc_initfn(Object *obj) TYPE_STM32F2XX_TIMER); qdev_set_parent_bus(DEVICE(&s->timer[i]), sysbus_get_default()); } + + s->adc_irqs = OR_IRQ(object_new(TYPE_OR_IRQ)); + + for (i = 0; i < STM_NUM_ADCS; i++) { + object_initialize(&s->adc[i], sizeof(s->adc[i]), + TYPE_STM32F2XX_ADC); + qdev_set_parent_bus(DEVICE(&s->adc[i]), sysbus_get_default()); + } + + for (i = 0; i < STM_NUM_SPIS; i++) { + object_initialize(&s->spi[i], sizeof(s->spi[i]), + TYPE_STM32F2XX_SPI); + qdev_set_parent_bus(DEVICE(&s->spi[i]), sysbus_get_default()); + } } static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) { STM32F205State *s = STM32F205_SOC(dev_soc); - DeviceState *syscfgdev, *usartdev, *timerdev, *nvic; - SysBusDevice *syscfgbusdev, *usartbusdev, *timerbusdev; + DeviceState *dev, *nvic; + SysBusDevice *busdev; Error *err = NULL; int i; @@ -94,44 +114,80 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp) s->kernel_filename, s->cpu_model); /* System configuration controller */ - syscfgdev = DEVICE(&s->syscfg); + dev = DEVICE(&s->syscfg); object_property_set_bool(OBJECT(&s->syscfg), true, "realized", &err); if (err != NULL) { error_propagate(errp, err); return; } - syscfgbusdev = SYS_BUS_DEVICE(syscfgdev); - sysbus_mmio_map(syscfgbusdev, 0, 0x40013800); - sysbus_connect_irq(syscfgbusdev, 0, qdev_get_gpio_in(nvic, 71)); + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, 0x40013800); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, 71)); /* Attach UART (uses USART registers) and USART controllers */ for (i = 0; i < STM_NUM_USARTS; i++) { - usartdev = DEVICE(&(s->usart[i])); - qdev_prop_set_chr(usartdev, "chardev", i < MAX_SERIAL_PORTS ? serial_hds[i] : NULL); + dev = DEVICE(&(s->usart[i])); + qdev_prop_set_chr(dev, "chardev", + i < MAX_SERIAL_PORTS ? 
serial_hds[i] : NULL); object_property_set_bool(OBJECT(&s->usart[i]), true, "realized", &err); if (err != NULL) { error_propagate(errp, err); return; } - usartbusdev = SYS_BUS_DEVICE(usartdev); - sysbus_mmio_map(usartbusdev, 0, usart_addr[i]); - sysbus_connect_irq(usartbusdev, 0, - qdev_get_gpio_in(nvic, usart_irq[i])); + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, usart_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, usart_irq[i])); } /* Timer 2 to 5 */ for (i = 0; i < STM_NUM_TIMERS; i++) { - timerdev = DEVICE(&(s->timer[i])); - qdev_prop_set_uint64(timerdev, "clock-frequency", 1000000000); + dev = DEVICE(&(s->timer[i])); + qdev_prop_set_uint64(dev, "clock-frequency", 1000000000); object_property_set_bool(OBJECT(&s->timer[i]), true, "realized", &err); if (err != NULL) { error_propagate(errp, err); return; } - timerbusdev = SYS_BUS_DEVICE(timerdev); - sysbus_mmio_map(timerbusdev, 0, timer_addr[i]); - sysbus_connect_irq(timerbusdev, 0, - qdev_get_gpio_in(nvic, timer_irq[i])); + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, timer_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, timer_irq[i])); + } + + /* ADC 1 to 3 */ + object_property_set_int(OBJECT(s->adc_irqs), STM_NUM_ADCS, + "num-lines", &err); + object_property_set_bool(OBJECT(s->adc_irqs), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0, + qdev_get_gpio_in(nvic, ADC_IRQ)); + + for (i = 0; i < STM_NUM_ADCS; i++) { + dev = DEVICE(&(s->adc[i])); + object_property_set_bool(OBJECT(&s->adc[i]), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, adc_addr[i]); + sysbus_connect_irq(busdev, 0, + qdev_get_gpio_in(DEVICE(s->adc_irqs), i)); + } + + /* SPI 1 and 2 */ + for (i = 0; i < STM_NUM_SPIS; i++) { + dev = DEVICE(&(s->spi[i])); + object_property_set_bool(OBJECT(&s->spi[i]), true, "realized", &err); + if (err != NULL) { + error_propagate(errp, err); + return; + } + busdev = SYS_BUS_DEVICE(dev); + sysbus_mmio_map(busdev, 0, spi_addr[i]); + sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(nvic, spi_irq[i])); } } diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c index f1b2c6c966..3311cc38a4 100644 --- a/hw/arm/strongarm.c +++ b/hw/arm/strongarm.c @@ -912,7 +912,7 @@ typedef struct StrongARMUARTState { SysBusDevice parent_obj; MemoryRegion iomem; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; uint8_t utcr0; @@ -1020,9 +1020,7 @@ static void strongarm_uart_update_parameters(StrongARMUARTState *s) ssp.data_bits = data_bits; ssp.stop_bits = stop_bits; s->char_transmit_time = (NANOSECONDS_PER_SECOND / speed) * frame_size; - if (s->chr) { - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); - } + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); DPRINTF(stderr, "%s speed=%d parity=%c data=%d stop=%d\n", s->chr->label, speed, parity, data_bits, stop_bits); @@ -1107,8 +1105,10 @@ static void strongarm_uart_tx(void *opaque) if (s->utcr3 & UTCR3_LBM) /* loopback */ { strongarm_uart_receive(s, &s->tx_fifo[s->tx_start], 1); - } else if (s->chr) { - qemu_chr_fe_write(s->chr, &s->tx_fifo[s->tx_start], 1); + } else if (qemu_chr_fe_get_driver(&s->chr)) { + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &s->tx_fifo[s->tx_start], 1); } s->tx_start = (s->tx_start + 1) % 8; @@ -1236,14 +1236,17 @@ static void strongarm_uart_init(Object *obj) s->rx_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, strongarm_uart_rx_to, s); s->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, strongarm_uart_tx, s); +} - if (s->chr) { - qemu_chr_add_handlers(s->chr, - strongarm_uart_can_receive, - strongarm_uart_receive, - strongarm_uart_event, - s); - } +static void strongarm_uart_realize(DeviceState *dev, Error **errp) +{ + StrongARMUARTState *s = STRONGARM_UART(dev); + + qemu_chr_fe_set_handlers(&s->chr, + strongarm_uart_can_receive, + strongarm_uart_receive, + strongarm_uart_event, + s, NULL, true); } static void strongarm_uart_reset(DeviceState *dev) @@ -1318,6 +1321,7 @@ static void strongarm_uart_class_init(ObjectClass *klass, void *data) dc->reset = strongarm_uart_reset; dc->vmsd = &vmstate_strongarm_uart_regs; dc->props = strongarm_uart_properties; + dc->realize = strongarm_uart_realize; } static const TypeInfo strongarm_uart_info = { @@ -1518,19 +1522,19 @@ static int strongarm_ssp_post_load(void *opaque, int version_id) return 0; } -static int strongarm_ssp_init(SysBusDevice *sbd) +static void strongarm_ssp_init(Object *obj) { + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); DeviceState *dev = DEVICE(sbd); StrongARMSSPState *s = STRONGARM_SSP(dev); sysbus_init_irq(sbd, &s->irq); - memory_region_init_io(&s->iomem, OBJECT(s), &strongarm_ssp_ops, s, + memory_region_init_io(&s->iomem, obj, &strongarm_ssp_ops, s, "ssp", 0x1000); sysbus_init_mmio(sbd, &s->iomem); s->bus = ssi_create_bus(dev, "ssi"); - return 0; } static void strongarm_ssp_reset(DeviceState *dev) @@ -1560,9 +1564,7 @@ static const VMStateDescription vmstate_strongarm_ssp_regs = { static void strongarm_ssp_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = strongarm_ssp_init; dc->desc = "StrongARM SSP controller"; dc->reset = strongarm_ssp_reset; dc->vmsd = &vmstate_strongarm_ssp_regs; @@ -1572,6 +1574,7 @@ static const TypeInfo strongarm_ssp_info = { .name = TYPE_STRONGARM_SSP, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(StrongARMSSPState), + .instance_init = strongarm_ssp_init, .class_init = strongarm_ssp_class_init, }; diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c index 5debb3348c..d68e3dcdbd 100644 --- a/hw/arm/sysbus-fdt.c +++ b/hw/arm/sysbus-fdt.c @@ -436,7 +436,7 @@ static const NodeCreationPair add_fdt_node_functions[] = { * are dynamically instantiable and if so call the node creation * function. 
*/ -static int add_fdt_node(SysBusDevice *sbdev, void *opaque) +static void add_fdt_node(SysBusDevice *sbdev, void *opaque) { int i, ret; @@ -445,7 +445,7 @@ static int add_fdt_node(SysBusDevice *sbdev, void *opaque) add_fdt_node_functions[i].typename)) { ret = add_fdt_node_functions[i].add_fdt_node_fn(sbdev, opaque); assert(!ret); - return 0; + return; } } error_report("Device %s can not be dynamically instantiated", diff --git a/hw/arm/tosa.c b/hw/arm/tosa.c index 2db66508b5..1ee12f49b3 100644 --- a/hw/arm/tosa.c +++ b/hw/arm/tosa.c @@ -25,6 +25,7 @@ #include "sysemu/block-backend.h" #include "hw/sysbus.h" #include "exec/address-spaces.h" +#include "sysemu/sysemu.h" #define TOSA_RAM 0x04000000 #define TOSA_ROM 0x00800000 @@ -86,6 +87,12 @@ static void tosa_out_switch(void *opaque, int line, int level) } } +static void tosa_reset(void *opaque, int line, int level) +{ + if (level) { + qemu_system_reset_request(); + } +} static void tosa_gpio_setup(PXA2xxState *cpu, DeviceState *scp0, @@ -93,13 +100,16 @@ static void tosa_gpio_setup(PXA2xxState *cpu, TC6393xbState *tmio) { qemu_irq *outsignals = qemu_allocate_irqs(tosa_out_switch, cpu, 4); + qemu_irq reset; + /* MMC/SD host */ pxa2xx_mmci_handlers(cpu->mmc, qdev_get_gpio_in(scp0, TOSA_GPIO_SD_WP), qemu_irq_invert(qdev_get_gpio_in(cpu->gpio, TOSA_GPIO_nSD_DETECT))); /* Handle reset */ - qdev_connect_gpio_out(cpu->gpio, TOSA_GPIO_ON_RESET, cpu->reset); + reset = qemu_allocate_irq(tosa_reset, cpu, 0); + qdev_connect_gpio_out(cpu->gpio, TOSA_GPIO_ON_RESET, reset); /* PCMCIA signals: card's IRQ and Card-Detect */ pxa2xx_pcmcia_set_irq_cb(cpu->pcmcia[0], diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c index 8ae5392bcc..7b5cb36d5a 100644 --- a/hw/arm/versatilepb.c +++ b/hw/arm/versatilepb.c @@ -198,6 +198,15 @@ static void versatile_init(MachineState *machine, int board_id) int done_smc = 0; DriveInfo *dinfo; + if (machine->ram_size > 0x10000000) { + /* Device starting at address 0x10000000, + * and memory cannot overlap with devices. + * Refuse to run rather than behaving very confusingly. 
+ */ + error_report("versatilepb: memory size must not exceed 256MB"); + exit(1); + } + if (!machine->cpu_model) { machine->cpu_model = "arm926"; } diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 28fc59c665..d4160dfa7d 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -44,6 +44,7 @@ #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "sysemu/numa.h" +#include "kvm_arm.h" #define ARM_SPI_BASE 32 #define ACPI_POWER_BUTTON_DEVICE "PWRB" @@ -53,7 +54,7 @@ static void acpi_dsdt_add_cpus(Aml *scope, int smp_cpus) uint16_t i; for (i = 0; i < smp_cpus; i++) { - Aml *dev = aml_device("C%03x", i); + Aml *dev = aml_device("C%.03X", i); aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); aml_append(dev, aml_name_decl("_UID", aml_int(i))); aml_append(scope, dev); @@ -382,6 +383,61 @@ build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) return rsdp_table; } +static void +build_iort(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info) +{ + int iort_start = table_data->len; + AcpiIortIdMapping *idmap; + AcpiIortItsGroup *its; + AcpiIortTable *iort; + size_t node_size, iort_length; + AcpiIortRC *rc; + + iort = acpi_data_push(table_data, sizeof(*iort)); + + iort_length = sizeof(*iort); + iort->node_count = cpu_to_le32(2); /* RC and ITS nodes */ + iort->node_offset = cpu_to_le32(sizeof(*iort)); + + /* ITS group node */ + node_size = sizeof(*its) + sizeof(uint32_t); + iort_length += node_size; + its = acpi_data_push(table_data, node_size); + + its->type = ACPI_IORT_NODE_ITS_GROUP; + its->length = cpu_to_le16(node_size); + its->its_count = cpu_to_le32(1); + its->identifiers[0] = 0; /* MADT translation_id */ + + /* Root Complex Node */ + node_size = sizeof(*rc) + sizeof(*idmap); + iort_length += node_size; + rc = acpi_data_push(table_data, node_size); + + rc->type = ACPI_IORT_NODE_PCI_ROOT_COMPLEX; + rc->length = cpu_to_le16(node_size); + rc->mapping_count = cpu_to_le32(1); + rc->mapping_offset = cpu_to_le32(sizeof(*rc)); + + /* fully coherent device */ + rc->memory_properties.cache_coherency = cpu_to_le32(1); + rc->memory_properties.memory_flags = 0x3; /* CCA = CPM = DCAS = 1 */ + rc->pci_segment_number = 0; /* MCFG pci_segment */ + + /* Identity RID mapping covering the whole input RID range */ + idmap = &rc->id_mapping_array[0]; + idmap->input_base = 0; + idmap->id_count = cpu_to_le32(0xFFFF); + idmap->output_base = 0; + /* output IORT node is the ITS group node (the first node) */ + idmap->output_reference = cpu_to_le32(iort->node_offset); + + iort->length = cpu_to_le32(iort_length); + + build_header(linker, table_data, (void *)(table_data->data + iort_start), + "IORT", table_data->len - iort_start, 0, NULL, NULL); +} + static void build_spcr(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info) { @@ -426,11 +482,9 @@ build_srat(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info) uint32_t *cpu_node = g_malloc0(guest_info->smp_cpus * sizeof(uint32_t)); for (i = 0; i < guest_info->smp_cpus; i++) { - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, numa_info[j].node_cpu)) { + j = numa_get_node_for_cpu(i); + if (j < nb_numa_nodes) { cpu_node[i] = j; - break; - } } } @@ -540,12 +594,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info) gicc->uid = i; gicc->flags = cpu_to_le32(ACPI_GICC_ENABLED); - if (armcpu->has_pmu) { + if (arm_feature(&armcpu->env, ARM_FEATURE_PMU)) { gicc->performance_interrupt = cpu_to_le32(PPI(VIRTUAL_PMU_IRQ)); } } if 
(guest_info->gic_version == 3) { + AcpiMadtGenericTranslator *gic_its; AcpiMadtGenericRedistributor *gicr = acpi_data_push(table_data, sizeof *gicr); @@ -553,6 +608,14 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtGuestInfo *guest_info) gicr->length = sizeof(*gicr); gicr->base_address = cpu_to_le64(memmap[VIRT_GIC_REDIST].base); gicr->range_length = cpu_to_le32(memmap[VIRT_GIC_REDIST].size); + + if (its_class_name() && !guest_info->no_its) { + gic_its = acpi_data_push(table_data, sizeof *gic_its); + gic_its->type = ACPI_APIC_GENERIC_TRANSLATOR; + gic_its->length = sizeof(*gic_its); + gic_its->translation_id = 0; + gic_its->base_address = cpu_to_le64(memmap[VIRT_GIC_ITS].base); + } } else { gic_msi = acpi_data_push(table_data, sizeof *gic_msi); gic_msi->type = ACPI_APIC_GENERIC_MSI_FRAME; @@ -659,17 +722,6 @@ void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables) ACPI_BUILD_TABLE_FILE, tables_blob, 64, false /* high memory */); - /* - * The ACPI v5.1 tables for Hardware-reduced ACPI platform are: - * RSDP - * RSDT - * FADT - * GTDT - * MADT - * MCFG - * DSDT - */ - /* DSDT is pointed to by FADT */ dsdt = tables_blob->len; build_dsdt(tables_blob, tables->linker, guest_info); @@ -695,6 +747,11 @@ void virt_acpi_build(VirtGuestInfo *guest_info, AcpiBuildTables *tables) build_srat(tables_blob, tables->linker, guest_info); } + if (its_class_name() && !guest_info->no_its) { + acpi_add_table(table_offsets, tables_blob); + build_iort(tables_blob, tables->linker, guest_info); + } + /* RSDT is pointed to by RSDP */ rsdt = tables_blob->len; build_rsdt(tables_blob, tables->linker, table_offsets, NULL, NULL); @@ -752,7 +809,7 @@ static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state, uint64_t max_size) { return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, - name, virt_acpi_build_update, build_state); + name, virt_acpi_build_update, build_state, NULL); } static const VMStateDescription vmstate_virt_acpi_build = { diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a193b5a95b..d04e4acbd9 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -76,7 +76,7 @@ typedef struct VirtBoardInfo { int fdt_size; uint32_t clock_phandle; uint32_t gic_phandle; - uint32_t v2m_phandle; + uint32_t msi_phandle; bool using_psci; } VirtBoardInfo; @@ -84,6 +84,8 @@ typedef struct { MachineClass parent; VirtBoardInfo *daughterboard; bool disallow_affinity_adjustment; + bool no_its; + bool no_pmu; } VirtMachineClass; typedef struct { @@ -413,19 +415,31 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi) armcpu->mp_affinity); } - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cpu, numa_info[i].node_cpu)) { - qemu_fdt_setprop_cell(vbi->fdt, nodename, "numa-node-id", i); - } + i = numa_get_node_for_cpu(cpu); + if (i < nb_numa_nodes) { + qemu_fdt_setprop_cell(vbi->fdt, nodename, "numa-node-id", i); } g_free(nodename); } } +static void fdt_add_its_gic_node(VirtBoardInfo *vbi) +{ + vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt); + qemu_fdt_add_subnode(vbi->fdt, "/intc/its"); + qemu_fdt_setprop_string(vbi->fdt, "/intc/its", "compatible", + "arm,gic-v3-its"); + qemu_fdt_setprop(vbi->fdt, "/intc/its", "msi-controller", NULL, 0); + qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/its", "reg", + 2, vbi->memmap[VIRT_GIC_ITS].base, + 2, vbi->memmap[VIRT_GIC_ITS].size); + qemu_fdt_setprop_cell(vbi->fdt, "/intc/its", "phandle", vbi->msi_phandle); +} + static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi) { - vbi->v2m_phandle = qemu_fdt_alloc_phandle(vbi->fdt); + 
vbi->msi_phandle = qemu_fdt_alloc_phandle(vbi->fdt); qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m"); qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible", "arm,gic-v2m-frame"); @@ -433,7 +447,7 @@ static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi) qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg", 2, vbi->memmap[VIRT_GIC_V2M].base, 2, vbi->memmap[VIRT_GIC_V2M].size); - qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->v2m_phandle); + qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->msi_phandle); } static void fdt_add_gic_node(VirtBoardInfo *vbi, int type) @@ -477,7 +491,7 @@ static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype) CPU_FOREACH(cpu) { armcpu = ARM_CPU(cpu); - if (!armcpu->has_pmu || + if (!arm_feature(&armcpu->env, ARM_FEATURE_PMU) || !kvm_arm_pmu_create(cpu, PPI(VIRTUAL_PMU_IRQ))) { return; } @@ -500,6 +514,26 @@ static void fdt_add_pmu_nodes(const VirtBoardInfo *vbi, int gictype) } } +static void create_its(VirtBoardInfo *vbi, DeviceState *gicdev) +{ + const char *itsclass = its_class_name(); + DeviceState *dev; + + if (!itsclass) { + /* Do nothing if not supported */ + return; + } + + dev = qdev_create(NULL, itsclass); + + object_property_set_link(OBJECT(dev), OBJECT(gicdev), "parent-gicv3", + &error_abort); + qdev_init_nofail(dev); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_ITS].base); + + fdt_add_its_gic_node(vbi); +} + static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic) { int i; @@ -519,7 +553,8 @@ static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic) fdt_add_v2m_gic_node(vbi); } -static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type, bool secure) +static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type, + bool secure, bool no_its) { /* We create a standalone GIC */ DeviceState *gicdev; @@ -583,7 +618,9 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic, int type, bool secure) fdt_add_gic_node(vbi, type); - if (type == 2) { + if (type == 3 && !no_its) { + create_its(vbi, gicdev); + } else if (type == 2) { create_v2m(vbi, pic); } } @@ -892,9 +929,11 @@ static void create_fw_cfg(const VirtBoardInfo *vbi, AddressSpace *as) { hwaddr base = vbi->memmap[VIRT_FW_CFG].base; hwaddr size = vbi->memmap[VIRT_FW_CFG].size; + FWCfgState *fw_cfg; char *nodename; - fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); + fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); qemu_fdt_add_subnode(vbi->fdt, nodename); @@ -1025,9 +1064,9 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic, nr_pcie_buses - 1); qemu_fdt_setprop(vbi->fdt, nodename, "dma-coherent", NULL, 0); - if (vbi->v2m_phandle) { + if (vbi->msi_phandle) { qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent", - vbi->v2m_phandle); + vbi->msi_phandle); } qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg", @@ -1317,6 +1356,10 @@ static void machvirt_init(MachineState *machine) } } + if (vmc->no_pmu && object_property_find(cpuobj, "pmu", NULL)) { + object_property_set_bool(cpuobj, false, "pmu", NULL); + } + if (object_property_find(cpuobj, "reset-cbar", NULL)) { object_property_set_int(cpuobj, vbi->memmap[VIRT_CPUPERIPHS].base, "reset-cbar", &error_abort); @@ -1341,7 +1384,7 @@ static void machvirt_init(MachineState *machine) create_flash(vbi, sysmem, secure_sysmem ? 
secure_sysmem : sysmem); - create_gic(vbi, pic, gic_version, vms->secure); + create_gic(vbi, pic, gic_version, vms->secure, vmc->no_its); fdt_add_pmu_nodes(vbi, gic_version); @@ -1373,6 +1416,7 @@ static void machvirt_init(MachineState *machine) guest_info->irqmap = vbi->irqmap; guest_info->use_highmem = vms->highmem; guest_info->gic_version = gic_version; + guest_info->no_its = vmc->no_its; guest_info_state->machine_done.notify = virt_guest_info_machine_done; qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); @@ -1457,11 +1501,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) * it later in machvirt_init, where we have more information about the * configuration of the particular instance. */ - mc->max_cpus = MAX_CPUMASK_BITS; + mc->max_cpus = 255; mc->has_dynamic_sysbus = true; mc->block_default_type = IF_VIRTIO; mc->no_cdrom = 1; mc->pci_allow_0_address = true; + /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ + mc->minimum_page_bits = 12; } static const TypeInfo virt_machine_info = { @@ -1479,7 +1525,7 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); -static void virt_2_7_instance_init(Object *obj) +static void virt_2_8_instance_init(Object *obj) { VirtMachineState *vms = VIRT_MACHINE(obj); @@ -1512,10 +1558,31 @@ static void virt_2_7_instance_init(Object *obj) "Valid values are 2, 3 and host", NULL); } +static void virt_machine_2_8_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(2, 8) + +#define VIRT_COMPAT_2_7 \ + HW_COMPAT_2_7 + +static void virt_2_7_instance_init(Object *obj) +{ + virt_2_8_instance_init(obj); +} + static void virt_machine_2_7_options(MachineClass *mc) { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + + virt_machine_2_8_options(mc); + SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_7); + /* ITS was introduced with 2.8 */ + vmc->no_its = true; + /* Stick with 1K pages for migration compatibility */ + mc->minimum_page_bits = 0; } -DEFINE_VIRT_MACHINE_AS_LATEST(2, 7) +DEFINE_VIRT_MACHINE(2, 7) #define VIRT_COMPAT_2_6 \ HW_COMPAT_2_6 @@ -1532,5 +1599,7 @@ static void virt_machine_2_6_options(MachineClass *mc) virt_machine_2_7_options(mc); SET_MACHINE_COMPAT(mc, VIRT_COMPAT_2_6); vmc->disallow_affinity_adjustment = true; + /* Disable PMU for 2.6 as PMU support was first introduced in 2.7 */ + vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c index 23c7199867..0d86ba35ae 100644 --- a/hw/arm/xlnx-zynqmp.c +++ b/hw/arm/xlnx-zynqmp.c @@ -332,6 +332,8 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp) qemu_check_nic_model(nd, TYPE_CADENCE_GEM); qdev_set_nic_properties(DEVICE(&s->gem[i]), nd); } + object_property_set_int(OBJECT(&s->gem[i]), 2, "num-priority-queues", + &error_abort); object_property_set_bool(OBJECT(&s->gem[i]), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/audio/gus.c b/hw/audio/gus.c index 6c02646773..3d08a6576a 100644 --- a/hw/audio/gus.c +++ b/hw/audio/gus.c @@ -60,6 +60,8 @@ typedef struct GUSState { int64_t last_ticks; qemu_irq pic; IsaDma *isa_dma; + PortioList portio_list1; + PortioList portio_list2; } GUSState; static uint32_t gus_readb(void *opaque, uint32_t nport) @@ -265,9 +267,10 @@ static void gus_realizefn (DeviceState *dev, Error **errp) s->samples = AUD_get_buffer_size_out (s->voice) >> s->shift; s->mixbuf = g_malloc0 (s->samples << s->shift); - isa_register_portio_list (d, s->port, gus_portio_list1, s, "gus"); - 
isa_register_portio_list (d, (s->port + 0x100) & 0xf00, - gus_portio_list2, s, "gus"); + isa_register_portio_list(d, &s->portio_list1, s->port, + gus_portio_list1, s, "gus"); + isa_register_portio_list(d, &s->portio_list2, (s->port + 0x100) & 0xf00, + gus_portio_list2, s, "gus"); s->isa_dma = isa_get_dma(isa_bus_from_device(d), s->emu.gusdma); k = ISADMA_GET_CLASS(s->isa_dma); diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c index cd95340cd9..537face94d 100644 --- a/hw/audio/intel-hda.c +++ b/hw/audio/intel-hda.c @@ -416,7 +416,8 @@ static bool intel_hda_xfer(HDACodecDevice *dev, uint32_t stnr, bool output, } left = len; - while (left > 0) { + s = st->bentries; + while (left > 0 && s-- > 0) { copy = left; if (copy > st->bsize - st->lpib) copy = st->bsize - st->lpib; diff --git a/hw/audio/pcspk.c b/hw/audio/pcspk.c index 42a6f4885a..798002277b 100644 --- a/hw/audio/pcspk.c +++ b/hw/audio/pcspk.c @@ -52,8 +52,9 @@ typedef struct { unsigned int pit_count; unsigned int samples; unsigned int play_pos; - int data_on; - int dummy_refresh_clock; + uint8_t data_on; + uint8_t dummy_refresh_clock; + bool migrate; } PCSpkState; static const char *s_spk = "pcspk"; @@ -187,8 +188,29 @@ static void pcspk_realizefn(DeviceState *dev, Error **errp) pcspk_state = s; } +static bool migrate_needed(void *opaque) +{ + PCSpkState *s = opaque; + + return s->migrate; +} + +static const VMStateDescription vmstate_spk = { + .name = "pcspk", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .needed = migrate_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT8(data_on, PCSpkState), + VMSTATE_UINT8(dummy_refresh_clock, PCSpkState), + VMSTATE_END_OF_LIST() + } +}; + static Property pcspk_properties[] = { DEFINE_PROP_UINT32("iobase", PCSpkState, iobase, -1), + DEFINE_PROP_BOOL("migrate", PCSpkState, migrate, true), DEFINE_PROP_END_OF_LIST(), }; @@ -198,6 +220,7 @@ static void pcspk_class_initfn(ObjectClass *klass, void *data) dc->realize = pcspk_realizefn; set_bit(DEVICE_CATEGORY_SOUND, dc->categories); + dc->vmsd = &vmstate_spk; dc->props = pcspk_properties; /* Reason: realize sets global pcspk_state */ dc->cannot_instantiate_with_device_add_yet = true; diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c index 3a4a57ac31..6b4427f242 100644 --- a/hw/audio/sb16.c +++ b/hw/audio/sb16.c @@ -106,6 +106,7 @@ typedef struct SB16State { /* mixer state */ int mixer_nreg; uint8_t mixer_regs[256]; + PortioList portio_list; } SB16State; static void SB_audio_callback (void *opaque, int free); @@ -1378,7 +1379,8 @@ static void sb16_realizefn (DeviceState *dev, Error **errp) dolog ("warning: Could not create auxiliary timer\n"); } - isa_register_portio_list (isadev, s->port, sb16_ioport_list, s, "sb16"); + isa_register_portio_list(isadev, &s->portio_list, s->port, + sb16_ioport_list, s, "sb16"); s->isa_hdma = isa_get_dma(isa_bus_from_device(isadev), s->hdma); k = ISADMA_GET_CLASS(s->isa_hdma); diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 704a763603..d1f9f63eaf 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -68,9 +68,7 @@ static void notify_guest_bh(void *opaque) unsigned i = j + ctzl(bits); VirtQueue *vq = virtio_get_queue(s->vdev, i); - if (virtio_should_notify(s->vdev, vq)) { - event_notifier_set(virtio_queue_get_guest_notifier(vq)); - } + virtio_notify_irqfd(s->vdev, vq); bits &= bits - 1; /* clear right-most bit */ } @@ -88,23 +86,28 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, 
*dataplane = NULL; - if (!conf->iothread) { - return; - } + if (conf->iothread) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " + "(transport does not support notifiers)"); + return; + } + if (!virtio_device_ioeventfd_enabled(vdev)) { + error_setg(errp, "ioeventfd is required for iothread"); + return; + } - /* Don't try if transport does not support notifiers. */ - if (!k->set_guest_notifiers || !k->ioeventfd_started) { - error_setg(errp, - "device is incompatible with dataplane " - "(transport does not support notifiers)"); - return; + /* If dataplane is (re-)enabled while the guest is running there could + * be block jobs that can conflict. + */ + if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + error_prepend(errp, "cannot start virtio-blk dataplane: "); + return; + } } - - /* If dataplane is (re-)enabled while the guest is running there could be - * block jobs that can conflict. - */ - if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { - error_prepend(errp, "cannot start dataplane thread: "); + /* Don't try if transport does not support notifiers. */ + if (!virtio_device_ioeventfd_enabled(vdev)) { return; } @@ -112,9 +115,13 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, s->vdev = vdev; s->conf = conf; - s->iothread = conf->iothread; - object_ref(OBJECT(s->iothread)); - s->ctx = iothread_get_aio_context(s->iothread); + if (conf->iothread) { + s->iothread = conf->iothread; + object_ref(OBJECT(s->iothread)); + s->ctx = iothread_get_aio_context(s->iothread); + } else { + s->ctx = qemu_get_aio_context(); + } s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); s->batch_notify_vqs = bitmap_new(conf->num_queues); @@ -124,14 +131,19 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, /* Context: QEMU global mutex held */ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) { + VirtIOBlock *vblk; + if (!s) { return; } - virtio_blk_data_plane_stop(s); + vblk = VIRTIO_BLK(s->vdev); + assert(!vblk->dataplane_started); g_free(s->batch_notify_vqs); qemu_bh_delete(s->bh); - object_unref(OBJECT(s->iothread)); + if (s->iothread) { + object_unref(OBJECT(s->iothread)); + } g_free(s); } @@ -147,17 +159,18 @@ static void virtio_blk_data_plane_handle_output(VirtIODevice *vdev, } /* Context: QEMU global mutex held */ -void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) +int virtio_blk_data_plane_start(VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); + VirtIOBlock *vblk = VIRTIO_BLK(vdev); + VirtIOBlockDataPlane *s = vblk->dataplane; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); unsigned i; unsigned nvqs = s->conf->num_queues; int r; if (vblk->dataplane_started || s->starting) { - return; + return 0; } s->starting = true; @@ -204,20 +217,22 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) virtio_blk_data_plane_handle_output); } aio_context_release(s->ctx); - return; + return 0; fail_guest_notifiers: vblk->dataplane_disabled = true; s->starting = false; vblk->dataplane_started = true; + return -ENOSYS; } /* Context: QEMU global mutex held */ -void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s) +void virtio_blk_data_plane_stop(VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev))); + VirtIOBlock *vblk = VIRTIO_BLK(vdev); + VirtIOBlockDataPlane *s = 
vblk->dataplane; + BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); unsigned i; unsigned nvqs = s->conf->num_queues; diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h index b1f0b95b32..db3f47b173 100644 --- a/hw/block/dataplane/virtio-blk.h +++ b/hw/block/dataplane/virtio-blk.h @@ -23,9 +23,9 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, VirtIOBlockDataPlane **dataplane, Error **errp); void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s); -void virtio_blk_data_plane_drain(VirtIOBlockDataPlane *s); void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); +int virtio_blk_data_plane_start(VirtIODevice *vdev); +void virtio_blk_data_plane_stop(VirtIODevice *vdev); + #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ diff --git a/hw/block/fdc.c b/hw/block/fdc.c index f73af7db46..17d29e7bc5 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -35,6 +35,7 @@ #include "qemu/timer.h" #include "hw/isa/isa.h" #include "hw/sysbus.h" +#include "hw/block/block.h" #include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "sysemu/sysemu.h" @@ -52,6 +53,35 @@ } \ } while (0) + +/********************************************************/ +/* qdev floppy bus */ + +#define TYPE_FLOPPY_BUS "floppy-bus" +#define FLOPPY_BUS(obj) OBJECT_CHECK(FloppyBus, (obj), TYPE_FLOPPY_BUS) + +typedef struct FDCtrl FDCtrl; +typedef struct FDrive FDrive; +static FDrive *get_drv(FDCtrl *fdctrl, int unit); + +typedef struct FloppyBus { + BusState bus; + FDCtrl *fdc; +} FloppyBus; + +static const TypeInfo floppy_bus_info = { + .name = TYPE_FLOPPY_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(FloppyBus), +}; + +static void floppy_bus_create(FDCtrl *fdc, FloppyBus *bus, DeviceState *dev) +{ + qbus_create_inplace(bus, sizeof(FloppyBus), TYPE_FLOPPY_BUS, dev, NULL); + bus->fdc = fdc; +} + + /********************************************************/ /* Floppy drive emulation */ @@ -148,14 +178,12 @@ static FDriveSize drive_size(FloppyDriveType drive) #define FD_SECTOR_SC 2 /* Sector size code */ #define FD_RESET_SENSEI_COUNT 4 /* Number of sense interrupts on RESET */ -typedef struct FDCtrl FDCtrl; - /* Floppy disk drive emulation */ typedef enum FDiskFlags { FDISK_DBL_SIDES = 0x01, } FDiskFlags; -typedef struct FDrive { +struct FDrive { FDCtrl *fdctrl; BlockBackend *blk; /* Drive status */ @@ -176,7 +204,7 @@ typedef struct FDrive { uint8_t media_rate; /* Data rate of medium */ bool media_validated; /* Have we validated the media? 
*/ -} FDrive; +}; static FloppyDriveType get_fallback_drive_type(FDrive *drv); @@ -441,6 +469,135 @@ static void fd_revalidate(FDrive *drv) } } +static void fd_change_cb(void *opaque, bool load) +{ + FDrive *drive = opaque; + + drive->media_changed = 1; + drive->media_validated = false; + fd_revalidate(drive); +} + +static const BlockDevOps fd_block_ops = { + .change_media_cb = fd_change_cb, +}; + + +#define TYPE_FLOPPY_DRIVE "floppy" +#define FLOPPY_DRIVE(obj) \ + OBJECT_CHECK(FloppyDrive, (obj), TYPE_FLOPPY_DRIVE) + +typedef struct FloppyDrive { + DeviceState qdev; + uint32_t unit; + BlockConf conf; + FloppyDriveType type; +} FloppyDrive; + +static Property floppy_drive_properties[] = { + DEFINE_PROP_UINT32("unit", FloppyDrive, unit, -1), + DEFINE_BLOCK_PROPERTIES(FloppyDrive, conf), + DEFINE_PROP_DEFAULT("drive-type", FloppyDrive, type, + FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, + FloppyDriveType), + DEFINE_PROP_END_OF_LIST(), +}; + +static int floppy_drive_init(DeviceState *qdev) +{ + FloppyDrive *dev = FLOPPY_DRIVE(qdev); + FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus); + FDrive *drive; + int ret; + + if (dev->unit == -1) { + for (dev->unit = 0; dev->unit < MAX_FD; dev->unit++) { + drive = get_drv(bus->fdc, dev->unit); + if (!drive->blk) { + break; + } + } + } + + if (dev->unit >= MAX_FD) { + error_report("Can't create floppy unit %d, bus supports only %d units", + dev->unit, MAX_FD); + return -1; + } + + drive = get_drv(bus->fdc, dev->unit); + if (drive->blk) { + error_report("Floppy unit %d is in use", dev->unit); + return -1; + } + + if (!dev->conf.blk) { + /* Anonymous BlockBackend for an empty drive */ + dev->conf.blk = blk_new(); + ret = blk_attach_dev(dev->conf.blk, qdev); + assert(ret == 0); + } + + blkconf_blocksizes(&dev->conf); + if (dev->conf.logical_block_size != 512 || + dev->conf.physical_block_size != 512) + { + error_report("Physical and logical block size must be 512 for floppy"); + return -1; + } + + /* rerror/werror aren't supported by fdc and therefore not even registered + * with qdev. So set the defaults manually before they are used in + * blkconf_apply_backend_options(). */ + dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO; + dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO; + blkconf_apply_backend_options(&dev->conf); + + /* 'enospc' is the default for -drive, 'report' is what blk_new() gives us + * for empty drives. 
*/ + if (blk_get_on_error(dev->conf.blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC && + blk_get_on_error(dev->conf.blk, 0) != BLOCKDEV_ON_ERROR_REPORT) { + error_report("fdc doesn't support drive option werror"); + return -1; + } + if (blk_get_on_error(dev->conf.blk, 1) != BLOCKDEV_ON_ERROR_REPORT) { + error_report("fdc doesn't support drive option rerror"); + return -1; + } + + drive->blk = dev->conf.blk; + drive->fdctrl = bus->fdc; + + fd_init(drive); + blk_set_dev_ops(drive->blk, &fd_block_ops, drive); + + /* Keep 'type' qdev property and FDrive->drive in sync */ + drive->drive = dev->type; + pick_drive_type(drive); + dev->type = drive->drive; + + fd_revalidate(drive); + + return 0; +} + +static void floppy_drive_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + k->init = floppy_drive_init; + set_bit(DEVICE_CATEGORY_STORAGE, k->categories); + k->bus_type = TYPE_FLOPPY_BUS; + k->props = floppy_drive_properties; + k->desc = "virtual floppy drive"; +} + +static const TypeInfo floppy_drive_info = { + .name = TYPE_FLOPPY_DRIVE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(FloppyDrive), + .class_init = floppy_drive_class_init, +}; + /********************************************************/ /* Intel 82078 floppy disk controller emulation */ @@ -684,14 +841,20 @@ struct FDCtrl { /* Power down config (also with status regB access mode */ uint8_t pwrd; /* Floppy drives */ + FloppyBus bus; uint8_t num_floppies; FDrive drives[MAX_FD]; + struct { + BlockBackend *blk; + FloppyDriveType type; + } qdev_for_drives[MAX_FD]; int reset_sensei; uint32_t check_media_rate; FloppyDriveType fallback; /* type=auto failure fallback */ /* Timers state */ uint8_t timer0; uint8_t timer1; + PortioList portio_list; }; static FloppyDriveType get_fallback_drive_type(FDrive *drv) @@ -1158,9 +1321,9 @@ static inline FDrive *drv3(FDCtrl *fdctrl) } #endif -static FDrive *get_cur_drv(FDCtrl *fdctrl) +static FDrive *get_drv(FDCtrl *fdctrl, int unit) { - switch (fdctrl->cur_drv) { + switch (unit) { case 0: return drv0(fdctrl); case 1: return drv1(fdctrl); #if MAX_FD == 4 @@ -1171,6 +1334,11 @@ static FDrive *get_cur_drv(FDCtrl *fdctrl) } } +static FDrive *get_cur_drv(FDCtrl *fdctrl) +{ + return get_drv(fdctrl, fdctrl->cur_drv); +} + /* Status A register : 0x00 (read-only) */ static uint32_t fdctrl_read_statusA(FDCtrl *fdctrl) { @@ -2330,46 +2498,49 @@ static void fdctrl_result_timer(void *opaque) } } -static void fdctrl_change_cb(void *opaque, bool load) -{ - FDrive *drive = opaque; - - drive->media_changed = 1; - drive->media_validated = false; - fd_revalidate(drive); -} - -static const BlockDevOps fdctrl_block_ops = { - .change_media_cb = fdctrl_change_cb, -}; - /* Init functions */ -static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp) +static void fdctrl_connect_drives(FDCtrl *fdctrl, Error **errp, + DeviceState *fdc_dev) { unsigned int i; FDrive *drive; + DeviceState *dev; + BlockBackend *blk; + Error *local_err = NULL; for (i = 0; i < MAX_FD; i++) { drive = &fdctrl->drives[i]; drive->fdctrl = fdctrl; - if (drive->blk) { - if (blk_get_on_error(drive->blk, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { - error_setg(errp, "fdc doesn't support drive option werror"); - return; - } - if (blk_get_on_error(drive->blk, 1) != BLOCKDEV_ON_ERROR_REPORT) { - error_setg(errp, "fdc doesn't support drive option rerror"); - return; - } + /* If the drive is not present, we skip creating the qdev device, but + * still have to initialise the controller. 
*/ + blk = fdctrl->qdev_for_drives[i].blk; + if (!blk) { + fd_init(drive); + fd_revalidate(drive); + continue; + } + + dev = qdev_create(&fdctrl->bus.bus, "floppy"); + qdev_prop_set_uint32(dev, "unit", i); + qdev_prop_set_enum(dev, "drive-type", fdctrl->qdev_for_drives[i].type); + + blk_ref(blk); + blk_detach_dev(blk, fdc_dev); + fdctrl->qdev_for_drives[i].blk = NULL; + qdev_prop_set_drive(dev, "drive", blk, &local_err); + blk_unref(blk); + + if (local_err) { + error_propagate(errp, local_err); + return; } - fd_init(drive); - if (drive->blk) { - blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive); - pick_drive_type(drive); + object_property_set_bool(OBJECT(dev), true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; } - fd_revalidate(drive); } } @@ -2441,7 +2612,8 @@ void sun4m_fdctrl_init(qemu_irq irq, hwaddr io_base, *fdc_tc = qdev_get_gpio_in(dev, 0); } -static void fdctrl_realize_common(FDCtrl *fdctrl, Error **errp) +static void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, + Error **errp) { int i, j; static int command_tables_inited = 0; @@ -2479,7 +2651,9 @@ static void fdctrl_realize_common(FDCtrl *fdctrl, Error **errp) k->register_channel(fdctrl->dma, fdctrl->dma_chann, &fdctrl_transfer_handler, fdctrl); } - fdctrl_connect_drives(fdctrl, errp); + + floppy_bus_create(fdctrl, &fdctrl->bus, dev); + fdctrl_connect_drives(fdctrl, errp, dev); } static const MemoryRegionPortio fdc_portio_list[] = { @@ -2495,7 +2669,8 @@ static void isabus_fdc_realize(DeviceState *dev, Error **errp) FDCtrl *fdctrl = &isa->state; Error *err = NULL; - isa_register_portio_list(isadev, isa->iobase, fdc_portio_list, fdctrl, + isa_register_portio_list(isadev, &fdctrl->portio_list, + isa->iobase, fdc_portio_list, fdctrl, "fdc"); isa_init_irq(isadev, &fdctrl->irq, isa->irq); @@ -2506,7 +2681,7 @@ static void isabus_fdc_realize(DeviceState *dev, Error **errp) } qdev_set_legacy_instance_id(dev, isa->iobase, 2); - fdctrl_realize_common(fdctrl, &err); + fdctrl_realize_common(dev, fdctrl, &err); if (err != NULL) { error_propagate(errp, err); return; @@ -2557,7 +2732,7 @@ static void sysbus_fdc_common_realize(DeviceState *dev, Error **errp) FDCtrlSysBus *sys = SYSBUS_FDC(dev); FDCtrl *fdctrl = &sys->state; - fdctrl_realize_common(fdctrl, errp); + fdctrl_realize_common(dev, fdctrl, errp); } FloppyDriveType isa_fdc_get_drive_type(ISADevice *fdc, int i) @@ -2604,14 +2779,14 @@ static Property isa_fdc_properties[] = { DEFINE_PROP_UINT32("iobase", FDCtrlISABus, iobase, 0x3f0), DEFINE_PROP_UINT32("irq", FDCtrlISABus, irq, 6), DEFINE_PROP_UINT32("dma", FDCtrlISABus, dma, 2), - DEFINE_PROP_DRIVE("driveA", FDCtrlISABus, state.drives[0].blk), - DEFINE_PROP_DRIVE("driveB", FDCtrlISABus, state.drives[1].blk), + DEFINE_PROP_DRIVE("driveA", FDCtrlISABus, state.qdev_for_drives[0].blk), + DEFINE_PROP_DRIVE("driveB", FDCtrlISABus, state.qdev_for_drives[1].blk), DEFINE_PROP_BIT("check_media_rate", FDCtrlISABus, state.check_media_rate, 0, true), - DEFINE_PROP_DEFAULT("fdtypeA", FDCtrlISABus, state.drives[0].drive, + DEFINE_PROP_DEFAULT("fdtypeA", FDCtrlISABus, state.qdev_for_drives[0].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), - DEFINE_PROP_DEFAULT("fdtypeB", FDCtrlISABus, state.drives[1].drive, + DEFINE_PROP_DEFAULT("fdtypeB", FDCtrlISABus, state.qdev_for_drives[1].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), DEFINE_PROP_DEFAULT("fallback", FDCtrlISABus, state.fallback, @@ -2663,12 +2838,12 @@ static const VMStateDescription 
vmstate_sysbus_fdc ={ }; static Property sysbus_fdc_properties[] = { - DEFINE_PROP_DRIVE("driveA", FDCtrlSysBus, state.drives[0].blk), - DEFINE_PROP_DRIVE("driveB", FDCtrlSysBus, state.drives[1].blk), - DEFINE_PROP_DEFAULT("fdtypeA", FDCtrlSysBus, state.drives[0].drive, + DEFINE_PROP_DRIVE("driveA", FDCtrlSysBus, state.qdev_for_drives[0].blk), + DEFINE_PROP_DRIVE("driveB", FDCtrlSysBus, state.qdev_for_drives[1].blk), + DEFINE_PROP_DEFAULT("fdtypeA", FDCtrlSysBus, state.qdev_for_drives[0].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), - DEFINE_PROP_DEFAULT("fdtypeB", FDCtrlSysBus, state.drives[1].drive, + DEFINE_PROP_DEFAULT("fdtypeB", FDCtrlSysBus, state.qdev_for_drives[1].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), DEFINE_PROP_DEFAULT("fallback", FDCtrlISABus, state.fallback, @@ -2693,8 +2868,8 @@ static const TypeInfo sysbus_fdc_info = { }; static Property sun4m_fdc_properties[] = { - DEFINE_PROP_DRIVE("drive", FDCtrlSysBus, state.drives[0].blk), - DEFINE_PROP_DEFAULT("fdtype", FDCtrlSysBus, state.drives[0].drive, + DEFINE_PROP_DRIVE("drive", FDCtrlSysBus, state.qdev_for_drives[0].blk), + DEFINE_PROP_DEFAULT("fdtype", FDCtrlSysBus, state.qdev_for_drives[0].type, FLOPPY_DRIVE_TYPE_AUTO, qdev_prop_fdc_drive_type, FloppyDriveType), DEFINE_PROP_DEFAULT("fallback", FDCtrlISABus, state.fallback, @@ -2742,6 +2917,8 @@ static void fdc_register_types(void) type_register_static(&sysbus_fdc_type_info); type_register_static(&sysbus_fdc_info); type_register_static(&sun4m_fdc_info); + type_register_static(&floppy_bus_info); + type_register_static(&floppy_drive_info); } type_init(fdc_register_types) diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 9828ee61d5..d29ff4cb4f 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -1189,9 +1189,9 @@ static Property m25p80_properties[] = { }; static const VMStateDescription vmstate_m25p80 = { - .name = "xilinx_spi", - .version_id = 3, - .minimum_version_id = 1, + .name = "m25p80", + .version_id = 0, + .minimum_version_id = 0, .pre_save = m25p80_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT8(state, Flash), @@ -1200,20 +1200,19 @@ static const VMStateDescription vmstate_m25p80 = { VMSTATE_UINT32(pos, Flash), VMSTATE_UINT8(needed_bytes, Flash), VMSTATE_UINT8(cmd_in_progress, Flash), - VMSTATE_UNUSED(4), VMSTATE_UINT32(cur_addr, Flash), VMSTATE_BOOL(write_enable, Flash), - VMSTATE_BOOL_V(reset_enable, Flash, 2), - VMSTATE_UINT8_V(ear, Flash, 2), - VMSTATE_BOOL_V(four_bytes_address_mode, Flash, 2), - VMSTATE_UINT32_V(nonvolatile_cfg, Flash, 2), - VMSTATE_UINT32_V(volatile_cfg, Flash, 2), - VMSTATE_UINT32_V(enh_volatile_cfg, Flash, 2), - VMSTATE_BOOL_V(quad_enable, Flash, 3), - VMSTATE_UINT8_V(spansion_cr1nv, Flash, 3), - VMSTATE_UINT8_V(spansion_cr2nv, Flash, 3), - VMSTATE_UINT8_V(spansion_cr3nv, Flash, 3), - VMSTATE_UINT8_V(spansion_cr4nv, Flash, 3), + VMSTATE_BOOL(reset_enable, Flash), + VMSTATE_UINT8(ear, Flash), + VMSTATE_BOOL(four_bytes_address_mode, Flash), + VMSTATE_UINT32(nonvolatile_cfg, Flash), + VMSTATE_UINT32(volatile_cfg, Flash), + VMSTATE_UINT32(enh_volatile_cfg, Flash), + VMSTATE_BOOL(quad_enable, Flash), + VMSTATE_UINT8(spansion_cr1nv, Flash), + VMSTATE_UINT8(spansion_cr2nv, Flash), + VMSTATE_UINT8(spansion_cr3nv, Flash), + VMSTATE_UINT8(spansion_cr4nv, Flash), VMSTATE_END_OF_LIST() } }; diff --git a/hw/block/nvme.c b/hw/block/nvme.c index cef3bb42f1..d479fd22f5 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -258,8 +258,10 @@ static uint16_t nvme_rw(NvmeCtrl *n, 
NvmeNamespace *ns, NvmeCmd *cmd, req->has_sg = true; dma_acct_start(n->conf.blk, &req->acct, &req->qsg, acct); req->aiocb = is_write ? - dma_blk_write(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req) : - dma_blk_read(n->conf.blk, &req->qsg, data_offset, nvme_rw_cb, req); + dma_blk_write(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, + nvme_rw_cb, req) : + dma_blk_read(n->conf.blk, &req->qsg, data_offset, BDRV_SECTOR_SIZE, + nvme_rw_cb, req); return NVME_NO_COMPLETE; } @@ -373,7 +375,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) if (!cqid || nvme_check_cqid(n, cqid)) { return NVME_INVALID_CQID | NVME_DNR; } - if (!sqid || (sqid && !nvme_check_sqid(n, sqid))) { + if (!sqid || !nvme_check_sqid(n, sqid)) { return NVME_INVALID_QID | NVME_DNR; } if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { @@ -447,7 +449,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->cq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - if (!cqid || (cqid && !nvme_check_cqid(n, cqid))) { + if (!cqid || !nvme_check_cqid(n, cqid)) { return NVME_INVALID_CQID | NVME_DNR; } if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) { diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 331d7667ec..0c5fd27593 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -29,8 +29,8 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" -void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, - VirtIOBlockReq *req) +static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, + VirtIOBlockReq *req) { req->dev = s; req->vq = vq; @@ -40,7 +40,7 @@ void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, req->mr_next = NULL; } -void virtio_blk_free_request(VirtIOBlockReq *req) +static void virtio_blk_free_request(VirtIOBlockReq *req) { if (req) { g_free(req); @@ -381,7 +381,7 @@ static int multireq_compare(const void *a, const void *b) } } -void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) +static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb) { int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0; uint32_t max_transfer; @@ -468,30 +468,32 @@ static bool virtio_blk_sect_range_ok(VirtIOBlock *dev, return true; } -void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) +static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) { uint32_t type; struct iovec *in_iov = req->elem.in_sg; struct iovec *iov = req->elem.out_sg; unsigned in_num = req->elem.in_num; unsigned out_num = req->elem.out_num; + VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); if (req->elem.out_num < 1 || req->elem.in_num < 1) { - error_report("virtio-blk missing headers"); - exit(1); + virtio_error(vdev, "virtio-blk missing headers"); + return -1; } if (unlikely(iov_to_buf(iov, out_num, 0, &req->out, sizeof(req->out)) != sizeof(req->out))) { - error_report("virtio-blk request outhdr too short"); - exit(1); + virtio_error(vdev, "virtio-blk request outhdr too short"); + return -1; } iov_discard_front(&iov, &out_num, sizeof(req->out)); if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { - error_report("virtio-blk request inhdr too short"); - exit(1); + virtio_error(vdev, "virtio-blk request inhdr too short"); + return -1; } /* We always touch the last byte, so just see how big in_iov is. 
*/ @@ -529,7 +531,7 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) block_acct_invalid(blk_get_stats(req->dev->blk), is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ); virtio_blk_free_request(req); - return; + return 0; } block_acct_start(blk_get_stats(req->dev->blk), @@ -576,6 +578,7 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP); virtio_blk_free_request(req); } + return 0; } void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) @@ -586,7 +589,11 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) blk_io_plug(s->blk); while ((req = virtio_blk_get_request(s, vq))) { - virtio_blk_handle_request(req, &mrb); + if (virtio_blk_handle_request(req, &mrb)) { + virtqueue_detach_element(req->vq, &req->elem, 0); + virtio_blk_free_request(req); + break; + } } if (mrb.num_reqs) { @@ -604,7 +611,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start * dataplane here instead of waiting for .set_status(). */ - virtio_blk_data_plane_start(s->dataplane); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_disabled) { return; } @@ -625,7 +632,18 @@ static void virtio_blk_dma_restart_bh(void *opaque) while (req) { VirtIOBlockReq *next = req->next; - virtio_blk_handle_request(req, &mrb); + if (virtio_blk_handle_request(req, &mrb)) { + /* Device is now broken and won't do any processing until it gets + * reset. Already queued requests will be lost: let's purge them. + */ + while (req) { + next = req->next; + virtqueue_detach_element(req->vq, &req->elem, 0); + virtio_blk_free_request(req); + req = next; + } + break; + } req = next; } @@ -665,14 +683,13 @@ static void virtio_blk_reset(VirtIODevice *vdev) while (s->rq) { req = s->rq; s->rq = req->next; + virtqueue_detach_element(req->vq, &req->elem, 0); virtio_blk_free_request(req); } - if (s->dataplane) { - virtio_blk_data_plane_stop(s->dataplane); - } aio_context_release(ctx); + assert(!s->dataplane_started); blk_set_enable_write_cache(s->blk, s->original_wce); } @@ -770,9 +787,8 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBlock *s = VIRTIO_BLK(vdev); - if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER | - VIRTIO_CONFIG_S_DRIVER_OK))) { - virtio_blk_data_plane_stop(s->dataplane); + if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { + assert(!s->dataplane_started); } if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { @@ -803,13 +819,6 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) } } -static void virtio_blk_save(QEMUFile *f, void *opaque, size_t size) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - - virtio_save(vdev, f); -} - static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIOBlock *s = VIRTIO_BLK(vdev); @@ -828,14 +837,6 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) qemu_put_sbyte(f, 0); } -static int virtio_blk_load(QEMUFile *f, void *opaque, size_t size) -{ - VirtIOBlock *s = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(s); - - return virtio_load(vdev, f, 2); -} - static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, int version_id) { @@ -915,7 +916,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; for (i = 0; i < conf->num_queues; i++) { - virtio_add_queue_aio(vdev, 128, virtio_blk_handle_output); + 
virtio_add_queue(vdev, 128, virtio_blk_handle_output); } virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); if (err != NULL) { @@ -956,7 +957,15 @@ static void virtio_blk_instance_init(Object *obj) DEVICE(obj), NULL); } -VMSTATE_VIRTIO_DEVICE(blk, 2, virtio_blk_load, virtio_blk_save); +static const VMStateDescription vmstate_virtio_blk = { + .name = "virtio-blk", + .minimum_version_id = 2, + .version_id = 2, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; static Property virtio_blk_properties[] = { DEFINE_BLOCK_PROPERTIES(VirtIOBlock, conf.conf), @@ -990,9 +999,11 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) vdc->reset = virtio_blk_reset; vdc->save = virtio_blk_save_device; vdc->load = virtio_blk_load_device; + vdc->start_ioeventfd = virtio_blk_data_plane_start; + vdc->stop_ioeventfd = virtio_blk_data_plane_stop; } -static const TypeInfo virtio_device_info = { +static const TypeInfo virtio_blk_info = { .name = TYPE_VIRTIO_BLK, .parent = TYPE_VIRTIO_DEVICE, .instance_size = sizeof(VirtIOBlock), @@ -1002,7 +1013,7 @@ static const TypeInfo virtio_device_info = { static void virtio_register_types(void) { - type_register_static(&virtio_device_info); + type_register_static(&virtio_blk_info); } type_init(virtio_register_types) diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 3b8ad33fc5..456a2d5694 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -119,6 +119,9 @@ struct XenBlkDev { unsigned int persistent_gnt_count; unsigned int max_grants; + /* Grant copy */ + gboolean feature_grant_copy; + /* qemu block driver */ DriveInfo *dinfo; BlockBackend *blk; @@ -164,12 +167,12 @@ static void destroy_grant(gpointer pgnt) xengnttab_handle *gnt = grant->blkdev->xendev.gnttabdev; if (xengnttab_unmap(gnt, grant->page, 1) != 0) { - xen_be_printf(&grant->blkdev->xendev, 0, + xen_pv_printf(&grant->blkdev->xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } grant->blkdev->persistent_gnt_count--; - xen_be_printf(&grant->blkdev->xendev, 3, + xen_pv_printf(&grant->blkdev->xendev, 3, "unmapped grant %p\n", grant->page); g_free(grant); } @@ -181,11 +184,11 @@ static void remove_persistent_region(gpointer data, gpointer dev) xengnttab_handle *gnt = blkdev->xendev.gnttabdev; if (xengnttab_unmap(gnt, region->addr, region->num) != 0) { - xen_be_printf(&blkdev->xendev, 0, + xen_pv_printf(&blkdev->xendev, 0, "xengnttab_unmap region %p failed: %s\n", region->addr, strerror(errno)); } - xen_be_printf(&blkdev->xendev, 3, + xen_pv_printf(&blkdev->xendev, 3, "unmapped grant region %p with %d pages\n", region->addr, region->num); g_free(region); @@ -252,7 +255,7 @@ static int ioreq_parse(struct ioreq *ioreq) size_t len; int i; - xen_be_printf(&blkdev->xendev, 3, + xen_pv_printf(&blkdev->xendev, 3, "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n", ioreq->req.operation, ioreq->req.nr_segments, ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number); @@ -272,28 +275,28 @@ static int ioreq_parse(struct ioreq *ioreq) case BLKIF_OP_DISCARD: return 0; default: - xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", + xen_pv_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n", ioreq->req.operation); goto err; }; if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') { - xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n"); + xen_pv_printf(&blkdev->xendev, 0, "error: write req for ro device\n"); goto err; } ioreq->start = ioreq->req.sector_number * 
blkdev->file_blk; for (i = 0; i < ioreq->req.nr_segments; i++) { if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); + xen_pv_printf(&blkdev->xendev, 0, "error: nr_segments too big\n"); goto err; } if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) { - xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n"); + xen_pv_printf(&blkdev->xendev, 0, "error: first > last sector\n"); goto err; } if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) { - xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n"); + xen_pv_printf(&blkdev->xendev, 0, "error: page crossing\n"); goto err; } @@ -305,7 +308,7 @@ static int ioreq_parse(struct ioreq *ioreq) qemu_iovec_add(&ioreq->v, (void*)mem, len); } if (ioreq->start + ioreq->v.size > blkdev->file_size) { - xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); + xen_pv_printf(&blkdev->xendev, 0, "error: access beyond end of file\n"); goto err; } return 0; @@ -328,7 +331,7 @@ static void ioreq_unmap(struct ioreq *ioreq) return; } if (xengnttab_unmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) { - xen_be_printf(&ioreq->blkdev->xendev, 0, + xen_pv_printf(&ioreq->blkdev->xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } @@ -340,7 +343,7 @@ static void ioreq_unmap(struct ioreq *ioreq) continue; } if (xengnttab_unmap(gnt, ioreq->page[i], 1) != 0) { - xen_be_printf(&ioreq->blkdev->xendev, 0, + xen_pv_printf(&ioreq->blkdev->xendev, 0, "xengnttab_unmap failed: %s\n", strerror(errno)); } @@ -378,7 +381,7 @@ static int ioreq_map(struct ioreq *ioreq) if (grant != NULL) { page[i] = grant->page; - xen_be_printf(&ioreq->blkdev->xendev, 3, + xen_pv_printf(&ioreq->blkdev->xendev, 3, "using persistent-grant %" PRIu32 "\n", ioreq->refs[i]); } else { @@ -407,7 +410,7 @@ static int ioreq_map(struct ioreq *ioreq) ioreq->pages = xengnttab_map_grant_refs (gnt, new_maps, domids, refs, ioreq->prot); if (ioreq->pages == NULL) { - xen_be_printf(&ioreq->blkdev->xendev, 0, + xen_pv_printf(&ioreq->blkdev->xendev, 0, "can't map %d grant refs (%s, %d maps)\n", new_maps, strerror(errno), ioreq->blkdev->cnt_map); return -1; @@ -423,7 +426,7 @@ static int ioreq_map(struct ioreq *ioreq) ioreq->page[i] = xengnttab_map_grant_ref (gnt, domids[i], refs[i], ioreq->prot); if (ioreq->page[i] == NULL) { - xen_be_printf(&ioreq->blkdev->xendev, 0, + xen_pv_printf(&ioreq->blkdev->xendev, 0, "can't map grant ref %d (%s, %d maps)\n", refs[i], strerror(errno), ioreq->blkdev->cnt_map); ioreq->mapped = 1; @@ -471,7 +474,7 @@ static int ioreq_map(struct ioreq *ioreq) grant->page = ioreq->page[new_maps]; } grant->blkdev = ioreq->blkdev; - xen_be_printf(&ioreq->blkdev->xendev, 3, + xen_pv_printf(&ioreq->blkdev->xendev, 3, "adding grant %" PRIu32 " page: %p\n", refs[new_maps], grant->page); g_tree_insert(ioreq->blkdev->persistent_gnts, @@ -489,6 +492,106 @@ static int ioreq_map(struct ioreq *ioreq) return 0; } +#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 480 + +static void ioreq_free_copy_buffers(struct ioreq *ioreq) +{ + int i; + + for (i = 0; i < ioreq->v.niov; i++) { + ioreq->page[i] = NULL; + } + + qemu_vfree(ioreq->pages); +} + +static int ioreq_init_copy_buffers(struct ioreq *ioreq) +{ + int i; + + if (ioreq->v.niov == 0) { + return 0; + } + + ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE); + + for (i = 0; i < ioreq->v.niov; i++) { + ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE; + ioreq->v.iov[i].iov_base = ioreq->page[i]; + } + + return 0; +} + +static int 
ioreq_grant_copy(struct ioreq *ioreq) +{ + xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev; + xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i, count, rc; + int64_t file_blk = ioreq->blkdev->file_blk; + + if (ioreq->v.niov == 0) { + return 0; + } + + count = ioreq->v.niov; + + for (i = 0; i < count; i++) { + if (ioreq->req.operation == BLKIF_OP_READ) { + segs[i].flags = GNTCOPY_dest_gref; + segs[i].dest.foreign.ref = ioreq->refs[i]; + segs[i].dest.foreign.domid = ioreq->domids[i]; + segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk; + segs[i].source.virt = ioreq->v.iov[i].iov_base; + } else { + segs[i].flags = GNTCOPY_source_gref; + segs[i].source.foreign.ref = ioreq->refs[i]; + segs[i].source.foreign.domid = ioreq->domids[i]; + segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk; + segs[i].dest.virt = ioreq->v.iov[i].iov_base; + } + segs[i].len = (ioreq->req.seg[i].last_sect + - ioreq->req.seg[i].first_sect + 1) * file_blk; + } + + rc = xengnttab_grant_copy(gnt, count, segs); + + if (rc) { + xen_pv_printf(&ioreq->blkdev->xendev, 0, + "failed to copy data %d\n", rc); + ioreq->aio_errors++; + return -1; + } + + for (i = 0; i < count; i++) { + if (segs[i].status != GNTST_okay) { + xen_pv_printf(&ioreq->blkdev->xendev, 3, + "failed to copy data %d for gref %d, domid %d\n", + segs[i].status, ioreq->refs[i], ioreq->domids[i]); + ioreq->aio_errors++; + rc = -1; + } + } + + return rc; +} +#else +static void ioreq_free_copy_buffers(struct ioreq *ioreq) +{ + abort(); +} + +static int ioreq_init_copy_buffers(struct ioreq *ioreq) +{ + abort(); +} + +static int ioreq_grant_copy(struct ioreq *ioreq) +{ + abort(); +} +#endif + static int ioreq_runio_qemu_aio(struct ioreq *ioreq); static void qemu_aio_complete(void *opaque, int ret) @@ -496,7 +599,7 @@ static void qemu_aio_complete(void *opaque, int ret) struct ioreq *ioreq = opaque; if (ret != 0) { - xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n", + xen_pv_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n", ioreq->req.operation == BLKIF_OP_READ ? "read" : "write"); ioreq->aio_errors++; } @@ -511,8 +614,31 @@ static void qemu_aio_complete(void *opaque, int ret) return; } + if (ioreq->blkdev->feature_grant_copy) { + switch (ioreq->req.operation) { + case BLKIF_OP_READ: + /* in case of failure ioreq->aio_errors is increased */ + if (ret == 0) { + ioreq_grant_copy(ioreq); + } + ioreq_free_copy_buffers(ioreq); + break; + case BLKIF_OP_WRITE: + case BLKIF_OP_FLUSH_DISKCACHE: + if (!ioreq->req.nr_segments) { + break; + } + ioreq_free_copy_buffers(ioreq); + break; + default: + break; + } + } + ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY; - ioreq_unmap(ioreq); + if (!ioreq->blkdev->feature_grant_copy) { + ioreq_unmap(ioreq); + } ioreq_finish(ioreq); switch (ioreq->req.operation) { case BLKIF_OP_WRITE: @@ -534,12 +660,54 @@ static void qemu_aio_complete(void *opaque, int ret) qemu_bh_schedule(ioreq->blkdev->bh); } +static bool blk_split_discard(struct ioreq *ioreq, blkif_sector_t sector_number, + uint64_t nr_sectors) +{ + struct XenBlkDev *blkdev = ioreq->blkdev; + int64_t byte_offset; + int byte_chunk; + uint64_t byte_remaining, limit; + uint64_t sec_start = sector_number; + uint64_t sec_count = nr_sectors; + + /* Wrap around, or overflowing byte limit? 
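
Aside: the per-segment setup in ioreq_grant_copy() above is the heart of the new grant-copy path. The sketch below restates it in isolation; it is not part of the patch, fill_copy_seg() and its parameter names are hypothetical, and it assumes the Xen 4.8+ <xengnttab.h> header that the surrounding #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 480 block already requires.

    #include <stdbool.h>
    #include <stdint.h>
    #include <xengnttab.h>

    /* For a guest read the local buffer is copied into the granted page
     * (GNTCOPY_dest_gref); for a guest write the granted page is copied
     * into the local buffer (GNTCOPY_source_gref). */
    static void fill_copy_seg(xengnttab_grant_copy_segment_t *seg,
                              bool to_guest, void *local_buf,
                              uint32_t gref, uint16_t domid,
                              uint16_t offset_in_page, uint16_t len)
    {
        if (to_guest) {
            seg->flags = GNTCOPY_dest_gref;
            seg->dest.foreign.ref = gref;
            seg->dest.foreign.domid = domid;
            seg->dest.foreign.offset = offset_in_page;
            seg->source.virt = local_buf;
        } else {
            seg->flags = GNTCOPY_source_gref;
            seg->source.foreign.ref = gref;
            seg->source.foreign.domid = domid;
            seg->source.foreign.offset = offset_in_page;
            seg->dest.virt = local_buf;
        }
        seg->len = len;
    }

xengnttab_grant_copy() then takes an array of such segments and returns 0 on success, with per-segment status fields to check against GNTST_okay, exactly as ioreq_grant_copy() above does.
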
*/ + if (sec_start + sec_count < sec_count || + sec_start + sec_count > INT64_MAX >> BDRV_SECTOR_BITS) { + return false; + } + + limit = BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS; + byte_offset = sec_start << BDRV_SECTOR_BITS; + byte_remaining = sec_count << BDRV_SECTOR_BITS; + + do { + byte_chunk = byte_remaining > limit ? limit : byte_remaining; + ioreq->aio_inflight++; + blk_aio_pdiscard(blkdev->blk, byte_offset, byte_chunk, + qemu_aio_complete, ioreq); + byte_remaining -= byte_chunk; + byte_offset += byte_chunk; + } while (byte_remaining > 0); + + return true; +} + static int ioreq_runio_qemu_aio(struct ioreq *ioreq) { struct XenBlkDev *blkdev = ioreq->blkdev; - if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) { - goto err_no_map; + if (ioreq->blkdev->feature_grant_copy) { + ioreq_init_copy_buffers(ioreq); + if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE || + ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) && + ioreq_grant_copy(ioreq)) { + ioreq_free_copy_buffers(ioreq); + goto err; + } + } else { + if (ioreq->req.nr_segments && ioreq_map(ioreq)) { + goto err; + } } ioreq->aio_inflight++; @@ -572,16 +740,17 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) break; case BLKIF_OP_DISCARD: { - struct blkif_request_discard *discard_req = (void *)&ioreq->req; - ioreq->aio_inflight++; - blk_aio_pdiscard(blkdev->blk, - discard_req->sector_number << BDRV_SECTOR_BITS, - discard_req->nr_sectors << BDRV_SECTOR_BITS, - qemu_aio_complete, ioreq); + struct blkif_request_discard *req = (void *)&ioreq->req; + if (!blk_split_discard(ioreq, req->sector_number, req->nr_sectors)) { + goto err; + } break; } default: /* unknown operation (shouldn't happen -- parse catches this) */ + if (!ioreq->blkdev->feature_grant_copy) { + ioreq_unmap(ioreq); + } goto err; } @@ -590,8 +759,6 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq) return 0; err: - ioreq_unmap(ioreq); -err_no_map: ioreq_finish(ioreq); ioreq->status = BLKIF_RSP_ERROR; return -1; @@ -659,7 +826,7 @@ static void blk_send_response_all(struct XenBlkDev *blkdev) ioreq_release(ioreq, true); } if (send_notify) { - xen_be_send_notify(&blkdev->xendev); + xen_pv_send_notify(&blkdev->xendev); } } @@ -729,7 +896,7 @@ static void blk_handle_requests(struct XenBlkDev *blkdev) }; if (blk_send_response_one(ioreq)) { - xen_be_send_notify(&blkdev->xendev); + xen_pv_send_notify(&blkdev->xendev); } ioreq_release(ioreq, false); continue; @@ -773,7 +940,7 @@ static void blk_alloc(struct XenDevice *xendev) } if (xengnttab_set_max_grants(xendev->gnttabdev, MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) { - xen_be_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", + xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", strerror(errno)); } } @@ -919,11 +1086,11 @@ static int blk_connect(struct XenDevice *xendev) } /* setup via xenbus -> create new block driver instance */ - xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); + xen_pv_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n"); blkdev->blk = blk_new_open(blkdev->filename, NULL, options, qflags, &local_err); if (!blkdev->blk) { - xen_be_printf(&blkdev->xendev, 0, "error: %s\n", + xen_pv_printf(&blkdev->xendev, 0, "error: %s\n", error_get_pretty(local_err)); error_free(local_err); return -1; @@ -931,10 +1098,11 @@ static int blk_connect(struct XenDevice *xendev) blk_set_enable_write_cache(blkdev->blk, !writethrough); } else { /* setup via qemu cmdline -> already setup for us */ - 
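
Aside: blk_split_discard() above packs two ideas into a few lines: reject a sector range that wraps around or exceeds the signed byte range, then chop the remainder into chunks no larger than the per-request limit. A standalone sketch of the same logic follows; names and the stand-in constants for BDRV_SECTOR_BITS and BDRV_REQUEST_MAX_SECTORS are made up, and printf stands in for blk_aio_pdiscard().

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_BITS 9                 /* stand-in for BDRV_SECTOR_BITS */
    #define MAX_REQ_SECTORS 0x400000ULL   /* stand-in for BDRV_REQUEST_MAX_SECTORS */

    /* Split [sector, sector + count) into chunks no larger than the
     * per-request limit, rejecting ranges that wrap around or overflow
     * the signed byte offset range. */
    static bool split_discard(uint64_t sector, uint64_t count)
    {
        if (sector + count < count ||
            sector + count > INT64_MAX >> SECTOR_BITS) {
            return false;                 /* wrap-around or byte overflow */
        }

        uint64_t limit = MAX_REQ_SECTORS << SECTOR_BITS;
        int64_t offset = sector << SECTOR_BITS;
        uint64_t remaining = count << SECTOR_BITS;

        do {
            uint64_t chunk = remaining > limit ? limit : remaining;
            printf("discard offset=%" PRId64 " len=%" PRIu64 "\n",
                   offset, chunk);
            remaining -= chunk;
            offset += chunk;
        } while (remaining > 0);

        return true;
    }

    int main(void)
    {
        return split_discard(0, 3 * MAX_REQ_SECTORS / 2) ? 0 : 1;
    }
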
xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); + xen_pv_printf(&blkdev->xendev, 2, + "get configured bdrv (cmdline setup)\n"); blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo); if (blk_is_read_only(blkdev->blk) && !readonly) { - xen_be_printf(&blkdev->xendev, 0, "Unexpected read-only drive"); + xen_pv_printf(&blkdev->xendev, 0, "Unexpected read-only drive"); blkdev->blk = NULL; return -1; } @@ -942,18 +1110,18 @@ static int blk_connect(struct XenDevice *xendev) * so we can blk_unref() unconditionally */ blk_ref(blkdev->blk); } - blk_attach_dev_nofail(blkdev->blk, blkdev); + blk_attach_dev_legacy(blkdev->blk, blkdev); blkdev->file_size = blk_getlength(blkdev->blk); if (blkdev->file_size < 0) { BlockDriverState *bs = blk_bs(blkdev->blk); const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL; - xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n", + xen_pv_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n", (int)blkdev->file_size, strerror(-blkdev->file_size), drv_name ?: "-"); blkdev->file_size = 0; } - xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," + xen_pv_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\"," " size %" PRId64 " (%" PRId64 " MB)\n", blkdev->type, blkdev->fileproto, blkdev->filename, blkdev->file_size, blkdev->file_size >> 20); @@ -976,14 +1144,16 @@ static int blk_connect(struct XenDevice *xendev) blkdev->feature_persistent = !!pers; } - blkdev->protocol = BLKIF_PROTOCOL_NATIVE; - if (blkdev->xendev.protocol) { - if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { - blkdev->protocol = BLKIF_PROTOCOL_X86_32; - } - if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { - blkdev->protocol = BLKIF_PROTOCOL_X86_64; - } + if (!blkdev->xendev.protocol) { + blkdev->protocol = BLKIF_PROTOCOL_NATIVE; + } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_NATIVE) == 0) { + blkdev->protocol = BLKIF_PROTOCOL_NATIVE; + } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) { + blkdev->protocol = BLKIF_PROTOCOL_X86_32; + } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) { + blkdev->protocol = BLKIF_PROTOCOL_X86_64; + } else { + blkdev->protocol = BLKIF_PROTOCOL_NATIVE; } blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev, @@ -1032,7 +1202,13 @@ static int blk_connect(struct XenDevice *xendev) xen_be_bind_evtchn(&blkdev->xendev); - xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " + blkdev->feature_grant_copy = + (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0); + + xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n", + blkdev->feature_grant_copy ? 
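
Aside: the feature_grant_copy probe assigned just above is easy to miss: a zero-segment xengnttab_grant_copy() call is used purely as a capability test. A one-line sketch of the idea (probe_grant_copy() is a hypothetical name, same Xen 4.8+ header assumption as before):

    #include <stdbool.h>
    #include <xengnttab.h>

    /* A zero-segment copy only succeeds when the kernel's grant device
     * supports the copy operation; on failure the device keeps using the
     * older grant-mapping path (ioreq_map/ioreq_unmap). */
    static bool probe_grant_copy(xengnttab_handle *gnt)
    {
        return xengnttab_grant_copy(gnt, 0, NULL) == 0;
    }
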
"enabled" : "disabled"); + + xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, " "remote port %d, local port %d\n", blkdev->xendev.protocol, blkdev->ring_ref, blkdev->xendev.remote_port, blkdev->xendev.local_port); @@ -1048,7 +1224,7 @@ static void blk_disconnect(struct XenDevice *xendev) blk_unref(blkdev->blk); blkdev->blk = NULL; } - xen_be_unbind_evtchn(&blkdev->xendev); + xen_pv_unbind_evtchn(&blkdev->xendev); if (blkdev->sring) { xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1); diff --git a/hw/bt/hci-csr.c b/hw/bt/hci-csr.c index d688372ca3..fbb3109cc1 100644 --- a/hw/bt/hci-csr.c +++ b/hw/bt/hci-csr.c @@ -78,15 +78,17 @@ enum { static inline void csrhci_fifo_wake(struct csrhci_s *s) { + CharBackend *be = s->chr.be; + if (!s->enable || !s->out_len) return; /* XXX: Should wait for s->modem_state & CHR_TIOCM_RTS? */ - if (s->chr.chr_can_read && s->chr.chr_can_read(s->chr.handler_opaque) && - s->chr.chr_read) { - s->chr.chr_read(s->chr.handler_opaque, - s->outfifo + s->out_start ++, 1); - s->out_len --; + if (be && be->chr_can_read && be->chr_can_read(be->opaque) && + be->chr_read) { + be->chr_read(be->opaque, + s->outfifo + s->out_start++, 1); + s->out_len--; if (s->out_start >= s->out_size) { s->out_start = 0; s->out_size = FIFO_LEN; @@ -458,7 +460,7 @@ qemu_irq *csrhci_pins_get(CharDriverState *chr) return s->pins; } -CharDriverState *uart_hci_init(qemu_irq wakeup) +CharDriverState *uart_hci_init(void) { struct csrhci_s *s = (struct csrhci_s *) g_malloc0(sizeof(struct csrhci_s)); @@ -466,7 +468,6 @@ CharDriverState *uart_hci_init(qemu_irq wakeup) s->chr.opaque = s; s->chr.chr_write = csrhci_write; s->chr.chr_ioctl = csrhci_ioctl; - s->chr.avail_connections = 1; s->hci = qemu_next_hci(); s->hci->opaque = s; diff --git a/hw/bt/hci.c b/hw/bt/hci.c index 351123fab7..476ebec0ab 100644 --- a/hw/bt/hci.c +++ b/hw/bt/hci.c @@ -421,7 +421,7 @@ static void bt_submit_raw_acl(struct bt_piconet_s *net, int length, uint8_t *dat /* HCI layer emulation */ -/* Note: we could ignore endiannes because unswapped handles will still +/* Note: we could ignore endianness because unswapped handles will still * be valid as connection identifiers for the guest - they don't have to * be continuously allocated. We do it though, to preserve similar * behaviour between hosts. Some things, like the BD_ADDR cannot be diff --git a/hw/char/bcm2835_aux.c b/hw/char/bcm2835_aux.c index 319f1652f6..4d46ad60ae 100644 --- a/hw/char/bcm2835_aux.c +++ b/hw/char/bcm2835_aux.c @@ -79,9 +79,7 @@ static uint64_t bcm2835_aux_read(void *opaque, hwaddr offset, unsigned size) s->read_pos = 0; } } - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); bcm2835_aux_update(s); return c; @@ -168,9 +166,9 @@ static void bcm2835_aux_write(void *opaque, hwaddr offset, uint64_t value, case AUX_MU_IO_REG: /* "DLAB bit set means access baudrate register" is NYI */ ch = value; - if (s->chr) { - qemu_chr_fe_write(s->chr, &ch, 1); - } + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); break; case AUX_MU_IER_REG: @@ -280,10 +278,8 @@ static void bcm2835_aux_realize(DeviceState *dev, Error **errp) { BCM2835AuxState *s = BCM2835_AUX(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, bcm2835_aux_can_receive, - bcm2835_aux_receive, NULL, s); - } + qemu_chr_fe_set_handlers(&s->chr, bcm2835_aux_can_receive, + bcm2835_aux_receive, NULL, s, NULL, true); } static Property bcm2835_aux_props[] = { diff --git a/hw/char/cadence_uart.c b/hw/char/cadence_uart.c index e3bc52f7df..0215d6518d 100644 --- a/hw/char/cadence_uart.c +++ b/hw/char/cadence_uart.c @@ -1,6 +1,11 @@ /* * Device model for Cadence UART * + * Reference: Xilinx Zynq 7000 reference manual + * - http://www.xilinx.com/support/documentation/user_guides/ug585-Zynq-7000-TRM.pdf + * - Chapter 19 UART Controller + * - Appendix B for Register details + * * Copyright (c) 2010 Xilinx Inc. * Copyright (c) 2012 Peter A.G. Crosthwaite (peter.crosthwaite@petalogix.com) * Copyright (c) 2012 PetaLogix Pty Ltd. @@ -142,9 +147,7 @@ static void uart_rx_reset(CadenceUARTState *s) { s->rx_wpos = 0; s->rx_count = 0; - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); } static void uart_tx_reset(CadenceUARTState *s) @@ -156,10 +159,8 @@ static void uart_send_breaks(CadenceUARTState *s) { int break_enabled = 1; - if (s->chr) { - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_BREAK, - &break_enabled); - } + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_BREAK, + &break_enabled); } static void uart_parameters_setup(CadenceUARTState *s) @@ -210,9 +211,7 @@ static void uart_parameters_setup(CadenceUARTState *s) packet_size += ssp.data_bits + ssp.stop_bits; s->char_tx_time = (NANOSECONDS_PER_SECOND / ssp.speed) * packet_size; - if (s->chr) { - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); - } + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); } static int uart_can_receive(void *opaque) @@ -278,7 +277,7 @@ static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond, int ret; /* instant drain the fifo when there's no back-end */ - if (!s->chr) { + if (!qemu_chr_fe_get_driver(&s->chr)) { s->tx_count = 0; return FALSE; } @@ -287,7 +286,7 @@ static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond, return FALSE; } - ret = qemu_chr_fe_write(s->chr, s->tx_fifo, s->tx_count); + ret = qemu_chr_fe_write(&s->chr, s->tx_fifo, s->tx_count); if (ret >= 0) { s->tx_count -= ret; @@ -295,7 +294,7 @@ static gboolean cadence_uart_xmit(GIOChannel *chan, GIOCondition cond, } if (s->tx_count) { - guint r = qemu_chr_fe_add_watch(s->chr, G_IO_OUT|G_IO_HUP, + guint r = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, cadence_uart_xmit, s); if (!r) { s->tx_count = 0; @@ -368,9 +367,7 @@ static void uart_read_rx_fifo(CadenceUARTState *s, uint32_t *c) *c = s->rx_fifo[rx_rpos]; s->rx_count--; - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); } else { *c = 0; } @@ -410,6 +407,16 @@ static void uart_write(void *opaque, hwaddr offset, break; } break; + case R_BRGR: /* Baud rate generator */ + if (value >= 0x01) { + s->r[offset] = value & 0xFFFF; + } + break; + case R_BDIV: /* Baud rate divider */ + if (value >= 0x04) { + s->r[offset] = value & 0xFF; + } + break; default: s->r[offset] = value; } @@ -458,7 +465,8 @@ static void cadence_uart_reset(DeviceState *dev) s->r[R_IMR] = 0; s->r[R_CISR] = 0; s->r[R_RTRIG] = 0x00000020; - 
s->r[R_BRGR] = 0x0000000F; + s->r[R_BRGR] = 0x0000028B; + s->r[R_BDIV] = 0x0000000F; s->r[R_TTRIG] = 0x00000020; uart_rx_reset(s); @@ -474,10 +482,8 @@ static void cadence_uart_realize(DeviceState *dev, Error **errp) s->fifo_trigger_handle = timer_new_ns(QEMU_CLOCK_VIRTUAL, fifo_trigger_update, s); - if (s->chr) { - qemu_chr_add_handlers(s->chr, uart_can_receive, uart_receive, - uart_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, uart_can_receive, uart_receive, + uart_event, s, NULL, true); } static void cadence_uart_init(Object *obj) diff --git a/hw/char/debugcon.c b/hw/char/debugcon.c index e7f025ec67..80dce07e7f 100644 --- a/hw/char/debugcon.c +++ b/hw/char/debugcon.c @@ -39,7 +39,7 @@ typedef struct DebugconState { MemoryRegion io; - CharDriverState *chr; + CharBackend chr; uint32_t readback; } DebugconState; @@ -60,7 +60,9 @@ static void debugcon_ioport_write(void *opaque, hwaddr addr, uint64_t val, printf(" [debugcon: write addr=0x%04" HWADDR_PRIx " val=0x%02" PRIx64 "]\n", addr, val); #endif - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); } @@ -85,12 +87,12 @@ static const MemoryRegionOps debugcon_ops = { static void debugcon_realize_core(DebugconState *s, Error **errp) { - if (!s->chr) { + if (!qemu_chr_fe_get_driver(&s->chr)) { error_setg(errp, "Can't create debugcon device, empty char device"); return; } - qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, s); + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, NULL, s, NULL, true); } static void debugcon_isa_realizefn(DeviceState *dev, Error **errp) diff --git a/hw/char/digic-uart.c b/hw/char/digic-uart.c index c7604e6766..029f5bbf5e 100644 --- a/hw/char/digic-uart.c +++ b/hw/char/digic-uart.c @@ -76,9 +76,9 @@ static void digic_uart_write(void *opaque, hwaddr addr, uint64_t value, switch (addr) { case R_TX: - if (s->chr) { - qemu_chr_fe_write_all(s->chr, &ch, 1); - } + /* XXX this blocks entire thread. 
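
Aside: from here on, every character device in this series follows the same conversion recipe, so it is summarised once. The sketch below is illustrative only: DemoUART and its callbacks are hypothetical, it is a fragment against the QEMU headers these files already include rather than a standalone program, and it uses only front-end calls that appear verbatim in the patch.

    #include "qemu/osdep.h"
    #include "sysemu/char.h"

    typedef struct DemoUART {
        CharBackend chr;                  /* was: CharDriverState *chr */
    } DemoUART;

    static int demo_can_receive(void *opaque)
    {
        return 1;                         /* always room in this sketch */
    }

    static void demo_receive(void *opaque, const uint8_t *buf, int size)
    {
        /* ...queue the received bytes... */
    }

    static void demo_rx_drained(DemoUART *s)
    {
        /* was: if (s->chr) { qemu_chr_accept_input(s->chr); }
         * The front end tolerates an unconnected backend, so the guard
         * is no longer needed. */
        qemu_chr_fe_accept_input(&s->chr);
    }

    static void demo_realize(DemoUART *s)
    {
        qemu_chr_fe_set_handlers(&s->chr, demo_can_receive, demo_receive,
                                 NULL, s, NULL, true);
    }

    static void demo_transmit(DemoUART *s, uint8_t ch)
    {
        /* XXX blocking, same caveat as the comments added in this patch */
        qemu_chr_fe_write_all(&s->chr, &ch, 1);
    }

The last two qemu_chr_fe_set_handlers() arguments are the GMainContext (NULL for the default) and a set_open flag; judging from the hunks in this patch, the virtio-console conversion near the end is the one caller that passes false there, keeping explicit open/close events.
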
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); break; case R_ST: @@ -145,9 +145,8 @@ static void digic_uart_realize(DeviceState *dev, Error **errp) { DigicUartState *s = DIGIC_UART(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, + uart_event, s, NULL, true); } static void digic_uart_init(Object *obj) diff --git a/hw/char/escc.c b/hw/char/escc.c index 31a5f902f9..d6662dc77d 100644 --- a/hw/char/escc.c +++ b/hw/char/escc.c @@ -88,7 +88,7 @@ typedef struct ChannelState { uint32_t reg; uint8_t wregs[SERIAL_REGS], rregs[SERIAL_REGS]; SERIOQueue queue; - CharDriverState *chr; + CharBackend chr; int e0_mode, led_mode, caps_lock_mode, num_lock_mode; int disabled; int clock; @@ -416,7 +416,7 @@ static void escc_update_parameters(ChannelState *s) int speed, parity, data_bits, stop_bits; QEMUSerialSetParams ssp; - if (!s->chr || s->type != ser) + if (!qemu_chr_fe_get_driver(&s->chr) || s->type != ser) return; if (s->wregs[W_TXCTRL1] & TXCTRL1_PAREN) { @@ -466,7 +466,7 @@ static void escc_update_parameters(ChannelState *s) ssp.data_bits = data_bits; ssp.stop_bits = stop_bits; trace_escc_update_parameters(CHN_C(s), speed, parity, data_bits, stop_bits); - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); } static void escc_mem_write(void *opaque, hwaddr addr, @@ -556,9 +556,11 @@ static void escc_mem_write(void *opaque, hwaddr addr, trace_escc_mem_writeb_data(CHN_C(s), val); s->tx = val; if (s->wregs[W_TXCTRL2] & TXCTRL2_TXEN) { // tx enabled - if (s->chr) - qemu_chr_fe_write(s->chr, &s->tx, 1); - else if (s->type == kbd && !s->disabled) { + if (qemu_chr_fe_get_driver(&s->chr)) { + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &s->tx, 1); + } else if (s->type == kbd && !s->disabled) { handle_kbd_command(s, val); } } @@ -597,8 +599,7 @@ static uint64_t escc_mem_read(void *opaque, hwaddr addr, else ret = s->rx; trace_escc_mem_readb_data(CHN_C(s), ret); - if (s->chr) - qemu_chr_accept_input(s->chr); + qemu_chr_fe_accept_input(&s->chr); return ret; default: break; @@ -1011,10 +1012,11 @@ static void escc_realize(DeviceState *dev, Error **errp) ESCC_SIZE << s->it_shift); for (i = 0; i < 2; i++) { - if (s->chn[i].chr) { + if (qemu_chr_fe_get_driver(&s->chn[i].chr)) { s->chn[i].clock = s->frequency / 2; - qemu_chr_add_handlers(s->chn[i].chr, serial_can_receive, - serial_receive1, serial_event, &s->chn[i]); + qemu_chr_fe_set_handlers(&s->chn[i].chr, serial_can_receive, + serial_receive1, serial_event, + &s->chn[i], NULL, true); } } diff --git a/hw/char/etraxfs_ser.c b/hw/char/etraxfs_ser.c index 04ca04fe2c..54383878e0 100644 --- a/hw/char/etraxfs_ser.c +++ b/hw/char/etraxfs_ser.c @@ -53,7 +53,7 @@ typedef struct ETRAXSerial { SysBusDevice parent_obj; MemoryRegion mmio; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; int pending_tx; @@ -126,7 +126,9 @@ ser_write(void *opaque, hwaddr addr, switch (addr) { case RW_DOUT: - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); s->regs[R_INTR] |= 3; s->pending_tx = 1; s->regs[addr] = value; @@ -229,11 +231,9 @@ static void etraxfs_ser_realize(DeviceState *dev, Error **errp) { ETRAXSerial *s = ETRAX_SERIAL(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, - serial_can_receive, serial_receive, - serial_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, + serial_can_receive, serial_receive, + serial_event, s, NULL, true); } static void etraxfs_ser_class_init(ObjectClass *klass, void *data) diff --git a/hw/char/exynos4210_uart.c b/hw/char/exynos4210_uart.c index 885ecc027b..571c324004 100644 --- a/hw/char/exynos4210_uart.c +++ b/hw/char/exynos4210_uart.c @@ -181,7 +181,7 @@ typedef struct Exynos4210UartState { Exynos4210UartFIFO rx; Exynos4210UartFIFO tx; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; uint32_t channel; @@ -346,7 +346,7 @@ static void exynos4210_uart_update_parameters(Exynos4210UartState *s) ssp.data_bits = data_bits; ssp.stop_bits = stop_bits; - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); PRINT_DEBUG("UART%d: speed: %d, parity: %c, data: %d, stop: %d\n", s->channel, speed, parity, data_bits, stop_bits); @@ -383,11 +383,13 @@ static void exynos4210_uart_write(void *opaque, hwaddr offset, break; case UTXH: - if (s->chr) { + if (qemu_chr_fe_get_driver(&s->chr)) { s->reg[I_(UTRSTAT)] &= ~(UTRSTAT_TRANSMITTER_EMPTY | UTRSTAT_Tx_BUFFER_EMPTY); ch = (uint8_t)val; - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); #if DEBUG_Tx_DATA fprintf(stderr, "%c", ch); #endif @@ -604,7 +606,7 @@ DeviceState *exynos4210_uart_create(hwaddr addr, chr = serial_hds[channel]; if (!chr) { snprintf(label, ARRAY_SIZE(label), "%s%d", chr_name, channel); - chr = qemu_chr_new(label, "null", NULL); + chr = qemu_chr_new(label, "null"); if (!(chr)) { error_report("Can't assign serial port to UART%d", channel); exit(1); @@ -638,8 +640,9 @@ static int exynos4210_uart_init(SysBusDevice *dev) sysbus_init_irq(dev, &s->irq); - qemu_chr_add_handlers(s->chr, exynos4210_uart_can_receive, - exynos4210_uart_receive, exynos4210_uart_event, s); + qemu_chr_fe_set_handlers(&s->chr, exynos4210_uart_can_receive, + exynos4210_uart_receive, exynos4210_uart_event, + s, NULL, true); return 0; } diff --git a/hw/char/grlib_apbuart.c b/hw/char/grlib_apbuart.c index 871524c82f..db686e6a6f 100644 --- a/hw/char/grlib_apbuart.c +++ b/hw/char/grlib_apbuart.c @@ -78,7 +78,7 @@ typedef struct UART { MemoryRegion iomem; qemu_irq irq; - CharDriverState *chr; + CharBackend chr; /* registers */ uint32_t status; @@ -201,9 +201,12 @@ static void grlib_apbuart_write(void *opaque, hwaddr addr, case DATA_OFFSET: case DATA_OFFSET + 3: /* When only one byte write */ /* Transmit when character device available and transmitter enabled */ - if ((uart->chr) && (uart->control & UART_TRANSMIT_ENABLE)) { + if (qemu_chr_fe_get_driver(&uart->chr) && + (uart->control & UART_TRANSMIT_ENABLE)) { c = value & 0xFF; - qemu_chr_fe_write(uart->chr, &c, 1); + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&uart->chr, &c, 1); /* Generate interrupt */ if (uart->control & UART_TRANSMIT_INTERRUPT) { qemu_irq_pulse(uart->irq); @@ -240,11 +243,11 @@ static int grlib_apbuart_init(SysBusDevice *dev) { UART *uart = GRLIB_APB_UART(dev); - qemu_chr_add_handlers(uart->chr, - grlib_apbuart_can_receive, - grlib_apbuart_receive, - grlib_apbuart_event, - uart); + qemu_chr_fe_set_handlers(&uart->chr, + grlib_apbuart_can_receive, + grlib_apbuart_receive, + grlib_apbuart_event, + uart, NULL, true); sysbus_init_irq(dev, &uart->irq); diff --git a/hw/char/imx_serial.c b/hw/char/imx_serial.c index 44856d671e..99545fc359 100644 --- a/hw/char/imx_serial.c +++ b/hw/char/imx_serial.c @@ -121,9 +121,7 @@ static uint64_t imx_serial_read(void *opaque, hwaddr offset, s->usr2 &= ~USR2_RDR; s->uts1 |= UTS1_RXEMPTY; imx_update(s); - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); } return c; @@ -172,18 +170,19 @@ static void imx_serial_write(void *opaque, hwaddr offset, uint64_t value, unsigned size) { IMXSerialState *s = (IMXSerialState *)opaque; + CharDriverState *chr = qemu_chr_fe_get_driver(&s->chr); unsigned char ch; DPRINTF("write(offset=0x%" HWADDR_PRIx ", value = 0x%x) to %s\n", - offset, (unsigned int)value, s->chr ? s->chr->label : "NODEV"); + offset, (unsigned int)value, chr ? chr->label : "NODEV"); switch (offset >> 2) { case 0x10: /* UTXD */ ch = value; if (s->ucr2 & UCR2_TXEN) { - if (s->chr) { - qemu_chr_fe_write(s->chr, &ch, 1); - } + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); s->usr1 &= ~USR1_TRDY; imx_update(s); s->usr1 |= USR1_TRDY; @@ -212,9 +211,7 @@ static void imx_serial_write(void *opaque, hwaddr offset, } if (value & UCR2_RXEN) { if (!(s->ucr2 & UCR2_RXEN)) { - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); } } s->ucr2 = value & 0xffff; @@ -316,12 +313,10 @@ static void imx_serial_realize(DeviceState *dev, Error **errp) { IMXSerialState *s = IMX_SERIAL(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, imx_can_receive, imx_receive, - imx_event, s); - } else { - DPRINTF("No char dev for uart\n"); - } + DPRINTF("char dev for uart: %p\n", qemu_chr_fe_get_driver(&s->chr)); + + qemu_chr_fe_set_handlers(&s->chr, imx_can_receive, imx_receive, + imx_event, s, NULL, true); } static void imx_serial_init(Object *obj) diff --git a/hw/char/ipoctal232.c b/hw/char/ipoctal232.c index 9ead32af60..93929c2880 100644 --- a/hw/char/ipoctal232.c +++ b/hw/char/ipoctal232.c @@ -93,7 +93,7 @@ typedef struct SCC2698Block SCC2698Block; struct SCC2698Channel { IPOctalState *ipoctal; - CharDriverState *dev; + CharBackend dev; bool rx_enabled; uint8_t mr[2]; uint8_t mr_idx; @@ -288,9 +288,7 @@ static uint16_t io_read(IPackDevice *ip, uint8_t addr) if (ch->rx_pending == 0) { ch->sr &= ~SR_RXRDY; blk->isr &= ~ISR_RXRDY(channel); - if (ch->dev) { - qemu_chr_accept_input(ch->dev); - } + qemu_chr_fe_accept_input(&ch->dev); } else { ch->rhr_idx = (ch->rhr_idx + 1) % RX_FIFO_SIZE; } @@ -357,11 +355,11 @@ static void io_write(IPackDevice *ip, uint8_t addr, uint16_t val) case REG_THRa: case REG_THRb: if (ch->sr & SR_TXRDY) { + uint8_t thr = reg; DPRINTF("Write THR%c (0x%x)\n", channel + 'a', reg); - if (ch->dev) { - uint8_t thr = reg; - qemu_chr_fe_write(ch->dev, &thr, 1); - } + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&ch->dev, &thr, 1); } else { DPRINTF("Write THR%c (0x%x), Tx disabled\n", channel + 'a', reg); } @@ -544,9 +542,10 @@ static void ipoctal_realize(DeviceState *dev, Error **errp) ch->ipoctal = s; /* Redirect IP-Octal channels to host character devices */ - if (ch->dev) { - qemu_chr_add_handlers(ch->dev, hostdev_can_receive, - hostdev_receive, hostdev_event, ch); + if (qemu_chr_fe_get_driver(&ch->dev)) { + qemu_chr_fe_set_handlers(&ch->dev, hostdev_can_receive, + hostdev_receive, hostdev_event, + ch, NULL, true); DPRINTF("Redirecting channel %u to %s\n", i, ch->dev->label); } else { DPRINTF("Could not redirect channel %u, no chardev set\n", i); diff --git a/hw/char/lm32_juart.c b/hw/char/lm32_juart.c index 28c2cf702d..f8c1e0d076 100644 --- a/hw/char/lm32_juart.c +++ b/hw/char/lm32_juart.c @@ -44,7 +44,7 @@ enum { struct LM32JuartState { SysBusDevice parent_obj; - CharDriverState *chr; + CharBackend chr; uint32_t jtx; uint32_t jrx; @@ -75,9 +75,9 @@ void lm32_juart_set_jtx(DeviceState *d, uint32_t jtx) trace_lm32_juart_set_jtx(s->jtx); s->jtx = jtx; - if (s->chr) { - qemu_chr_fe_write_all(s->chr, &ch, 1); - } + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); } void lm32_juart_set_jrx(DeviceState *d, uint32_t jtx) @@ -118,9 +118,8 @@ static void lm32_juart_realize(DeviceState *dev, Error **errp) { LM32JuartState *s = LM32_JUART(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, juart_can_rx, juart_rx, juart_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, juart_can_rx, juart_rx, + juart_event, s, NULL, true); } static const VMStateDescription vmstate_lm32_juart = { diff --git a/hw/char/lm32_uart.c b/hw/char/lm32_uart.c index b5c760dda3..7f3597c4b0 100644 --- a/hw/char/lm32_uart.c +++ b/hw/char/lm32_uart.c @@ -97,7 +97,7 @@ struct LM32UartState { SysBusDevice parent_obj; MemoryRegion iomem; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; uint32_t regs[R_MAX]; @@ -142,7 +142,7 @@ static uint64_t uart_read(void *opaque, hwaddr addr, r = s->regs[R_RXTX]; s->regs[R_LSR] &= ~LSR_DR; uart_update_irq(s); - qemu_chr_accept_input(s->chr); + qemu_chr_fe_accept_input(&s->chr); break; case R_IIR: case R_LSR: @@ -177,9 +177,9 @@ static void uart_write(void *opaque, hwaddr addr, addr >>= 2; switch (addr) { case R_RXTX: - if (s->chr) { - qemu_chr_fe_write_all(s->chr, &ch, 1); - } + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); break; case R_IER: case R_LCR: @@ -265,9 +265,8 @@ static void lm32_uart_realize(DeviceState *dev, Error **errp) { LM32UartState *s = LM32_UART(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, + uart_event, s, NULL, true); } static const VMStateDescription vmstate_lm32_uart = { diff --git a/hw/char/mcf_uart.c b/hw/char/mcf_uart.c index 3c0438fd79..ecaa091190 100644 --- a/hw/char/mcf_uart.c +++ b/hw/char/mcf_uart.c @@ -10,6 +10,7 @@ #include "hw/m68k/mcf.h" #include "sysemu/char.h" #include "exec/address-spaces.h" +#include "qapi/error.h" typedef struct { MemoryRegion iomem; @@ -26,7 +27,7 @@ typedef struct { int tx_enabled; int rx_enabled; qemu_irq irq; - CharDriverState *chr; + CharBackend chr; } mcf_uart_state; /* UART Status Register bits. 
*/ @@ -92,7 +93,7 @@ uint64_t mcf_uart_read(void *opaque, hwaddr addr, if (s->fifo_len == 0) s->sr &= ~MCF_UART_RxRDY; mcf_uart_update(s); - qemu_chr_accept_input(s->chr); + qemu_chr_fe_accept_input(&s->chr); return val; } case 0x10: @@ -113,8 +114,9 @@ uint64_t mcf_uart_read(void *opaque, hwaddr addr, static void mcf_uart_do_tx(mcf_uart_state *s) { if (s->tx_enabled && (s->sr & MCF_UART_TxEMP) == 0) { - if (s->chr) - qemu_chr_fe_write(s->chr, (unsigned char *)&s->tb, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, (unsigned char *)&s->tb, 1); s->sr |= MCF_UART_TxEMP; } if (s->tx_enabled) { @@ -278,12 +280,12 @@ void *mcf_uart_init(qemu_irq irq, CharDriverState *chr) mcf_uart_state *s; s = g_malloc0(sizeof(mcf_uart_state)); - s->chr = chr; s->irq = irq; if (chr) { - qemu_chr_fe_claim_no_fail(chr); - qemu_chr_add_handlers(chr, mcf_uart_can_receive, mcf_uart_receive, - mcf_uart_event, s); + qemu_chr_fe_init(&s->chr, chr, &error_abort); + qemu_chr_fe_set_handlers(&s->chr, mcf_uart_can_receive, + mcf_uart_receive, mcf_uart_event, + s, NULL, true); } mcf_uart_reset(s); return s; diff --git a/hw/char/milkymist-uart.c b/hw/char/milkymist-uart.c index baddb37648..ae8e2f3554 100644 --- a/hw/char/milkymist-uart.c +++ b/hw/char/milkymist-uart.c @@ -61,7 +61,7 @@ struct MilkymistUartState { SysBusDevice parent_obj; MemoryRegion regs_region; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; uint32_t regs[R_MAX]; @@ -124,9 +124,7 @@ static void uart_write(void *opaque, hwaddr addr, uint64_t value, addr >>= 2; switch (addr) { case R_RXTX: - if (s->chr) { - qemu_chr_fe_write_all(s->chr, &ch, 1); - } + qemu_chr_fe_write_all(&s->chr, &ch, 1); s->regs[R_STAT] |= STAT_TX_EVT; break; case R_DIV: @@ -138,7 +136,7 @@ static void uart_write(void *opaque, hwaddr addr, uint64_t value, case R_STAT: /* write one to clear bits */ s->regs[addr] &= ~(value & (STAT_RX_EVT | STAT_TX_EVT)); - qemu_chr_accept_input(s->chr); + qemu_chr_fe_accept_input(&s->chr); break; default: @@ -200,9 +198,8 @@ static void milkymist_uart_realize(DeviceState *dev, Error **errp) { MilkymistUartState *s = MILKYMIST_UART(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, + uart_event, s, NULL, true); } static void milkymist_uart_init(Object *obj) diff --git a/hw/char/omap_uart.c b/hw/char/omap_uart.c index 415bec5fac..893ab108bc 100644 --- a/hw/char/omap_uart.c +++ b/hw/char/omap_uart.c @@ -63,7 +63,7 @@ struct omap_uart_s *omap_uart_init(hwaddr base, s->irq = irq; s->serial = serial_mm_init(get_system_memory(), base, 2, irq, omap_clk_getrate(fclk)/16, - chr ?: qemu_chr_new(label, "null", NULL), + chr ?: qemu_chr_new(label, "null"), DEVICE_NATIVE_ENDIAN); return s; } @@ -183,6 +183,6 @@ void omap_uart_attach(struct omap_uart_s *s, CharDriverState *chr) /* TODO: Should reuse or destroy current s->serial */ s->serial = serial_mm_init(get_system_memory(), s->base, 2, s->irq, omap_clk_getrate(s->fclk) / 16, - chr ?: qemu_chr_new("null", "null", NULL), + chr ?: qemu_chr_new("null", "null"), DEVICE_NATIVE_ENDIAN); } diff --git a/hw/char/parallel.c b/hw/char/parallel.c index 11c78fed88..f2d56666b7 100644 --- a/hw/char/parallel.c +++ b/hw/char/parallel.c @@ -74,12 +74,13 @@ typedef struct ParallelState { uint8_t control; qemu_irq irq; int irq_pending; - CharDriverState *chr; + CharBackend chr; int hw_driver; int epp_timeout; uint32_t last_read_offset; 
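
Aside: devices that are still created through legacy init helpers (mcf_uart_init() above, and parallel_mm_init(), serial_init() and sh_serial_init() further down) cannot rely on a qdev chardev property to populate the embedded CharBackend, so they bind the passed-in CharDriverState explicitly. A minimal sketch of that pattern (demo_create() and DemoState are hypothetical; fragment against the QEMU headers these files include):

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "sysemu/char.h"

    typedef struct DemoState {
        CharBackend chr;
    } DemoState;

    static DemoState *demo_create(CharDriverState *chrdrv)
    {
        DemoState *s = g_malloc0(sizeof(*s));

        if (chrdrv) {
            /* replaces the old qemu_chr_fe_claim_no_fail() + pointer copy */
            qemu_chr_fe_init(&s->chr, chrdrv, &error_abort);
        }
        return s;
    }
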
/* For debugging */ /* Memory-mapped interface */ int it_shift; + PortioList portio_list; } ParallelState; #define TYPE_ISA_PARALLEL "isa-parallel" @@ -128,7 +129,9 @@ parallel_ioport_write_sw(void *opaque, uint32_t addr, uint32_t val) if (val & PARA_CTR_STROBE) { s->status &= ~PARA_STS_BUSY; if ((s->control & PARA_CTR_STROBE) == 0) - qemu_chr_fe_write(s->chr, &s->dataw, 1); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &s->dataw, 1); } else { if (s->control & PARA_CTR_INTEN) { s->irq_pending = 1; @@ -158,7 +161,7 @@ static void parallel_ioport_write_hw(void *opaque, uint32_t addr, uint32_t val) if (s->dataw == val) return; pdebug("wd%02x\n", val); - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_WRITE_DATA, &parm); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_WRITE_DATA, &parm); s->dataw = val; break; case PARA_REG_STS: @@ -178,11 +181,11 @@ static void parallel_ioport_write_hw(void *opaque, uint32_t addr, uint32_t val) } else { dir = 0; } - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_DATA_DIR, &dir); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_DATA_DIR, &dir); parm &= ~PARA_CTR_DIR; } - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_WRITE_CONTROL, &parm); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_WRITE_CONTROL, &parm); s->control = val; break; case PARA_REG_EPP_ADDR: @@ -191,7 +194,8 @@ static void parallel_ioport_write_hw(void *opaque, uint32_t addr, uint32_t val) pdebug("wa%02x s\n", val); else { struct ParallelIOArg ioarg = { .buffer = &parm, .count = 1 }; - if (qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_WRITE_ADDR, &ioarg)) { + if (qemu_chr_fe_ioctl(&s->chr, + CHR_IOCTL_PP_EPP_WRITE_ADDR, &ioarg)) { s->epp_timeout = 1; pdebug("wa%02x t\n", val); } @@ -205,7 +209,7 @@ static void parallel_ioport_write_hw(void *opaque, uint32_t addr, uint32_t val) pdebug("we%02x s\n", val); else { struct ParallelIOArg ioarg = { .buffer = &parm, .count = 1 }; - if (qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_WRITE, &ioarg)) { + if (qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_EPP_WRITE, &ioarg)) { s->epp_timeout = 1; pdebug("we%02x t\n", val); } @@ -230,7 +234,7 @@ parallel_ioport_eppdata_write_hw2(void *opaque, uint32_t addr, uint32_t val) pdebug("we%04x s\n", val); return; } - err = qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_WRITE, &ioarg); + err = qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_EPP_WRITE, &ioarg); if (err) { s->epp_timeout = 1; pdebug("we%04x t\n", val); @@ -253,7 +257,7 @@ parallel_ioport_eppdata_write_hw4(void *opaque, uint32_t addr, uint32_t val) pdebug("we%08x s\n", val); return; } - err = qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_WRITE, &ioarg); + err = qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_EPP_WRITE, &ioarg); if (err) { s->epp_timeout = 1; pdebug("we%08x t\n", val); @@ -305,13 +309,13 @@ static uint32_t parallel_ioport_read_hw(void *opaque, uint32_t addr) addr &= 7; switch(addr) { case PARA_REG_DATA: - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_READ_DATA, &ret); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_READ_DATA, &ret); if (s->last_read_offset != addr || s->datar != ret) pdebug("rd%02x\n", ret); s->datar = ret; break; case PARA_REG_STS: - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_READ_STATUS, &ret); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_READ_STATUS, &ret); ret &= ~PARA_STS_TMOUT; if (s->epp_timeout) ret |= PARA_STS_TMOUT; @@ -323,7 +327,7 @@ static uint32_t parallel_ioport_read_hw(void *opaque, uint32_t addr) /* s->control has some bits fixed to 1. It is zero only when it has not been yet written to. 
*/ if (s->control == 0) { - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_READ_CONTROL, &ret); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_READ_CONTROL, &ret); if (s->last_read_offset != addr) pdebug("rc%02x\n", ret); s->control = ret; @@ -335,12 +339,14 @@ static uint32_t parallel_ioport_read_hw(void *opaque, uint32_t addr) } break; case PARA_REG_EPP_ADDR: - if ((s->control & (PARA_CTR_DIR|PARA_CTR_SIGNAL)) != (PARA_CTR_DIR|PARA_CTR_INIT)) + if ((s->control & (PARA_CTR_DIR | PARA_CTR_SIGNAL)) != + (PARA_CTR_DIR | PARA_CTR_INIT)) /* Controls not correct for EPP addr cycle, so do nothing */ pdebug("ra%02x s\n", ret); else { struct ParallelIOArg ioarg = { .buffer = &ret, .count = 1 }; - if (qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_READ_ADDR, &ioarg)) { + if (qemu_chr_fe_ioctl(&s->chr, + CHR_IOCTL_PP_EPP_READ_ADDR, &ioarg)) { s->epp_timeout = 1; pdebug("ra%02x t\n", ret); } @@ -349,12 +355,13 @@ static uint32_t parallel_ioport_read_hw(void *opaque, uint32_t addr) } break; case PARA_REG_EPP_DATA: - if ((s->control & (PARA_CTR_DIR|PARA_CTR_SIGNAL)) != (PARA_CTR_DIR|PARA_CTR_INIT)) + if ((s->control & (PARA_CTR_DIR | PARA_CTR_SIGNAL)) != + (PARA_CTR_DIR | PARA_CTR_INIT)) /* Controls not correct for EPP data cycle, so do nothing */ pdebug("re%02x s\n", ret); else { struct ParallelIOArg ioarg = { .buffer = &ret, .count = 1 }; - if (qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_READ, &ioarg)) { + if (qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_EPP_READ, &ioarg)) { s->epp_timeout = 1; pdebug("re%02x t\n", ret); } @@ -382,7 +389,7 @@ parallel_ioport_eppdata_read_hw2(void *opaque, uint32_t addr) pdebug("re%04x s\n", eppdata); return eppdata; } - err = qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_READ, &ioarg); + err = qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_EPP_READ, &ioarg); ret = le16_to_cpu(eppdata); if (err) { @@ -409,7 +416,7 @@ parallel_ioport_eppdata_read_hw4(void *opaque, uint32_t addr) pdebug("re%08x s\n", eppdata); return eppdata; } - err = qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_EPP_READ, &ioarg); + err = qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_EPP_READ, &ioarg); ret = le32_to_cpu(eppdata); if (err) { @@ -505,7 +512,7 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) int base; uint8_t dummy; - if (!s->chr) { + if (!qemu_chr_fe_get_driver(&s->chr)) { error_setg(errp, "Can't create parallel device, empty char device"); return; } @@ -527,12 +534,12 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) isa_init_irq(isadev, &s->irq, isa->isairq); qemu_register_reset(parallel_reset, s); - if (qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_PP_READ_STATUS, &dummy) == 0) { + if (qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_PP_READ_STATUS, &dummy) == 0) { s->hw_driver = 1; s->status = dummy; } - isa_register_portio_list(isadev, base, + isa_register_portio_list(isadev, &s->portio_list, base, (s->hw_driver ? 
&isa_parallel_portio_hw_list[0] : &isa_parallel_portio_sw_list[0]), @@ -602,7 +609,7 @@ bool parallel_mm_init(MemoryRegion *address_space, s = g_malloc0(sizeof(ParallelState)); s->irq = irq; - s->chr = chr; + qemu_chr_fe_init(&s->chr, chr, &error_abort); s->it_shift = it_shift; qemu_register_reset(parallel_reset, s); diff --git a/hw/char/pl011.c b/hw/char/pl011.c index c0fbf8a874..24ea9738b6 100644 --- a/hw/char/pl011.c +++ b/hw/char/pl011.c @@ -11,6 +11,7 @@ #include "hw/sysbus.h" #include "sysemu/char.h" #include "qemu/log.h" +#include "trace.h" #define TYPE_PL011 "pl011" #define PL011(obj) OBJECT_CHECK(PL011State, (obj), TYPE_PL011) @@ -35,7 +36,7 @@ typedef struct PL011State { int read_pos; int read_count; int read_trigger; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; const unsigned char *id; } PL011State; @@ -58,6 +59,7 @@ static void pl011_update(PL011State *s) uint32_t flags; flags = s->int_level & s->int_enabled; + trace_pl011_irq_state(flags != 0); qemu_set_irq(s->irq, flags != 0); } @@ -66,10 +68,8 @@ static uint64_t pl011_read(void *opaque, hwaddr offset, { PL011State *s = (PL011State *)opaque; uint32_t c; + uint64_t r; - if (offset >= 0xfe0 && offset < 0x1000) { - return s->id[(offset - 0xfe0) >> 2]; - } switch (offset >> 2) { case 0: /* UARTDR */ s->flags &= ~PL011_FLAG_RXFF; @@ -84,41 +84,60 @@ static uint64_t pl011_read(void *opaque, hwaddr offset, } if (s->read_count == s->read_trigger - 1) s->int_level &= ~ PL011_INT_RX; + trace_pl011_read_fifo(s->read_count); s->rsr = c >> 8; pl011_update(s); - if (s->chr) { - qemu_chr_accept_input(s->chr); - } - return c; + qemu_chr_fe_accept_input(&s->chr); + r = c; + break; case 1: /* UARTRSR */ - return s->rsr; + r = s->rsr; + break; case 6: /* UARTFR */ - return s->flags; + r = s->flags; + break; case 8: /* UARTILPR */ - return s->ilpr; + r = s->ilpr; + break; case 9: /* UARTIBRD */ - return s->ibrd; + r = s->ibrd; + break; case 10: /* UARTFBRD */ - return s->fbrd; + r = s->fbrd; + break; case 11: /* UARTLCR_H */ - return s->lcr; + r = s->lcr; + break; case 12: /* UARTCR */ - return s->cr; + r = s->cr; + break; case 13: /* UARTIFLS */ - return s->ifl; + r = s->ifl; + break; case 14: /* UARTIMSC */ - return s->int_enabled; + r = s->int_enabled; + break; case 15: /* UARTRIS */ - return s->int_level; + r = s->int_level; + break; case 16: /* UARTMIS */ - return s->int_level & s->int_enabled; + r = s->int_level & s->int_enabled; + break; case 18: /* UARTDMACR */ - return s->dmacr; + r = s->dmacr; + break; + case 0x3f8 ... 0x400: + r = s->id[(offset - 0xfe0) >> 2]; + break; default: qemu_log_mask(LOG_GUEST_ERROR, "pl011_read: Bad offset %x\n", (int)offset); - return 0; + r = 0; + break; } + + trace_pl011_read(offset, r); + return r; } static void pl011_set_read_trigger(PL011State *s) @@ -141,12 +160,15 @@ static void pl011_write(void *opaque, hwaddr offset, PL011State *s = (PL011State *)opaque; unsigned char ch; + trace_pl011_write(offset, value); + switch (offset >> 2) { case 0: /* UARTDR */ /* ??? Check if transmitter is enabled. */ ch = value; - if (s->chr) - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); s->int_level |= PL011_INT_TX; pl011_update(s); break; @@ -205,11 +227,15 @@ static void pl011_write(void *opaque, hwaddr offset, static int pl011_can_receive(void *opaque) { PL011State *s = (PL011State *)opaque; + int r; - if (s->lcr & 0x10) - return s->read_count < 16; - else - return s->read_count < 1; + if (s->lcr & 0x10) { + r = s->read_count < 16; + } else { + r = s->read_count < 1; + } + trace_pl011_can_receive(s->lcr, s->read_count, r); + return r; } static void pl011_put_fifo(void *opaque, uint32_t value) @@ -223,7 +249,9 @@ static void pl011_put_fifo(void *opaque, uint32_t value) s->read_fifo[slot] = value; s->read_count++; s->flags &= ~PL011_FLAG_RXFE; + trace_pl011_put_fifo(value, s->read_count); if (!(s->lcr & 0x10) || s->read_count == 16) { + trace_pl011_put_fifo_full(); s->flags |= PL011_FLAG_RXFF; } if (s->read_count == s->read_trigger) { @@ -300,10 +328,8 @@ static void pl011_realize(DeviceState *dev, Error **errp) { PL011State *s = PL011(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, pl011_can_receive, pl011_receive, - pl011_event, s); - } + qemu_chr_fe_set_handlers(&s->chr, pl011_can_receive, pl011_receive, + pl011_event, s, NULL, true); } static void pl011_class_init(ObjectClass *oc, void *data) diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c index a22ad8d016..07d6ebd112 100644 --- a/hw/char/sclpconsole-lm.c +++ b/hw/char/sclpconsole-lm.c @@ -37,7 +37,7 @@ typedef struct OprtnsCommand { typedef struct SCLPConsoleLM { SCLPEvent event; - CharDriverState *chr; + CharBackend chr; bool echo; /* immediate echo of input if true */ uint32_t write_errors; /* errors writing to char layer */ uint32_t length; /* length of byte stream in buffer */ @@ -89,7 +89,9 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) scon->buf[scon->length] = *buf; scon->length += 1; if (scon->echo) { - qemu_chr_fe_write(scon->chr, buf, size); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&scon->chr, buf, size); } } @@ -191,31 +193,16 @@ static int read_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr, */ static int write_console_data(SCLPEvent *event, const uint8_t *buf, int len) { - int ret = 0; - const uint8_t *buf_offset; - SCLPConsoleLM *scon = SCLPLM_CONSOLE(event); - if (!scon->chr) { + if (!qemu_chr_fe_get_driver(&scon->chr)) { /* If there's no backend, we can just say we consumed all data. */ return len; } - buf_offset = buf; - while (len > 0) { - ret = qemu_chr_fe_write(scon->chr, buf, len); - if (ret == 0) { - /* a pty doesn't seem to be connected - no error */ - len = 0; - } else if (ret == -EAGAIN || (ret > 0 && ret < len)) { - len -= ret; - buf_offset += ret; - } else { - len = 0; - } - } - - return ret; + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + return qemu_chr_fe_write_all(&scon->chr, buf, len); } static int process_mdb(SCLPEvent *event, MDBO *mdbo) @@ -325,9 +312,8 @@ static int console_init(SCLPEvent *event) } console_available = true; - if (scon->chr) { - qemu_chr_add_handlers(scon->chr, chr_can_read, chr_read, NULL, scon); - } + qemu_chr_fe_set_handlers(&scon->chr, chr_can_read, + chr_read, NULL, scon, NULL, true); return 0; } diff --git a/hw/char/sclpconsole.c b/hw/char/sclpconsole.c index d22464826b..b78f240a73 100644 --- a/hw/char/sclpconsole.c +++ b/hw/char/sclpconsole.c @@ -31,7 +31,7 @@ typedef struct ASCIIConsoleData { typedef struct SCLPConsole { SCLPEvent event; - CharDriverState *chr; + CharBackend chr; uint8_t iov[SIZE_BUFFER_VT220]; uint32_t iov_sclp; /* offset in buf for SCLP read operation */ uint32_t iov_bs; /* offset in buf for char layer read operation */ @@ -163,12 +163,14 @@ static ssize_t write_console_data(SCLPEvent *event, const uint8_t *buf, { SCLPConsole *scon = SCLP_CONSOLE(event); - if (!scon->chr) { + if (!qemu_chr_fe_get_driver(&scon->chr)) { /* If there's no backend, we can just say we consumed all data. */ return len; } - return qemu_chr_fe_write_all(scon->chr, buf, len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + return qemu_chr_fe_write_all(&scon->chr, buf, len); } static int write_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr) @@ -225,10 +227,8 @@ static int console_init(SCLPEvent *event) return -1; } console_available = true; - if (scon->chr) { - qemu_chr_add_handlers(scon->chr, chr_can_read, - chr_read, NULL, scon); - } + qemu_chr_fe_set_handlers(&scon->chr, chr_can_read, + chr_read, NULL, scon, NULL, true); return 0; } diff --git a/hw/char/serial-isa.c b/hw/char/serial-isa.c index 1594ec4db3..54d3a12f51 100644 --- a/hw/char/serial-isa.c +++ b/hw/char/serial-isa.c @@ -133,13 +133,14 @@ static void serial_isa_init(ISABus *bus, int index, CharDriverState *chr) qdev_init_nofail(dev); } -void serial_hds_isa_init(ISABus *bus, int n) +void serial_hds_isa_init(ISABus *bus, int from, int to) { int i; - assert(n <= MAX_SERIAL_PORTS); + assert(from >= 0); + assert(to <= MAX_SERIAL_PORTS); - for (i = 0; i < n; ++i) { + for (i = from; i < to; ++i) { if (serial_hds[i]) { serial_isa_init(bus, i, serial_hds[i]); } diff --git a/hw/char/serial.c b/hw/char/serial.c index 3442f47d36..ffbacd8227 100644 --- a/hw/char/serial.c +++ b/hw/char/serial.c @@ -153,8 +153,9 @@ static void serial_update_parameters(SerialState *s) int speed, parity, data_bits, stop_bits, frame_size; QEMUSerialSetParams ssp; - if (s->divider == 0) + if (s->divider == 0 || s->divider > s->baudbase) { return; + } /* Start bit. 
*/ frame_size = 1; @@ -181,7 +182,7 @@ static void serial_update_parameters(SerialState *s) ssp.data_bits = data_bits; ssp.stop_bits = stop_bits; s->char_transmit_time = (NANOSECONDS_PER_SECOND / speed) * frame_size; - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_PARAMS, &ssp); DPRINTF("speed=%d parity=%c data=%d stop=%d\n", speed, parity, data_bits, stop_bits); @@ -194,7 +195,8 @@ static void serial_update_msl(SerialState *s) timer_del(s->modem_status_poll); - if (qemu_chr_fe_ioctl(s->chr,CHR_IOCTL_SERIAL_GET_TIOCM, &flags) == -ENOTSUP) { + if (qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_GET_TIOCM, + &flags) == -ENOTSUP) { s->poll_msl = -1; return; } @@ -259,11 +261,12 @@ static void serial_xmit(SerialState *s) if (s->mcr & UART_MCR_LOOP) { /* in loopback mode, say that we just received a char */ serial_receive1(s, &s->tsr, 1); - } else if (qemu_chr_fe_write(s->chr, &s->tsr, 1) != 1 && + } else if (qemu_chr_fe_write(&s->chr, &s->tsr, 1) != 1 && s->tsr_retry < MAX_XMIT_RETRY) { assert(s->watch_tag == 0); - s->watch_tag = qemu_chr_fe_add_watch(s->chr, G_IO_OUT|G_IO_HUP, - serial_watch_cb, s); + s->watch_tag = + qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, + serial_watch_cb, s); if (s->watch_tag > 0) { s->tsr_retry++; return; @@ -416,8 +419,8 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, break_enable = (val >> 6) & 1; if (break_enable != s->last_break_enable) { s->last_break_enable = break_enable; - qemu_chr_fe_ioctl(s->chr, CHR_IOCTL_SERIAL_SET_BREAK, - &break_enable); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_BREAK, + &break_enable); } } break; @@ -431,7 +434,7 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, if (s->poll_msl >= 0 && old_mcr != s->mcr) { - qemu_chr_fe_ioctl(s->chr,CHR_IOCTL_SERIAL_GET_TIOCM, &flags); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_GET_TIOCM, &flags); flags &= ~(CHR_TIOCM_RTS | CHR_TIOCM_DTR); @@ -440,7 +443,7 @@ static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val, if (val & UART_MCR_DTR) flags |= CHR_TIOCM_DTR; - qemu_chr_fe_ioctl(s->chr,CHR_IOCTL_SERIAL_SET_TIOCM, &flags); + qemu_chr_fe_ioctl(&s->chr, CHR_IOCTL_SERIAL_SET_TIOCM, &flags); /* Update the modem status after a one-character-send wait-time, since there may be a response from the device/computer at the other end of the serial line */ timer_mod(s->modem_status_poll, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->char_transmit_time); @@ -485,7 +488,7 @@ static uint64_t serial_ioport_read(void *opaque, hwaddr addr, unsigned size) serial_update_irq(s); if (!(s->mcr & UART_MCR_LOOP)) { /* in loopback mode, don't receive any data */ - qemu_chr_accept_input(s->chr); + qemu_chr_fe_accept_input(&s->chr); } } break; @@ -658,7 +661,7 @@ static int serial_post_load(void *opaque, int version_id) } assert(s->watch_tag == 0); - s->watch_tag = qemu_chr_fe_add_watch(s->chr, G_IO_OUT|G_IO_HUP, + s->watch_tag = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, serial_watch_cb, s); } else { /* tsr_retry == 0 implies LSR.TEMT = 1 (transmitter empty). 
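
Aside: the recurring "XXX this blocks entire thread" comments point at the pattern serial_xmit() above (and cadence_uart_xmit() earlier in the patch) already implement: attempt a non-blocking qemu_chr_fe_write() and, if bytes remain, register a G_IO_OUT watch to resume later. A condensed sketch of that alternative follows; DemoXmit and its fields are hypothetical and this is a fragment, not a standalone unit.

    typedef struct DemoXmit {
        CharBackend chr;
        uint8_t tx_fifo[16];
        int tx_count;
    } DemoXmit;

    static gboolean demo_xmit(GIOChannel *chan, GIOCondition cond, void *opaque)
    {
        DemoXmit *s = opaque;
        int ret;

        if (!qemu_chr_fe_get_driver(&s->chr)) {
            s->tx_count = 0;               /* no backend: drain instantly */
            return FALSE;
        }
        ret = qemu_chr_fe_write(&s->chr, s->tx_fifo, s->tx_count);
        if (ret >= 0) {
            s->tx_count -= ret;
            memmove(s->tx_fifo, s->tx_fifo + ret, s->tx_count);
        }
        if (s->tx_count &&
            !qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP,
                                   demo_xmit, s)) {
            s->tx_count = 0;               /* watch unavailable: drop */
        }
        return FALSE;
    }
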
*/ @@ -883,7 +886,7 @@ static void serial_reset(void *opaque) void serial_realize_core(SerialState *s, Error **errp) { - if (!s->chr) { + if (!qemu_chr_fe_get_driver(&s->chr)) { error_setg(errp, "Can't create serial device, empty char device"); return; } @@ -893,8 +896,8 @@ void serial_realize_core(SerialState *s, Error **errp) s->fifo_timeout_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, (QEMUTimerCB *) fifo_timeout_int, s); qemu_register_reset(serial_reset, s); - qemu_chr_add_handlers(s->chr, serial_can_receive1, serial_receive1, - serial_event, s); + qemu_chr_fe_set_handlers(&s->chr, serial_can_receive1, serial_receive1, + serial_event, s, NULL, true); fifo8_create(&s->recv_fifo, UART_FIFO_LENGTH); fifo8_create(&s->xmit_fifo, UART_FIFO_LENGTH); serial_reset(s); @@ -902,7 +905,7 @@ void serial_realize_core(SerialState *s, Error **errp) void serial_exit_core(SerialState *s) { - qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL); + qemu_chr_fe_deinit(&s->chr); qemu_unregister_reset(serial_reset, s); } @@ -932,7 +935,7 @@ SerialState *serial_init(int base, qemu_irq irq, int baudbase, s->irq = irq; s->baudbase = baudbase; - s->chr = chr; + qemu_chr_fe_init(&s->chr, chr, &error_abort); serial_realize_core(s, &error_fatal); vmstate_register(NULL, base, &vmstate_serial, s); @@ -989,7 +992,7 @@ SerialState *serial_mm_init(MemoryRegion *address_space, s->it_shift = it_shift; s->irq = irq; s->baudbase = baudbase; - s->chr = chr; + qemu_chr_fe_init(&s->chr, chr, &error_abort); serial_realize_core(s, &error_fatal); vmstate_register(NULL, base, &vmstate_serial, s); diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c index 4c55dcb7dc..9d35564bcf 100644 --- a/hw/char/sh_serial.c +++ b/hw/char/sh_serial.c @@ -29,6 +29,7 @@ #include "hw/sh4/sh.h" #include "sysemu/char.h" #include "exec/address-spaces.h" +#include "qapi/error.h" //#define DEBUG_SERIAL @@ -62,7 +63,7 @@ typedef struct { int flags; int rtrg; - CharDriverState *chr; + CharBackend chr; qemu_irq eri; qemu_irq rxi; @@ -109,9 +110,11 @@ static void sh_serial_write(void *opaque, hwaddr offs, } return; case 0x0c: /* FTDR / TDR */ - if (s->chr) { + if (qemu_chr_fe_get_driver(&s->chr)) { ch = val; - qemu_chr_fe_write(s->chr, &ch, 1); + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); } s->dr = val; s->flags &= ~SH_SERIAL_FLAG_TDE; @@ -393,12 +396,11 @@ void sh_serial_init(MemoryRegion *sysmem, 0, 0x28); memory_region_add_subregion(sysmem, A7ADDR(base), &s->iomem_a7); - s->chr = chr; - if (chr) { - qemu_chr_fe_claim_no_fail(chr); - qemu_chr_add_handlers(chr, sh_serial_can_receive1, sh_serial_receive1, - sh_serial_event, s); + qemu_chr_fe_init(&s->chr, chr, &error_abort); + qemu_chr_fe_set_handlers(&s->chr, sh_serial_can_receive1, + sh_serial_receive1, + sh_serial_event, s, NULL, true); } s->eri = eri_source; diff --git a/hw/char/spapr_vty.c b/hw/char/spapr_vty.c index 3498d7b052..7c22b8bd0e 100644 --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -1,4 +1,5 @@ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" @@ -11,7 +12,7 @@ typedef struct VIOsPAPRVTYDevice { VIOsPAPRDevice sdev; - CharDriverState *chardev; + CharBackend chardev; uint32_t in, out; uint8_t buf[VTERM_BUFSIZE]; } VIOsPAPRVTYDevice; @@ -24,7 +25,7 @@ static int vty_can_receive(void *opaque) { VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(opaque); - return (dev->in - dev->out) < VTERM_BUFSIZE; + return VTERM_BUFSIZE - (dev->in - dev->out); } static void vty_receive(void *opaque, const uint8_t *buf, int size) @@ -37,7 +38,15 @@ static void vty_receive(void *opaque, const uint8_t *buf, int size) qemu_irq_pulse(spapr_vio_qirq(&dev->sdev)); } for (i = 0; i < size; i++) { - assert((dev->in - dev->out) < VTERM_BUFSIZE); + if (dev->in - dev->out >= VTERM_BUFSIZE) { + static bool reported; + if (!reported) { + error_report("VTY input buffer exhausted - characters dropped." + " (input size = %i)", size); + reported = true; + } + break; + } dev->buf[dev->in++ % VTERM_BUFSIZE] = buf[i]; } } @@ -51,7 +60,7 @@ static int vty_getchars(VIOsPAPRDevice *sdev, uint8_t *buf, int max) buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE]; } - qemu_chr_accept_input(dev->chardev); + qemu_chr_fe_accept_input(&dev->chardev); return n; } @@ -60,21 +69,22 @@ void vty_putchars(VIOsPAPRDevice *sdev, uint8_t *buf, int len) { VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(sdev); - /* FIXME: should check the qemu_chr_fe_write() return value */ - qemu_chr_fe_write(dev->chardev, buf, len); + /* XXX this blocks entire thread. 
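
Aside: the reworked vty_can_receive()/vty_receive() above use a common free-running-counter ring buffer idiom: in and out only ever increase, used = in - out stays correct across uint32_t wrap-around, and the slot index is counter % size, which is consistent as long as the size is a power of two. A standalone toy version (names and size are made up):

    #include <stdint.h>
    #include <stdio.h>

    #define RING_SIZE 16u                  /* must be a power of two */

    typedef struct Ring {
        uint8_t buf[RING_SIZE];
        uint32_t in, out;                  /* free-running counters */
    } Ring;

    static uint32_t ring_free(const Ring *r)
    {
        return RING_SIZE - (r->in - r->out);
    }

    static int ring_put(Ring *r, uint8_t c)
    {
        if (r->in - r->out >= RING_SIZE) {
            return 0;                      /* full: drop, as vty_receive now does */
        }
        r->buf[r->in++ % RING_SIZE] = c;
        return 1;
    }

    static int ring_get(Ring *r, uint8_t *c)
    {
        if (r->in == r->out) {
            return 0;
        }
        *c = r->buf[r->out++ % RING_SIZE];
        return 1;
    }

    int main(void)
    {
        Ring r = { .in = 0, .out = 0 };
        uint8_t c;

        ring_put(&r, 'x');
        printf("free slots: %u\n", ring_free(&r));
        return ring_get(&r, &c) && c == 'x' ? 0 : 1;
    }
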
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&dev->chardev, buf, len); } static void spapr_vty_realize(VIOsPAPRDevice *sdev, Error **errp) { VIOsPAPRVTYDevice *dev = VIO_SPAPR_VTY_DEVICE(sdev); - if (!dev->chardev) { + if (!qemu_chr_fe_get_driver(&dev->chardev)) { error_setg(errp, "chardev property not set"); return; } - qemu_chr_add_handlers(dev->chardev, vty_can_receive, - vty_receive, NULL, dev); + qemu_chr_fe_set_handlers(&dev->chardev, vty_can_receive, + vty_receive, NULL, dev, NULL, true); } /* Forward declaration */ diff --git a/hw/char/stm32f2xx_usart.c b/hw/char/stm32f2xx_usart.c index 15657abda9..59872e6d3b 100644 --- a/hw/char/stm32f2xx_usart.c +++ b/hw/char/stm32f2xx_usart.c @@ -97,17 +97,13 @@ static uint64_t stm32f2xx_usart_read(void *opaque, hwaddr addr, case USART_SR: retvalue = s->usart_sr; s->usart_sr &= ~USART_SR_TC; - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); return retvalue; case USART_DR: DB_PRINT("Value: 0x%" PRIx32 ", %c\n", s->usart_dr, (char) s->usart_dr); s->usart_sr |= USART_SR_TXE; s->usart_sr &= ~USART_SR_RXNE; - if (s->chr) { - qemu_chr_accept_input(s->chr); - } + qemu_chr_fe_accept_input(&s->chr); qemu_set_irq(s->irq, 0); return s->usart_dr & 0x3FF; case USART_BRR: @@ -152,9 +148,9 @@ static void stm32f2xx_usart_write(void *opaque, hwaddr addr, case USART_DR: if (value < 0xF000) { ch = value; - if (s->chr) { - qemu_chr_fe_write_all(s->chr, &ch, 1); - } + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); s->usart_sr |= USART_SR_TC; s->usart_sr &= ~USART_SR_TXE; } @@ -210,10 +206,8 @@ static void stm32f2xx_usart_realize(DeviceState *dev, Error **errp) { STM32F2XXUsartState *s = STM32F2XX_USART(dev); - if (s->chr) { - qemu_chr_add_handlers(s->chr, stm32f2xx_usart_can_receive, - stm32f2xx_usart_receive, NULL, s); - } + qemu_chr_fe_set_handlers(&s->chr, stm32f2xx_usart_can_receive, + stm32f2xx_usart_receive, NULL, s, NULL, true); } static void stm32f2xx_usart_class_init(ObjectClass *klass, void *data) diff --git a/hw/char/trace-events b/hw/char/trace-events index d53577c99d..7fd48bb80d 100644 --- a/hw/char/trace-events +++ b/hw/char/trace-events @@ -47,3 +47,12 @@ escc_sunkbd_event_in(int ch, const char *name, int down) "QKeyCode 0x%2.2x [%s], escc_sunkbd_event_out(int ch) "Translated keycode 0x%2.2x" escc_kbd_command(int val) "Command %d" escc_sunmouse_event(int dx, int dy, int buttons_state) "dx=%d dy=%d buttons=%01x" + +# hw/char/pl011.c +pl011_irq_state(int level) "irq state %d" +pl011_read(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" +pl011_read_fifo(int read_count) "FIFO read, read_count now %d" +pl011_write(uint32_t addr, uint32_t value) "addr 0x%08x value 0x%08x" +pl011_can_receive(uint32_t lcr, int read_count, int r) "LCR %08x read_count %d returning %d" +pl011_put_fifo(uint32_t c, int read_count) "new char 0x%x read_count now %d" +pl011_put_fifo_full(void) "FIFO now full, RXFF set" diff --git a/hw/char/virtio-console.c b/hw/char/virtio-console.c index 4f0e03d3b7..776205b4a9 100644 --- a/hw/char/virtio-console.c +++ b/hw/char/virtio-console.c @@ -24,7 +24,7 @@ typedef struct VirtConsole { VirtIOSerialPort parent_obj; - CharDriverState *chr; + CharBackend chr; guint watch; } VirtConsole; @@ -49,12 +49,12 @@ static ssize_t flush_buf(VirtIOSerialPort *port, VirtConsole *vcon = VIRTIO_CONSOLE(port); ssize_t ret; - if (!vcon->chr) { + if 
(!qemu_chr_fe_get_driver(&vcon->chr)) { /* If there's no backend, we can just say we consumed all data. */ return len; } - ret = qemu_chr_fe_write(vcon->chr, buf, len); + ret = qemu_chr_fe_write(&vcon->chr, buf, len); trace_virtio_console_flush_buf(port->id, len, ret); if (ret < len) { @@ -68,10 +68,31 @@ static ssize_t flush_buf(VirtIOSerialPort *port, */ if (ret < 0) ret = 0; + + /* XXX we should be queuing data to send later for the + * console devices too rather than silently dropping + * console data on EAGAIN. The Linux virtio-console + * hvc driver though does sends with spinlocks held, + * so if we enable throttling that'll stall the entire + * guest kernel, not merely the process writing to the + * console. + * + * While we could queue data for later write without + * enabling throttling, this would result in the guest + * being able to trigger arbitrary memory usage in QEMU + * buffering data for later writes. + * + * So fixing this problem likely requires fixing the + * Linux virtio-console hvc driver to not hold spinlocks + * while writing, and instead merely block the process + * that's writing. QEMU would then need some way to detect + * if the guest had the fixed driver too, before we can + * use throttling on host side. + */ if (!k->is_console) { virtio_serial_throttle_port(port, true); if (!vcon->watch) { - vcon->watch = qemu_chr_fe_add_watch(vcon->chr, + vcon->watch = qemu_chr_fe_add_watch(&vcon->chr, G_IO_OUT|G_IO_HUP, chr_write_unblocked, vcon); } @@ -87,8 +108,8 @@ static void set_guest_connected(VirtIOSerialPort *port, int guest_connected) DeviceState *dev = DEVICE(port); VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(port); - if (vcon->chr && !k->is_console) { - qemu_chr_fe_set_open(vcon->chr, guest_connected); + if (!k->is_console) { + qemu_chr_fe_set_open(&vcon->chr, guest_connected); } if (dev->id) { @@ -101,9 +122,7 @@ static void guest_writable(VirtIOSerialPort *port) { VirtConsole *vcon = VIRTIO_CONSOLE(port); - if (vcon->chr) { - qemu_chr_accept_input(vcon->chr); - } + qemu_chr_fe_accept_input(&vcon->chr); } /* Readiness of the guest to accept data on a port */ @@ -149,6 +168,7 @@ static void virtconsole_realize(DeviceState *dev, Error **errp) VirtIOSerialPort *port = VIRTIO_SERIAL_PORT(dev); VirtConsole *vcon = VIRTIO_CONSOLE(dev); VirtIOSerialPortClass *k = VIRTIO_SERIAL_PORT_GET_CLASS(dev); + CharDriverState *chr = qemu_chr_fe_get_driver(&vcon->chr); if (port->id == 0 && !k->is_console) { error_setg(errp, "Port number 0 on virtio-serial devices reserved " @@ -156,7 +176,7 @@ static void virtconsole_realize(DeviceState *dev, Error **errp) return; } - if (vcon->chr) { + if (chr) { /* * For consoles we don't block guest data transfer just * because nothing is connected - we'll just let it go @@ -167,14 +187,12 @@ static void virtconsole_realize(DeviceState *dev, Error **errp) * trigger open/close of the device */ if (k->is_console) { - vcon->chr->explicit_fe_open = 0; - qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, - NULL, vcon); + qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read, + NULL, vcon, NULL, true); virtio_serial_open(port); } else { - vcon->chr->explicit_fe_open = 1; - qemu_chr_add_handlers(vcon->chr, chr_can_read, chr_read, - chr_event, vcon); + qemu_chr_fe_set_handlers(&vcon->chr, chr_can_read, chr_read, + chr_event, vcon, NULL, false); } } } diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index db57a38546..7975c2cda1 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ 
-75,6 +75,19 @@ static VirtIOSerialPort *find_port_by_name(char *name) return NULL; } +static VirtIOSerialPort *find_first_connected_console(VirtIOSerial *vser) +{ + VirtIOSerialPort *port; + + QTAILQ_FOREACH(port, &vser->ports, next) { + VirtIOSerialPortClass const *vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port); + if (vsc->is_console && port->host_connected) { + return port; + } + } + return NULL; +} + static bool use_multiport(VirtIOSerial *vser) { VirtIODevice *vdev = VIRTIO_DEVICE(vser); @@ -132,6 +145,15 @@ static void discard_vq_data(VirtQueue *vq, VirtIODevice *vdev) virtio_notify(vdev, vq); } +static void discard_throttle_data(VirtIOSerialPort *port) +{ + if (port->elem) { + virtqueue_detach_element(port->ovq, port->elem, 0); + g_free(port->elem); + port->elem = NULL; + } +} + static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq, VirtIODevice *vdev) { @@ -254,6 +276,7 @@ int virtio_serial_close(VirtIOSerialPort *port) * consume, reset the throttling flag and discard the data. */ port->throttled = false; + discard_throttle_data(port); discard_vq_data(port->ovq, VIRTIO_DEVICE(port->vser)); send_control_event(port->vser, port->id, VIRTIO_CONSOLE_PORT_OPEN, 0); @@ -528,6 +551,7 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t features, vser = VIRTIO_SERIAL(vdev); + features |= vser->host_features; if (vser->bus.max_nr_ports > 1) { virtio_add_feature(&features, VIRTIO_CONSOLE_F_MULTIPORT); } @@ -547,6 +571,29 @@ static void get_config(VirtIODevice *vdev, uint8_t *config_data) vser->serial.max_virtserial_ports); } +/* Guest sent new config info */ +static void set_config(VirtIODevice *vdev, const uint8_t *config_data) +{ + VirtIOSerial *vser = VIRTIO_SERIAL(vdev); + struct virtio_console_config *config = + (struct virtio_console_config *)config_data; + uint8_t emerg_wr_lo = le32_to_cpu(config->emerg_wr); + VirtIOSerialPort *port = find_first_connected_console(vser); + VirtIOSerialPortClass *vsc; + + if (!config->emerg_wr) { + return; + } + /* Make sure we don't misdetect an emergency write when the guest + * does a short config write after an emergency write. 
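For context, VIRTIO_CONSOLE_F_EMERG_WRITE is driven entirely through config space, so a guest can emit characters before any virtqueue exists (for example from an early-boot or panic path). A guest-side illustration only, assuming the Linux-style config accessors rather than anything in this patch:

    #include <linux/virtio_config.h>
    #include <linux/virtio_console.h>

    /* Emit one character via the emerg_wr config field; set_config() above
     * consumes only the least-significant byte. */
    static void early_console_putchar(struct virtio_device *vdev, char c)
    {
        virtio_cwrite32(vdev,
                        offsetof(struct virtio_console_config, emerg_wr),
                        (u32)(unsigned char)c);
    }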
*/ + config->emerg_wr = 0; + if (!port) { + return; + } + vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port); + (void)vsc->have_data(port, &emerg_wr_lo, 1); +} + static void guest_reset(VirtIOSerial *vser) { VirtIOSerialPort *port; @@ -554,6 +601,9 @@ static void guest_reset(VirtIOSerial *vser) QTAILQ_FOREACH(port, &vser->ports, next) { vsc = VIRTIO_SERIAL_PORT_GET_CLASS(port); + + discard_throttle_data(port); + if (port->guest_connected) { port->guest_connected = false; if (vsc->set_guest_connected) { @@ -728,12 +778,6 @@ static int fetch_active_ports_list(QEMUFile *f, return 0; } -static int virtio_serial_load(QEMUFile *f, void *opaque, size_t size) -{ - /* The virtio device */ - return virtio_load(VIRTIO_DEVICE(opaque), f, 3); -} - static int virtio_serial_load_device(VirtIODevice *vdev, QEMUFile *f, int version_id) { @@ -864,6 +908,7 @@ static void remove_port(VirtIOSerial *vser, uint32_t port_id) assert(port); /* Flush out any unconsumed buffers first */ + discard_throttle_data(port); discard_vq_data(port->ovq, VIRTIO_DEVICE(port->vser)); send_control_event(vser, port->id, VIRTIO_CONSOLE_PORT_REMOVE, 1); @@ -967,6 +1012,7 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOSerial *vser = VIRTIO_SERIAL(dev); uint32_t i, max_supported_ports; + size_t config_size = sizeof(struct virtio_console_config); if (!vser->serial.max_virtserial_ports) { error_setg(errp, "Maximum number of serial ports not specified"); @@ -981,10 +1027,12 @@ static void virtio_serial_device_realize(DeviceState *dev, Error **errp) return; } - /* We don't support emergency write, skip it for now. */ - /* TODO: cleaner fix, depending on host features. */ + if (!virtio_has_feature(vser->host_features, + VIRTIO_CONSOLE_F_EMERG_WRITE)) { + config_size = offsetof(struct virtio_console_config, emerg_wr); + } virtio_init(vdev, "virtio-serial", VIRTIO_ID_CONSOLE, - offsetof(struct virtio_console_config, emerg_wr)); + config_size); /* Spawn a new virtio-serial bus on which the ports will ride as devices */ qbus_create_inplace(&vser->bus, sizeof(vser->bus), TYPE_VIRTIO_SERIAL_BUS, @@ -1075,11 +1123,21 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) } /* Note: 'console' is used for backwards compatibility */ -VMSTATE_VIRTIO_DEVICE(console, 3, virtio_serial_load, virtio_vmstate_save); +static const VMStateDescription vmstate_virtio_console = { + .name = "virtio-console", + .minimum_version_id = 3, + .version_id = 3, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; static Property virtio_serial_properties[] = { DEFINE_PROP_UINT32("max_ports", VirtIOSerial, serial.max_virtserial_ports, 31), + DEFINE_PROP_BIT64("emergency-write", VirtIOSerial, host_features, + VIRTIO_CONSOLE_F_EMERG_WRITE, true), DEFINE_PROP_END_OF_LIST(), }; @@ -1098,6 +1156,7 @@ static void virtio_serial_class_init(ObjectClass *klass, void *data) vdc->unrealize = virtio_serial_device_unrealize; vdc->get_features = get_features; vdc->get_config = get_config; + vdc->set_config = set_config; vdc->set_status = set_status; vdc->reset = vser_reset; vdc->save = virtio_serial_save_device; diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c index 83108b0bdb..c01f41090e 100644 --- a/hw/char/xen_console.c +++ b/hw/char/xen_console.c @@ -23,9 +23,11 @@ #include #include +#include "qapi/error.h" #include "hw/hw.h" #include "sysemu/char.h" #include "hw/xen/xen_backend.h" +#include "qapi/error.h" #include @@ -43,7 +45,7 @@ struct XenConsole { 
char console[XEN_BUFSIZE]; int ring_ref; void *sring; - CharDriverState *chr; + CharBackend chr; int backlog; }; @@ -72,7 +74,7 @@ static void buffer_append(struct XenConsole *con) xen_mb(); intf->out_cons = cons; - xen_be_send_notify(&con->xendev); + xen_pv_send_notify(&con->xendev); if (buffer->max_capacity && buffer->size > buffer->max_capacity) { @@ -140,7 +142,7 @@ static void xencons_receive(void *opaque, const uint8_t *buf, int len) } xen_wmb(); intf->in_prod = prod; - xen_be_send_notify(&con->xendev); + xen_pv_send_notify(&con->xendev); } static void xencons_send(struct XenConsole *con) @@ -148,22 +150,25 @@ static void xencons_send(struct XenConsole *con) ssize_t len, size; size = con->buffer.size - con->buffer.consumed; - if (con->chr) - len = qemu_chr_fe_write(con->chr, con->buffer.data + con->buffer.consumed, - size); - else + if (qemu_chr_fe_get_driver(&con->chr)) { + len = qemu_chr_fe_write(&con->chr, + con->buffer.data + con->buffer.consumed, + size); + } else { len = size; + } if (len < 1) { - if (!con->backlog) { - con->backlog = 1; - xen_be_printf(&con->xendev, 1, "backlog piling up, nobody listening?\n"); - } + if (!con->backlog) { + con->backlog = 1; + xen_pv_printf(&con->xendev, 1, + "backlog piling up, nobody listening?\n"); + } } else { - buffer_advance(&con->buffer, len); - if (con->backlog && len == size) { - con->backlog = 0; - xen_be_printf(&con->xendev, 1, "backlog is gone\n"); - } + buffer_advance(&con->buffer, len); + if (con->backlog && len == size) { + con->backlog = 0; + xen_pv_printf(&con->xendev, 1, "backlog is gone\n"); + } } } @@ -187,7 +192,7 @@ static int con_init(struct XenDevice *xendev) type = xenstore_read_str(con->console, "type"); if (!type || strcmp(type, "ioemu") != 0) { - xen_be_printf(xendev, 1, "not for me (type=%s)\n", type); + xen_pv_printf(xendev, 1, "not for me (type=%s)\n", type); ret = -1; goto out; } @@ -196,13 +201,18 @@ static int con_init(struct XenDevice *xendev) /* no Xen override, use qemu output device */ if (output == NULL) { - con->chr = serial_hds[con->xendev.dev]; + if (con->xendev.dev) { + qemu_chr_fe_init(&con->chr, serial_hds[con->xendev.dev], + &error_abort); + } } else { snprintf(label, sizeof(label), "xencons%d", con->xendev.dev); - con->chr = qemu_chr_new(label, output, NULL); + qemu_chr_fe_init(&con->chr, + qemu_chr_new(label, output), &error_abort); } - xenstore_store_pv_console_info(con->xendev.dev, con->chr); + xenstore_store_pv_console_info(con->xendev.dev, + qemu_chr_fe_get_driver(&con->chr)); out: g_free(type); @@ -235,19 +245,11 @@ static int con_initialise(struct XenDevice *xendev) return -1; xen_be_bind_evtchn(&con->xendev); - if (con->chr) { - if (qemu_chr_fe_claim(con->chr) == 0) { - qemu_chr_add_handlers(con->chr, xencons_can_receive, - xencons_receive, NULL, con); - } else { - xen_be_printf(xendev, 0, - "xen_console_init error chardev %s already used\n", - con->chr->label); - con->chr = NULL; - } - } + qemu_chr_fe_set_handlers(&con->chr, xencons_can_receive, + xencons_receive, NULL, con, NULL, true); - xen_be_printf(xendev, 1, "ring mfn %d, remote port %d, local port %d, limit %zd\n", + xen_pv_printf(xendev, 1, + "ring mfn %d, remote port %d, local port %d, limit %zd\n", con->ring_ref, con->xendev.remote_port, con->xendev.local_port, @@ -259,11 +261,8 @@ static void con_disconnect(struct XenDevice *xendev) { struct XenConsole *con = container_of(xendev, struct XenConsole, xendev); - if (con->chr) { - qemu_chr_add_handlers(con->chr, NULL, NULL, NULL, NULL); - qemu_chr_fe_release(con->chr); - } - 
xen_be_unbind_evtchn(&con->xendev); + qemu_chr_fe_deinit(&con->chr); + xen_pv_unbind_evtchn(&con->xendev); if (con->sring) { if (!xendev->dev) { diff --git a/hw/char/xilinx_uartlite.c b/hw/char/xilinx_uartlite.c index 4847efb29f..37d313b429 100644 --- a/hw/char/xilinx_uartlite.c +++ b/hw/char/xilinx_uartlite.c @@ -55,7 +55,7 @@ typedef struct XilinxUARTLite { SysBusDevice parent_obj; MemoryRegion mmio; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; uint8_t rx_fifo[8]; @@ -107,7 +107,7 @@ uart_read(void *opaque, hwaddr addr, unsigned int size) s->rx_fifo_len--; uart_update_status(s); uart_update_irq(s); - qemu_chr_accept_input(s->chr); + qemu_chr_fe_accept_input(&s->chr); break; default: @@ -143,9 +143,9 @@ uart_write(void *opaque, hwaddr addr, break; case R_TX: - if (s->chr) - qemu_chr_fe_write(s->chr, &ch, 1); - + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); s->regs[addr] = value; /* hax. */ @@ -211,8 +211,8 @@ static void xilinx_uartlite_realize(DeviceState *dev, Error **errp) { XilinxUARTLite *s = XILINX_UARTLITE(dev); - if (s->chr) - qemu_chr_add_handlers(s->chr, uart_can_rx, uart_rx, uart_event, s); + qemu_chr_fe_set_handlers(&s->chr, uart_can_rx, uart_rx, + uart_event, s, NULL, true); } static void xilinx_uartlite_init(Object *obj) diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs index cfd4840397..a4c94e522d 100644 --- a/hw/core/Makefile.objs +++ b/hw/core/Makefile.objs @@ -16,4 +16,7 @@ common-obj-$(CONFIG_SOFTMMU) += null-machine.o common-obj-$(CONFIG_SOFTMMU) += loader.o common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o common-obj-$(CONFIG_SOFTMMU) += register.o +common-obj-$(CONFIG_SOFTMMU) += or-irq.o common-obj-$(CONFIG_PLATFORM_BUS) += platform-bus.o + +obj-$(CONFIG_SOFTMMU) += generic-loader.o diff --git a/hw/core/bus.c b/hw/core/bus.c index 3e3f8ac740..cf383fc1af 100644 --- a/hw/core/bus.c +++ b/hw/core/bus.c @@ -78,8 +78,7 @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name) { const char *typename = object_get_typename(OBJECT(bus)); BusClass *bc; - char *buf; - int i, len, bus_id; + int i, bus_id; bus->parent = parent; @@ -88,23 +87,15 @@ static void qbus_realize(BusState *bus, DeviceState *parent, const char *name) } else if (bus->parent && bus->parent->id) { /* parent device has id -> use it plus parent-bus-id for bus name */ bus_id = bus->parent->num_child_bus; - - len = strlen(bus->parent->id) + 16; - buf = g_malloc(len); - snprintf(buf, len, "%s.%d", bus->parent->id, bus_id); - bus->name = buf; + bus->name = g_strdup_printf("%s.%d", bus->parent->id, bus_id); } else { /* no id -> use lowercase bus type plus global bus-id for bus name */ bc = BUS_GET_CLASS(bus); bus_id = bc->automatic_ids++; - - len = strlen(typename) + 16; - buf = g_malloc(len); - len = snprintf(buf, len, "%s.%d", typename, bus_id); - for (i = 0; i < len; i++) { - buf[i] = qemu_tolower(buf[i]); + bus->name = g_strdup_printf("%s.%d", typename, bus_id); + for (i = 0; bus->name[i]; i++) { + bus->name[i] = qemu_tolower(bus->name[i]); } - bus->name = buf; } if (bus->parent) { @@ -229,7 +220,7 @@ static void qbus_finalize(Object *obj) { BusState *bus = BUS(obj); - g_free((char *)bus->name); + g_free(bus->name); } static const TypeInfo bus_info = { diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c new file mode 100644 index 0000000000..208f549dff --- /dev/null +++ b/hw/core/generic-loader.c @@ -0,0 +1,216 @@ +/* + * Generic Loader + * + * 
Copyright (C) 2014 Li Guang + * Copyright (C) 2016 Xilinx Inc. + * Written by Li Guang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + */ + +/* + * Internally inside QEMU this is a device. It is a strange device that + * provides no hardware interface but allows QEMU to monkey patch memory + * specified when it is created. To be able to do this it has a reset + * callback that does the memory operations. + + * This device allows the user to monkey patch memory. To be able to do + * this it needs a backend to manage the datas, the same as other + * memory-related devices. In this case as the backend is so trivial we + * have merged it with the frontend instead of creating and maintaining a + * seperate backend. + */ + +#include "qemu/osdep.h" +#include "qom/cpu.h" +#include "hw/sysbus.h" +#include "sysemu/dma.h" +#include "hw/loader.h" +#include "qapi/error.h" +#include "hw/core/generic-loader.h" + +#define CPU_NONE 0xFFFFFFFF + +static void generic_loader_reset(void *opaque) +{ + GenericLoaderState *s = GENERIC_LOADER(opaque); + + if (s->set_pc) { + CPUClass *cc = CPU_GET_CLASS(s->cpu); + cpu_reset(s->cpu); + if (cc) { + cc->set_pc(s->cpu, s->addr); + } + } + + if (s->data_len) { + assert(s->data_len < sizeof(s->data)); + dma_memory_write(s->cpu->as, s->addr, &s->data, s->data_len); + } +} + +static void generic_loader_realize(DeviceState *dev, Error **errp) +{ + GenericLoaderState *s = GENERIC_LOADER(dev); + hwaddr entry; + int big_endian; + int size = 0; + + s->set_pc = false; + + /* Perform some error checking on the user's options */ + if (s->data || s->data_len || s->data_be) { + /* User is loading memory values */ + if (s->file) { + error_setg(errp, "Specifying a file is not supported when loading " + "memory values"); + return; + } else if (s->force_raw) { + error_setg(errp, "Specifying force-raw is not supported when " + "loading memory values"); + return; + } else if (!s->data_len) { + /* We cant' check for !data here as a value of 0 is still valid. */ + error_setg(errp, "Both data and data-len must be specified"); + return; + } else if (s->data_len > 8) { + error_setg(errp, "data-len cannot be greater then 8 bytes"); + return; + } + } else if (s->file || s->force_raw) { + /* User is loading an image */ + if (s->data || s->data_len || s->data_be) { + error_setg(errp, "data can not be specified when loading an " + "image"); + return; + } + /* The user specified a file, only set the PC if they also specified + * a CPU to use. + */ + if (s->cpu_num != CPU_NONE) { + s->set_pc = true; + } + } else if (s->addr) { + /* User is setting the PC */ + if (s->data || s->data_len || s->data_be) { + error_setg(errp, "data can not be specified when setting a " + "program counter"); + return; + } else if (!s->cpu_num) { + error_setg(errp, "cpu_num must be specified when setting a " + "program counter"); + return; + } + s->set_pc = true; + } else { + /* Did the user specify anything? 
*/ + error_setg(errp, "please include valid arguments"); + return; + } + + qemu_register_reset(generic_loader_reset, dev); + + if (s->cpu_num != CPU_NONE) { + s->cpu = qemu_get_cpu(s->cpu_num); + if (!s->cpu) { + error_setg(errp, "Specified boot CPU#%d is nonexistent", + s->cpu_num); + return; + } + } else { + s->cpu = first_cpu; + } + +#ifdef TARGET_WORDS_BIGENDIAN + big_endian = 1; +#else + big_endian = 0; +#endif + + if (s->file) { + if (!s->force_raw) { + size = load_elf_as(s->file, NULL, NULL, &entry, NULL, NULL, + big_endian, 0, 0, 0, s->cpu->as); + + if (size < 0) { + size = load_uimage_as(s->file, &entry, NULL, NULL, NULL, NULL, + s->cpu->as); + } + } + + if (size < 0 || s->force_raw) { + /* Default to the maximum size being the machine's ram size */ + size = load_image_targphys_as(s->file, s->addr, ram_size, + s->cpu->as); + } else { + s->addr = entry; + } + + if (size < 0) { + error_setg(errp, "Cannot load specified image %s", s->file); + return; + } + } + + /* Convert the data endiannes */ + if (s->data_be) { + s->data = cpu_to_be64(s->data); + } else { + s->data = cpu_to_le64(s->data); + } +} + +static void generic_loader_unrealize(DeviceState *dev, Error **errp) +{ + qemu_unregister_reset(generic_loader_reset, dev); +} + +static Property generic_loader_props[] = { + DEFINE_PROP_UINT64("addr", GenericLoaderState, addr, 0), + DEFINE_PROP_UINT64("data", GenericLoaderState, data, 0), + DEFINE_PROP_UINT8("data-len", GenericLoaderState, data_len, 0), + DEFINE_PROP_BOOL("data-be", GenericLoaderState, data_be, false), + DEFINE_PROP_UINT32("cpu-num", GenericLoaderState, cpu_num, CPU_NONE), + DEFINE_PROP_BOOL("force-raw", GenericLoaderState, force_raw, false), + DEFINE_PROP_STRING("file", GenericLoaderState, file), + DEFINE_PROP_END_OF_LIST(), +}; + +static void generic_loader_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + /* The reset function is not registered here and is instead registered in + * the realize function to allow this device to be added via the device_add + * command in the QEMU monitor. 
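A hedged usage sketch for the properties defined above; the option spellings follow the property names, and the semantics are as implemented in generic_loader_realize():

    # Write the 32-bit value 0x11223344 at 0x40000000; with data-be left
    # false the value is stored little-endian, i.e. bytes 44 33 22 11.
    -device loader,addr=0x40000000,data=0x11223344,data-len=4

    # Load an ELF (falling back to U-Boot image, then raw) and, because a
    # cpu-num is given, reset that CPU and set its PC to the entry point.
    -device loader,file=kernel.elf,cpu-num=0

    # Force a raw image at a fixed address (loaded into the first CPU's
    # address space, PC left untouched).
    -device loader,file=blob.bin,addr=0x48000000,force-raw=on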
+ * TODO: Improve the device_add functionality to allow resets to be + * connected + */ + dc->realize = generic_loader_realize; + dc->unrealize = generic_loader_unrealize; + dc->props = generic_loader_props; + dc->desc = "Generic Loader"; +} + +static TypeInfo generic_loader_info = { + .name = TYPE_GENERIC_LOADER, + .parent = TYPE_DEVICE, + .instance_size = sizeof(GenericLoaderState), + .class_init = generic_loader_class_init, +}; + +static void generic_loader_register_type(void) +{ + type_register_static(&generic_loader_info); +} + +type_init(generic_loader_register_type) diff --git a/hw/core/loader.c b/hw/core/loader.c index 751fa30816..52ad97c0a9 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -140,9 +140,15 @@ ssize_t read_targphys(const char *name, return did; } -/* return the size or -1 if error */ int load_image_targphys(const char *filename, hwaddr addr, uint64_t max_sz) +{ + return load_image_targphys_as(filename, addr, max_sz, NULL); +} + +/* return the size or -1 if error */ +int load_image_targphys_as(const char *filename, + hwaddr addr, uint64_t max_sz, AddressSpace *as) { int size; @@ -151,7 +157,7 @@ int load_image_targphys(const char *filename, return -1; } if (size > 0) { - rom_add_file_fixed(filename, addr, -1); + rom_add_file_fixed_as(filename, addr, -1, as); } return size; } @@ -423,6 +429,18 @@ int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int big_endian, int elf_machine, int clear_lsb, int data_swab) +{ + return load_elf_as(filename, translate_fn, translate_opaque, pentry, + lowaddr, highaddr, big_endian, elf_machine, clear_lsb, + data_swab, NULL); +} + +/* return < 0 if error, otherwise the number of bytes loaded in memory */ +int load_elf_as(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as) { int fd, data_order, target_data_order, must_swab, ret = ELF_LOAD_FAILED; uint8_t e_ident[EI_NIDENT]; @@ -462,11 +480,11 @@ int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), if (e_ident[EI_CLASS] == ELFCLASS64) { ret = load_elf64(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab); + data_swab, as); } else { ret = load_elf32(filename, fd, translate_fn, translate_opaque, must_swab, pentry, lowaddr, highaddr, elf_machine, clear_lsb, - data_swab); + data_swab, as); } fail: @@ -576,7 +594,7 @@ static ssize_t gunzip(void *dst, size_t dstlen, uint8_t *src, static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr, int *is_linux, uint8_t image_type, uint64_t (*translate_fn)(void *, uint64_t), - void *translate_opaque) + void *translate_opaque, AddressSpace *as) { int fd; int size; @@ -677,7 +695,7 @@ static int load_uboot_image(const char *filename, hwaddr *ep, hwaddr *loadaddr, hdr->ih_size = bytes; } - rom_add_blob_fixed(filename, data, hdr->ih_size, address); + rom_add_blob_fixed_as(filename, data, hdr->ih_size, address, as); ret = hdr->ih_size; @@ -693,14 +711,23 @@ int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr, void *translate_opaque) { return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL, - translate_fn, translate_opaque); + translate_fn, translate_opaque, NULL); +} + +int load_uimage_as(const char *filename, hwaddr *ep, 
hwaddr *loadaddr, + int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, AddressSpace *as) +{ + return load_uboot_image(filename, ep, loadaddr, is_linux, IH_TYPE_KERNEL, + translate_fn, translate_opaque, as); } /* Load a ramdisk. */ int load_ramdisk(const char *filename, hwaddr addr, uint64_t max_sz) { return load_uboot_image(filename, NULL, &addr, NULL, IH_TYPE_RAMDISK, - NULL, NULL); + NULL, NULL, NULL); } /* Load a gzip-compressed kernel to a dynamically allocated buffer. */ @@ -784,6 +811,7 @@ struct Rom { uint8_t *data; MemoryRegion *mr; + AddressSpace *as; int isrom; char *fw_dir; char *fw_file; @@ -795,6 +823,12 @@ struct Rom { static FWCfgState *fw_cfg; static QTAILQ_HEAD(, Rom) roms = QTAILQ_HEAD_INITIALIZER(roms); +static inline bool rom_order_compare(Rom *rom, Rom *item) +{ + return ((uintptr_t)(void *)rom->as > (uintptr_t)(void *)item->as) || + (rom->as == item->as && rom->addr >= item->addr); +} + static void rom_insert(Rom *rom) { Rom *item; @@ -803,10 +837,16 @@ static void rom_insert(Rom *rom) hw_error ("ROM images must be loaded at startup\n"); } - /* list is ordered by load address */ + /* The user didn't specify an address space, this is the default */ + if (!rom->as) { + rom->as = &address_space_memory; + } + + /* List is ordered by load address in the same address space */ QTAILQ_FOREACH(item, &roms, next) { - if (rom->addr >= item->addr) + if (rom_order_compare(rom, item)) { continue; + } QTAILQ_INSERT_BEFORE(item, rom, next); return; } @@ -840,16 +880,25 @@ static void *rom_set_mr(Rom *rom, Object *owner, const char *name) int rom_add_file(const char *file, const char *fw_dir, hwaddr addr, int32_t bootindex, - bool option_rom, MemoryRegion *mr) + bool option_rom, MemoryRegion *mr, + AddressSpace *as) { MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); Rom *rom; int rc, fd = -1; char devpath[100]; + if (as && mr) { + fprintf(stderr, "Specifying an Address Space and Memory Region is " \ + "not valid when loading a rom\n"); + /* We haven't allocated anything so we don't need any cleanup */ + return -1; + } + rom = g_malloc0(sizeof(*rom)); rom->name = g_strdup(file); rom->path = qemu_find_file(QEMU_FILE_TYPE_BIOS, rom->name); + rom->as = as; if (rom->path == NULL) { rom->path = g_strdup(file); } @@ -936,7 +985,8 @@ int rom_add_file(const char *file, const char *fw_dir, MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, size_t max_len, hwaddr addr, const char *fw_file_name, - FWCfgReadCallback fw_callback, void *callback_opaque) + FWCfgReadCallback fw_callback, void *callback_opaque, + AddressSpace *as) { MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); Rom *rom; @@ -944,6 +994,7 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, rom = g_malloc0(sizeof(*rom)); rom->name = g_strdup(name); + rom->as = as; rom->addr = addr; rom->romsize = max_len ? max_len : len; rom->datasize = len; @@ -976,7 +1027,7 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, * memory ownership of "data", so we don't have to allocate and copy the buffer. 
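With the AddressSpace-aware entry points above, a board or device model can load into something other than address_space_memory, and roms are kept grouped per address space so the overlap check only has to compare neighbours in the same space. A minimal caller sketch (function name and size limit are illustrative):

    static void my_board_load_fw(const char *file, hwaddr addr, AddressSpace *as)
    {
        /* Passing an explicit AddressSpace makes rom_reset() write the blob
         * through that space instead of the system memory space. */
        int size = load_image_targphys_as(file, addr, 64 * 1024, as);

        if (size < 0) {
            error_report("could not load firmware image '%s'", file);
        }
    }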
*/ int rom_add_elf_program(const char *name, void *data, size_t datasize, - size_t romsize, hwaddr addr) + size_t romsize, hwaddr addr, AddressSpace *as) { Rom *rom; @@ -992,18 +1043,19 @@ int rom_add_elf_program(const char *name, void *data, size_t datasize, rom->datasize = datasize; rom->romsize = romsize; rom->data = data; + rom->as = as; rom_insert(rom); return 0; } int rom_add_vga(const char *file) { - return rom_add_file(file, "vgaroms", 0, -1, true, NULL); + return rom_add_file(file, "vgaroms", 0, -1, true, NULL, NULL); } int rom_add_option(const char *file, int32_t bootindex) { - return rom_add_file(file, "genroms", 0, bootindex, true, NULL); + return rom_add_file(file, "genroms", 0, bootindex, true, NULL, NULL); } #if defined(CONFIG_GNU_ARM_ECLIPSE) @@ -1029,8 +1081,8 @@ static void rom_reset(void *unused) void *host = memory_region_get_ram_ptr(rom->mr); memcpy(host, rom->data, rom->datasize); } else { - cpu_physical_memory_write_rom(&address_space_memory, - rom->addr, rom->data, rom->datasize); + cpu_physical_memory_write_rom(rom->as, rom->addr, rom->data, + rom->datasize); } if (rom->isrom) { /* rom needs to be written only once */ @@ -1052,12 +1104,13 @@ int rom_check_and_register_reset(void) hwaddr addr = 0; MemoryRegionSection section; Rom *rom; + AddressSpace *as = NULL; QTAILQ_FOREACH(rom, &roms, next) { if (rom->fw_file) { continue; } - if (addr > rom->addr) { + if ((addr > rom->addr) && (as == rom->as)) { fprintf(stderr, "rom: requested regions overlap " "(rom %s. free=0x" TARGET_FMT_plx ", addr=0x" TARGET_FMT_plx ")\n", @@ -1066,9 +1119,11 @@ int rom_check_and_register_reset(void) } addr = rom->addr; addr += rom->romsize; - section = memory_region_find(get_system_memory(), rom->addr, 1); + section = memory_region_find(rom->mr ? rom->mr : get_system_memory(), + rom->addr, 1); rom->isrom = int128_nz(section.size) && memory_region_is_rom(section.mr); memory_region_unref(section.mr); + as = rom->as; } qemu_register_reset(rom_reset, NULL); roms_loaded = 1; diff --git a/hw/core/machine.c b/hw/core/machine.c index 8b5d44567b..ecc3a14438 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -351,7 +351,7 @@ static bool machine_get_enforce_config_section(Object *obj, Error **errp) return ms->enforce_config_section; } -static int error_on_sysbus_device(SysBusDevice *sbdev, void *opaque) +static void error_on_sysbus_device(SysBusDevice *sbdev, void *opaque) { error_report("Option '-device %s' cannot be handled by this machine", object_class_get_name(object_get_class(OBJECT(sbdev)))); @@ -383,6 +383,104 @@ static void machine_class_init(ObjectClass *oc, void *data) /* Default 128 MB as guest ram size */ mc->default_ram_size = 128 * M_BYTE; mc->rom_file_has_mr = true; + + object_class_property_add_str(oc, "accel", + machine_get_accel, machine_set_accel, &error_abort); + object_class_property_set_description(oc, "accel", + "Accelerator list", &error_abort); + + object_class_property_add(oc, "kernel-irqchip", "OnOffSplit", + NULL, machine_set_kernel_irqchip, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, "kernel-irqchip", + "Configure KVM in-kernel irqchip", &error_abort); + + object_class_property_add(oc, "kvm-shadow-mem", "int", + machine_get_kvm_shadow_mem, machine_set_kvm_shadow_mem, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, "kvm-shadow-mem", + "KVM shadow MMU size", &error_abort); + + object_class_property_add_str(oc, "kernel", + machine_get_kernel, machine_set_kernel, &error_abort); + 
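The switch from the per-instance object_property_add*() calls (removed from machine_initfn() further down) to object_class_property_add*() registers each option once on the machine class, so the options can be listed without instantiating a machine. The pattern for a hypothetical option, with accessors shaped like the ones above:

    static char *machine_get_example_opt(Object *obj, Error **errp)
    {
        return g_strdup(MACHINE(obj)->example_opt);  /* example_opt: assumed field */
    }

    static void machine_set_example_opt(Object *obj, const char *value,
                                        Error **errp)
    {
        MachineState *ms = MACHINE(obj);

        g_free(ms->example_opt);
        ms->example_opt = g_strdup(value);
    }

    /* in machine_class_init(): */
    object_class_property_add_str(oc, "example-opt",
                                  machine_get_example_opt,
                                  machine_set_example_opt, &error_abort);
    object_class_property_set_description(oc, "example-opt",
                                          "Illustrative option", &error_abort);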
object_class_property_set_description(oc, "kernel", + "Linux kernel image file", &error_abort); + + object_class_property_add_str(oc, "initrd", + machine_get_initrd, machine_set_initrd, &error_abort); + object_class_property_set_description(oc, "initrd", + "Linux initial ramdisk file", &error_abort); + + object_class_property_add_str(oc, "append", + machine_get_append, machine_set_append, &error_abort); + object_class_property_set_description(oc, "append", + "Linux kernel command line", &error_abort); + + object_class_property_add_str(oc, "dtb", + machine_get_dtb, machine_set_dtb, &error_abort); + object_class_property_set_description(oc, "dtb", + "Linux kernel device tree file", &error_abort); + + object_class_property_add_str(oc, "dumpdtb", + machine_get_dumpdtb, machine_set_dumpdtb, &error_abort); + object_class_property_set_description(oc, "dumpdtb", + "Dump current dtb to a file and quit", &error_abort); + + object_class_property_add(oc, "phandle-start", "int", + machine_get_phandle_start, machine_set_phandle_start, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, "phandle-start", + "The first phandle ID we may generate dynamically", &error_abort); + + object_class_property_add_str(oc, "dt-compatible", + machine_get_dt_compatible, machine_set_dt_compatible, &error_abort); + object_class_property_set_description(oc, "dt-compatible", + "Overrides the \"compatible\" property of the dt root node", + &error_abort); + + object_class_property_add_bool(oc, "dump-guest-core", + machine_get_dump_guest_core, machine_set_dump_guest_core, &error_abort); + object_class_property_set_description(oc, "dump-guest-core", + "Include guest memory in a core dump", &error_abort); + + object_class_property_add_bool(oc, "mem-merge", + machine_get_mem_merge, machine_set_mem_merge, &error_abort); + object_class_property_set_description(oc, "mem-merge", + "Enable/disable memory merge support", &error_abort); + + object_class_property_add_bool(oc, "usb", + machine_get_usb, machine_set_usb, &error_abort); + object_class_property_set_description(oc, "usb", + "Set on/off to enable/disable usb", &error_abort); + + object_class_property_add_bool(oc, "graphics", + machine_get_graphics, machine_set_graphics, &error_abort); + object_class_property_set_description(oc, "graphics", + "Set on/off to enable/disable graphics emulation", &error_abort); + + object_class_property_add_bool(oc, "igd-passthru", + machine_get_igd_gfx_passthru, machine_set_igd_gfx_passthru, + &error_abort); + object_class_property_set_description(oc, "igd-passthru", + "Set on/off to enable/disable igd passthrou", &error_abort); + + object_class_property_add_str(oc, "firmware", + machine_get_firmware, machine_set_firmware, + &error_abort); + object_class_property_set_description(oc, "firmware", + "Firmware image", &error_abort); + + object_class_property_add_bool(oc, "suppress-vmdesc", + machine_get_suppress_vmdesc, machine_set_suppress_vmdesc, + &error_abort); + object_class_property_set_description(oc, "suppress-vmdesc", + "Set on to disable self-describing migration", &error_abort); + + object_class_property_add_bool(oc, "enforce-config-section", + machine_get_enforce_config_section, machine_set_enforce_config_section, + &error_abort); + object_class_property_set_description(oc, "enforce-config-section", + "Set on to enforce configuration section migration", &error_abort); } static void machine_class_base_init(ObjectClass *oc, void *data) @@ -406,41 +504,6 @@ static void machine_initfn(Object *obj) ms->mem_merge = true; 
ms->enable_graphics = true; - object_property_add_str(obj, "accel", - machine_get_accel, machine_set_accel, NULL); - object_property_set_description(obj, "accel", - "Accelerator list", - NULL); - object_property_add(obj, "kernel-irqchip", "OnOffSplit", - NULL, - machine_set_kernel_irqchip, - NULL, NULL, NULL); - object_property_set_description(obj, "kernel-irqchip", - "Configure KVM in-kernel irqchip", - NULL); - object_property_add(obj, "kvm-shadow-mem", "int", - machine_get_kvm_shadow_mem, - machine_set_kvm_shadow_mem, - NULL, NULL, NULL); - object_property_set_description(obj, "kvm-shadow-mem", - "KVM shadow MMU size", - NULL); - object_property_add_str(obj, "kernel", - machine_get_kernel, machine_set_kernel, NULL); - object_property_set_description(obj, "kernel", - "Linux kernel image file", - NULL); - object_property_add_str(obj, "initrd", - machine_get_initrd, machine_set_initrd, NULL); - object_property_set_description(obj, "initrd", - "Linux initial ramdisk file", - NULL); - object_property_add_str(obj, "append", - machine_get_append, machine_set_append, NULL); - object_property_set_description(obj, "append", - "Linux kernel command line", - NULL); - #if defined(CONFIG_GNU_ARM_ECLIPSE) object_property_add_str(obj, "image", machine_get_image, machine_set_image, NULL); @@ -449,80 +512,6 @@ static void machine_initfn(Object *obj) NULL); #endif /* defined(CONFIG_GNU_ARM_ECLIPSE) */ - object_property_add_str(obj, "dtb", - machine_get_dtb, machine_set_dtb, NULL); - object_property_set_description(obj, "dtb", - "Linux kernel device tree file", - NULL); - object_property_add_str(obj, "dumpdtb", - machine_get_dumpdtb, machine_set_dumpdtb, NULL); - object_property_set_description(obj, "dumpdtb", - "Dump current dtb to a file and quit", - NULL); - object_property_add(obj, "phandle-start", "int", - machine_get_phandle_start, - machine_set_phandle_start, - NULL, NULL, NULL); - object_property_set_description(obj, "phandle-start", - "The first phandle ID we may generate dynamically", - NULL); - object_property_add_str(obj, "dt-compatible", - machine_get_dt_compatible, - machine_set_dt_compatible, - NULL); - object_property_set_description(obj, "dt-compatible", - "Overrides the \"compatible\" property of the dt root node", - NULL); - object_property_add_bool(obj, "dump-guest-core", - machine_get_dump_guest_core, - machine_set_dump_guest_core, - NULL); - object_property_set_description(obj, "dump-guest-core", - "Include guest memory in a core dump", - NULL); - object_property_add_bool(obj, "mem-merge", - machine_get_mem_merge, - machine_set_mem_merge, NULL); - object_property_set_description(obj, "mem-merge", - "Enable/disable memory merge support", - NULL); - object_property_add_bool(obj, "usb", - machine_get_usb, - machine_set_usb, NULL); - object_property_set_description(obj, "usb", - "Set on/off to enable/disable usb", - NULL); - object_property_add_bool(obj, "graphics", - machine_get_graphics, - machine_set_graphics, NULL); - object_property_set_description(obj, "graphics", - "Set on/off to enable/disable graphics emulation", - NULL); - object_property_add_bool(obj, "igd-passthru", - machine_get_igd_gfx_passthru, - machine_set_igd_gfx_passthru, NULL); - object_property_set_description(obj, "igd-passthru", - "Set on/off to enable/disable igd passthrou", - NULL); - object_property_add_str(obj, "firmware", - machine_get_firmware, - machine_set_firmware, NULL); - object_property_set_description(obj, "firmware", - "Firmware image", - NULL); - object_property_add_bool(obj, "suppress-vmdesc", - 
machine_get_suppress_vmdesc, - machine_set_suppress_vmdesc, NULL); - object_property_set_description(obj, "suppress-vmdesc", - "Set on to disable self-describing migration", - NULL); - object_property_add_bool(obj, "enforce-config-section", - machine_get_enforce_config_section, - machine_set_enforce_config_section, NULL); - object_property_set_description(obj, "enforce-config-section", - "Set on to enforce configuration section migration", - NULL); - /* Register notifier when init is done for sysbus sanity checks */ ms->sysbus_notifier.notify = machine_init_notify; qemu_add_machine_init_done_notifier(&ms->sysbus_notifier); @@ -594,6 +583,7 @@ static void machine_class_finalize(ObjectClass *klass, void *data) if (mc->compat_props) { g_array_free(mc->compat_props, true); } + g_free(mc->name); } void machine_register_compat_props(MachineState *machine) diff --git a/hw/core/or-irq.c b/hw/core/or-irq.c new file mode 100644 index 0000000000..1ac090d1a4 --- /dev/null +++ b/hw/core/or-irq.c @@ -0,0 +1,107 @@ +/* + * QEMU IRQ/GPIO common code. + * + * Copyright (c) 2016 Alistair Francis . + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/or-irq.h" + +static void or_irq_handler(void *opaque, int n, int level) +{ + qemu_or_irq *s = OR_IRQ(opaque); + int or_level = 0; + int i; + + s->levels[n] = level; + + for (i = 0; i < s->num_lines; i++) { + or_level |= s->levels[i]; + } + + qemu_set_irq(s->out_irq, or_level); +} + +static void or_irq_reset(DeviceState *dev) +{ + qemu_or_irq *s = OR_IRQ(dev); + int i; + + for (i = 0; i < MAX_OR_LINES; i++) { + s->levels[i] = false; + } +} + +static void or_irq_realize(DeviceState *dev, Error **errp) +{ + qemu_or_irq *s = OR_IRQ(dev); + + assert(s->num_lines < MAX_OR_LINES); + + qdev_init_gpio_in(dev, or_irq_handler, s->num_lines); +} + +static void or_irq_init(Object *obj) +{ + qemu_or_irq *s = OR_IRQ(obj); + + qdev_init_gpio_out(DEVICE(obj), &s->out_irq, 1); +} + +static const VMStateDescription vmstate_or_irq = { + .name = TYPE_OR_IRQ, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BOOL_ARRAY(levels, qemu_or_irq, MAX_OR_LINES), + VMSTATE_END_OF_LIST(), + } +}; + +static Property or_irq_properties[] = { + DEFINE_PROP_UINT16("num-lines", qemu_or_irq, num_lines, 1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void or_irq_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = or_irq_reset; + dc->props = or_irq_properties; + dc->realize = or_irq_realize; + dc->vmsd = &vmstate_or_irq; +} + +static const TypeInfo or_irq_type_info = { + .name = TYPE_OR_IRQ, + .parent = TYPE_DEVICE, + .instance_size = sizeof(qemu_or_irq), + .instance_init = or_irq_init, + .class_init = or_irq_class_init, +}; + +static void or_irq_register_types(void) +{ + type_register_static(&or_irq_type_info); +} + +type_init(or_irq_register_types) diff --git a/hw/core/platform-bus.c b/hw/core/platform-bus.c index 36f84ab72f..329ac670c0 100644 --- a/hw/core/platform-bus.c +++ b/hw/core/platform-bus.c @@ -74,7 +74,7 @@ hwaddr platform_bus_get_mmio_addr(PlatformBusDevice *pbus, SysBusDevice *sbdev, return object_property_get_int(OBJECT(sbdev_mr), "addr", NULL); } -static int platform_bus_count_irqs(SysBusDevice *sbdev, void *opaque) +static void platform_bus_count_irqs(SysBusDevice *sbdev, void *opaque) { PlatformBusDevice *pbus = opaque; qemu_irq sbirq; @@ -93,8 +93,6 @@ static int platform_bus_count_irqs(SysBusDevice *sbdev, void *opaque) } } } - - return 0; } /* @@ -168,7 +166,7 @@ static void platform_bus_map_mmio(PlatformBusDevice *pbus, SysBusDevice *sbdev, * For each sysbus device, look for unassigned IRQ lines as well as * unassociated MMIO regions. Connect them to the platform bus if available. 
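A board-side usage sketch for the new OR gate, assuming the standard qdev wiring helpers, two interrupt sources sharing one controller input, and illustrative device variables:

    DeviceState *orgate = qdev_create(NULL, TYPE_OR_IRQ);

    /* must be set before realize; or_irq_realize() creates the input lines */
    object_property_set_int(OBJECT(orgate), 2, "num-lines", &error_fatal);
    qdev_init_nofail(orgate);

    /* the single output drives the shared interrupt line... */
    qdev_connect_gpio_out(orgate, 0, qdev_get_gpio_in(intc, shared_irq_num));

    /* ...and each source drives one input of the gate */
    sysbus_connect_irq(SYS_BUS_DEVICE(uart0), 0, qdev_get_gpio_in(orgate, 0));
    sysbus_connect_irq(SYS_BUS_DEVICE(uart1), 0, qdev_get_gpio_in(orgate, 1));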
*/ -static int link_sysbus_device(SysBusDevice *sbdev, void *opaque) +static void link_sysbus_device(SysBusDevice *sbdev, void *opaque) { PlatformBusDevice *pbus = opaque; int i; @@ -180,8 +178,6 @@ static int link_sysbus_device(SysBusDevice *sbdev, void *opaque) for (i = 0; sysbus_has_mmio(sbdev, i); i++) { platform_bus_map_mmio(pbus, sbdev, i); } - - return 0; } static void platform_bus_init_notify(Notifier *notifier, void *data) diff --git a/hw/core/ptimer.c b/hw/core/ptimer.c index 30829ee97b..3af82afe78 100644 --- a/hw/core/ptimer.c +++ b/hw/core/ptimer.c @@ -11,6 +11,10 @@ #include "hw/ptimer.h" #include "qemu/host-utils.h" #include "sysemu/replay.h" +#include "sysemu/qtest.h" + +#define DELTA_ADJUST 1 +#define DELTA_NO_ADJUST -1 struct ptimer_state { @@ -21,6 +25,7 @@ struct ptimer_state int64_t period; int64_t last_event; int64_t next_event; + uint8_t policy_mask; QEMUBH *bh; QEMUTimer *timer; }; @@ -33,17 +38,58 @@ static void ptimer_trigger(ptimer_state *s) } } -static void ptimer_reload(ptimer_state *s) +static void ptimer_reload(ptimer_state *s, int delta_adjust) { uint32_t period_frac = s->period_frac; uint64_t period = s->period; + uint64_t delta = s->delta; - if (s->delta == 0) { + if (delta == 0 && !(s->policy_mask & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER)) { ptimer_trigger(s); - s->delta = s->limit; } - if (s->delta == 0 || s->period == 0) { - fprintf(stderr, "Timer with period zero, disabling\n"); + + if (delta == 0 && !(s->policy_mask & PTIMER_POLICY_NO_IMMEDIATE_RELOAD)) { + delta = s->delta = s->limit; + } + + if (s->period == 0) { + if (!qtest_enabled()) { + fprintf(stderr, "Timer with period zero, disabling\n"); + } + timer_del(s->timer); + s->enabled = 0; + return; + } + + if (s->policy_mask & PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD) { + if (delta_adjust != DELTA_NO_ADJUST) { + delta += delta_adjust; + } + } + + if (delta == 0 && (s->policy_mask & PTIMER_POLICY_CONTINUOUS_TRIGGER)) { + if (s->enabled == 1 && s->limit == 0) { + delta = 1; + } + } + + if (delta == 0 && (s->policy_mask & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER)) { + if (delta_adjust != DELTA_NO_ADJUST) { + delta = 1; + } + } + + if (delta == 0 && (s->policy_mask & PTIMER_POLICY_NO_IMMEDIATE_RELOAD)) { + if (s->enabled == 1 && s->limit != 0) { + delta = 1; + } + } + + if (delta == 0) { + if (!qtest_enabled()) { + fprintf(stderr, "Timer with delta zero, disabling\n"); + } + timer_del(s->timer); s->enabled = 0; return; } @@ -57,15 +103,15 @@ static void ptimer_reload(ptimer_state *s) * on the current generation of host machines. */ - if (s->enabled == 1 && (s->delta * period < 10000) && !use_icount) { - period = 10000 / s->delta; + if (s->enabled == 1 && (delta * period < 10000) && !use_icount) { + period = 10000 / delta; period_frac = 0; } s->last_event = s->next_event; - s->next_event = s->last_event + s->delta * period; + s->next_event = s->last_event + delta * period; if (period_frac) { - s->next_event += ((int64_t)period_frac * s->delta) >> 32; + s->next_event += ((int64_t)period_frac * delta) >> 32; } timer_mod(s->timer, s->next_event); } @@ -73,12 +119,35 @@ static void ptimer_reload(ptimer_state *s) static void ptimer_tick(void *opaque) { ptimer_state *s = (ptimer_state *)opaque; - ptimer_trigger(s); - s->delta = 0; + bool trigger = true; + if (s->enabled == 2) { + s->delta = 0; s->enabled = 0; } else { - ptimer_reload(s); + int delta_adjust = DELTA_ADJUST; + + if (s->delta == 0 || s->limit == 0) { + /* If a "continuous trigger" policy is not used and limit == 0, + we should error out. 
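Callers opt in to these behaviours through the policy mask now passed to ptimer_init() (the signature change appears further down); a mask of 0 keeps the historical behaviour. A device-side sketch, using only the policy flags handled above and illustrative names:

    static void my_timer_hit(void *opaque)
    {
        MyTimerState *s = opaque;

        qemu_irq_pulse(s->irq);
    }

    static void my_timer_setup(MyTimerState *s)
    {
        QEMUBH *bh = qemu_bh_new(my_timer_hit, s);

        s->ptimer = ptimer_init(bh, PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD |
                                    PTIMER_POLICY_NO_IMMEDIATE_TRIGGER);
        ptimer_set_freq(s->ptimer, 1000000);    /* 1 MHz */
        ptimer_set_limit(s->ptimer, 1000, 1);   /* 1000-tick period, reload now */
        ptimer_run(s->ptimer, 0);               /* periodic mode */
    }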
delta == 0 means that this tick is + caused by a "no immediate reload" policy, so it shouldn't + be adjusted. */ + delta_adjust = DELTA_NO_ADJUST; + } + + if (!(s->policy_mask & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER)) { + /* Avoid re-trigger on deferred reload if "no immediate trigger" + policy isn't used. */ + trigger = (delta_adjust == DELTA_ADJUST); + } + + s->delta = s->limit; + + ptimer_reload(s, delta_adjust); + } + + if (trigger) { + ptimer_trigger(s); } } @@ -86,9 +155,10 @@ uint64_t ptimer_get_count(ptimer_state *s) { uint64_t counter; - if (s->enabled) { + if (s->enabled && s->delta != 0) { int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); int64_t next = s->next_event; + int64_t last = s->last_event; bool expired = (now - next >= 0); bool oneshot = (s->enabled == 2); @@ -113,7 +183,7 @@ uint64_t ptimer_get_count(ptimer_state *s) /* We need to divide time by period, where time is stored in rem (64-bit integer) and period is stored in period/period_frac (64.32 fixed point). - + Doing full precision division is hard, so scale values and do a 64-bit division. The result should be rounded down, so that the rounding error never causes the timer to go @@ -140,6 +210,35 @@ uint64_t ptimer_get_count(ptimer_state *s) div += 1; } counter = rem / div; + + if (s->policy_mask & PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD) { + /* Before wrapping around, timer should stay with counter = 0 + for a one period. */ + if (!oneshot && s->delta == s->limit) { + if (now == last) { + /* Counter == delta here, check whether it was + adjusted and if it was, then right now it is + that "one period". */ + if (counter == s->limit + DELTA_ADJUST) { + return 0; + } + } else if (counter == s->limit) { + /* Since the counter is rounded down and now != last, + the counter == limit means that delta was adjusted + by +1 and right now it is that adjusted period. */ + return 0; + } + } + } + } + + if (s->policy_mask & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN) { + /* If now == last then delta == limit, i.e. the counter already + represents the correct value. It would be rounded down a 1ns + later. */ + if (now != last) { + counter += 1; + } } } else { counter = s->delta; @@ -152,7 +251,7 @@ void ptimer_set_count(ptimer_state *s, uint64_t count) s->delta = count; if (s->enabled) { s->next_event = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - ptimer_reload(s); + ptimer_reload(s, 0); } } @@ -161,13 +260,15 @@ void ptimer_run(ptimer_state *s, int oneshot) bool was_disabled = !s->enabled; if (was_disabled && s->period == 0) { - fprintf(stderr, "Timer with period zero, disabling\n"); + if (!qtest_enabled()) { + fprintf(stderr, "Timer with period zero, disabling\n"); + } return; } s->enabled = oneshot ? 
2 : 1; if (was_disabled) { s->next_event = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - ptimer_reload(s); + ptimer_reload(s, 0); } } @@ -191,7 +292,7 @@ void ptimer_set_period(ptimer_state *s, int64_t period) s->period_frac = 0; if (s->enabled) { s->next_event = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - ptimer_reload(s); + ptimer_reload(s, 0); } } @@ -203,7 +304,7 @@ void ptimer_set_freq(ptimer_state *s, uint32_t freq) s->period_frac = (1000000000ll << 32) / freq; if (s->enabled) { s->next_event = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - ptimer_reload(s); + ptimer_reload(s, 0); } } @@ -216,7 +317,7 @@ void ptimer_set_limit(ptimer_state *s, uint64_t limit, int reload) s->delta = limit; if (s->enabled && reload) { s->next_event = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - ptimer_reload(s); + ptimer_reload(s, 0); } } @@ -242,12 +343,13 @@ const VMStateDescription vmstate_ptimer = { } }; -ptimer_state *ptimer_init(QEMUBH *bh) +ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask) { ptimer_state *s; s = (ptimer_state *)g_malloc0(sizeof(ptimer_state)); s->bh = bh; s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ptimer_tick, s); + s->policy_mask = policy_mask; return s; } diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index e55afe6bf2..1b7ea50e9f 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -160,55 +160,63 @@ PropertyInfo qdev_prop_drive = { /* --- character device --- */ -static void parse_chr(DeviceState *dev, const char *str, void **ptr, - const char *propname, Error **errp) +static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) { - CharDriverState *chr = qemu_chr_find(str); - if (chr == NULL) { - error_setg(errp, "Property '%s.%s' can't find value '%s'", - object_get_typename(OBJECT(dev)), propname, str); - return; - } - if (qemu_chr_fe_claim(chr) != 0) { - error_setg(errp, "Property '%s.%s' can't take value '%s', it's in use", - object_get_typename(OBJECT(dev)), propname, str); - return; - } - *ptr = chr; + DeviceState *dev = DEVICE(obj); + CharBackend *be = qdev_get_prop_ptr(dev, opaque); + char *p; + + p = g_strdup(be->chr && be->chr->label ? be->chr->label : ""); + visit_type_str(v, name, &p, errp); + g_free(p); } -static void release_chr(Object *obj, const char *name, void *opaque) +static void set_chr(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) { DeviceState *dev = DEVICE(obj); + Error *local_err = NULL; Property *prop = opaque; - CharDriverState **ptr = qdev_get_prop_ptr(dev, prop); - CharDriverState *chr = *ptr; + CharBackend *be = qdev_get_prop_ptr(dev, prop); + CharDriverState *s; + char *str; - if (chr) { - qemu_chr_add_handlers(chr, NULL, NULL, NULL, NULL); - qemu_chr_fe_release(chr); + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; } -} + visit_type_str(v, name, &str, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } -static char *print_chr(void *ptr) -{ - CharDriverState *chr = ptr; - const char *val = chr->label ? 
chr->label : ""; + if (!*str) { + g_free(str); + be->chr = NULL; + return; + } - return g_strdup(val); + s = qemu_chr_find(str); + if (s == NULL) { + error_setg(errp, "Property '%s.%s' can't find value '%s'", + object_get_typename(obj), prop->name, str); + } else if (!qemu_chr_fe_init(be, s, errp)) { + error_prepend(errp, "Property '%s.%s' can't take value '%s': ", + object_get_typename(obj), prop->name, str); + } + g_free(str); } -static void get_chr(Object *obj, Visitor *v, const char *name, void *opaque, - Error **errp) +static void release_chr(Object *obj, const char *name, void *opaque) { - get_pointer(obj, v, opaque, print_chr, name, errp); -} + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + CharBackend *be = qdev_get_prop_ptr(dev, prop); -static void set_chr(Object *obj, Visitor *v, const char *name, void *opaque, - Error **errp) -{ - set_pointer(obj, v, opaque, parse_chr, name, errp); + qemu_chr_fe_deinit(be); } PropertyInfo qdev_prop_chr = { diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 877726c675..511f170cab 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -709,13 +709,19 @@ static void get_pci_host_devaddr(Object *obj, Visitor *v, const char *name, DeviceState *dev = DEVICE(obj); Property *prop = opaque; PCIHostDeviceAddress *addr = qdev_get_prop_ptr(dev, prop); - char buffer[] = "xxxx:xx:xx.x"; + char buffer[] = "ffff:ff:ff.f"; char *p = buffer; int rc = 0; - rc = snprintf(buffer, sizeof(buffer), "%04x:%02x:%02x.%d", - addr->domain, addr->bus, addr->slot, addr->function); - assert(rc == sizeof(buffer) - 1); + /* + * Catch "invalid" device reference from vfio-pci and allow the + * default buffer representing the non-existant device to be used. + */ + if (~addr->domain || ~addr->bus || ~addr->slot || ~addr->function) { + rc = snprintf(buffer, sizeof(buffer), "%04x:%02x:%02x.%0d", + addr->domain, addr->bus, addr->slot, addr->function); + assert(rc == sizeof(buffer) - 1); + } visit_type_str(v, name, &p, errp); } diff --git a/hw/cortexm/stm32/usart.c b/hw/cortexm/stm32/usart.c index 21853bbb11..49fb877da2 100644 --- a/hw/cortexm/stm32/usart.c +++ b/hw/cortexm/stm32/usart.c @@ -1088,7 +1088,9 @@ static void stm32f4_usart_dr_post_read_callback(Object *reg, Object *periph, peripheral_register_and_raw_value(state->reg.sr, ~USART_SR_RXNE); if (state->chr) { +#if 0 qemu_chr_accept_input(state->chr); +#endif } } @@ -1316,8 +1318,10 @@ static void stm32_usart_realize_callback(DeviceState *dev, Error **errp) // char-device callbacks. 
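On the device side, the qdev_prop_chr conversion above leaves the property declaration unchanged apart from the field type; the CharBackend is claimed by qemu_chr_fe_init() when the property is set and released again by release_chr(). A minimal sketch (type name illustrative):

    typedef struct MyDeviceState {
        DeviceState parent_obj;
        CharBackend chr;                 /* was: CharDriverState *chr */
    } MyDeviceState;

    static Property my_device_properties[] = {
        DEFINE_PROP_CHR("chardev", MyDeviceState, chr),
        DEFINE_PROP_END_OF_LIST(),
    };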
if (state->chr) { +#if 0 qemu_chr_add_handlers(state->chr, stm32f4_usart_can_receive, stm32f4_usart_receive, NULL, obj); +#endif } switch (state->port_index) { @@ -1361,7 +1365,7 @@ static void stm32_usart_realize_callback(DeviceState *dev, Error **errp) snprintf(chardev_name, ARRAY_SIZE(chardev_name)-1, "serial%d", 0 + state->port_index - STM32_PORT_USART1); - chr = qemu_chr_new(chardev_name, "null", NULL); + chr = qemu_chr_new(chardev_name, "null"); if (!(chr)) { hw_error("Can't assign serial port to %s.\n", periph_name); } @@ -1379,7 +1383,9 @@ static void stm32_usart_reset_callback(DeviceState *dev) cm_device_parent_reset(dev, TYPE_STM32_USART); if (state->chr) { +#if 0 qemu_chr_accept_input(state->chr); +#endif } const STM32Capabilities *capabilities = diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c index 3d712d592f..bdb092ee9d 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -272,6 +272,9 @@ static void cirrus_update_memory_access(CirrusVGAState *s); static bool blit_region_is_unsafe(struct CirrusVGAState *s, int32_t pitch, int32_t addr) { + if (!pitch) { + return true; + } if (pitch < 0) { int64_t min = addr + ((int64_t)s->cirrus_blt_height-1) * pitch; @@ -715,7 +718,7 @@ static int cirrus_bitblt_videotovideo_patterncopy(CirrusVGAState * s) s->cirrus_addr_mask)); } -static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) +static int cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) { int sx = 0, sy = 0; int dx = 0, dy = 0; @@ -729,6 +732,9 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) int width, height; depth = s->vga.get_bpp(&s->vga) / 8; + if (!depth) { + return 0; + } s->vga.get_resolution(&s->vga, &width, &height); /* extra x, y */ @@ -783,6 +789,8 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) cirrus_invalidate_region(s, s->cirrus_blt_dstaddr, s->cirrus_blt_dstpitch, s->cirrus_blt_width, s->cirrus_blt_height); + + return 1; } static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s) @@ -790,11 +798,9 @@ static int cirrus_bitblt_videotovideo_copy(CirrusVGAState * s) if (blit_is_unsafe(s)) return 0; - cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr, + return cirrus_do_copy(s, s->cirrus_blt_dstaddr - s->vga.start_addr, s->cirrus_blt_srcaddr - s->vga.start_addr, s->cirrus_blt_width, s->cirrus_blt_height); - - return 1; } /*************************************** diff --git a/hw/display/milkymist-tmu2.c b/hw/display/milkymist-tmu2.c index 9c0018448a..5c666f9b24 100644 --- a/hw/display/milkymist-tmu2.c +++ b/hw/display/milkymist-tmu2.c @@ -213,7 +213,7 @@ static void tmu2_start(MilkymistTMU2State *s) /* Read the QEMU source framebuffer into an OpenGL texture */ glGenTextures(1, &texture); glBindTexture(GL_TEXTURE_2D, texture); - fb_len = 2*s->regs[R_TEXHRES]*s->regs[R_TEXVRES]; + fb_len = 2ULL * s->regs[R_TEXHRES] * s->regs[R_TEXVRES]; fb = cpu_physical_memory_map(s->regs[R_TEXFBUF], &fb_len, 0); if (fb == NULL) { glDeleteTextures(1, &texture); diff --git a/hw/display/pl110.c b/hw/display/pl110.c index c069c0b7fd..8c7dcc6f0a 100644 --- a/hw/display/pl110.c +++ b/hw/display/pl110.c @@ -466,17 +466,16 @@ static const GraphicHwOps pl110_gfx_ops = { .gfx_update = pl110_update_display, }; -static int pl110_initfn(SysBusDevice *sbd) +static void pl110_realize(DeviceState *dev, Error **errp) { - DeviceState *dev = DEVICE(sbd); PL110State *s = PL110(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); memory_region_init_io(&s->iomem, 
OBJECT(s), &pl110_ops, s, "pl110", 0x1000); sysbus_init_mmio(sbd, &s->iomem); sysbus_init_irq(sbd, &s->irq); qdev_init_gpio_in(dev, pl110_mux_ctrl_set, 1); s->con = graphic_console_init(dev, 0, &pl110_gfx_ops, s); - return 0; } static void pl110_init(Object *obj) @@ -503,11 +502,10 @@ static void pl111_init(Object *obj) static void pl110_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - k->init = pl110_initfn; set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); dc->vmsd = &vmstate_pl110; + dc->realize = pl110_realize; } static const TypeInfo pl110_info = { diff --git a/hw/display/qxl.c b/hw/display/qxl.c index 0e2682d28b..62d0c80dcf 100644 --- a/hw/display/qxl.c +++ b/hw/display/qxl.c @@ -992,6 +992,34 @@ static uint32_t qxl_crc32(const uint8_t *p, unsigned len) return crc32(0xffffffff, p, len) ^ 0xffffffff; } +static bool qxl_rom_monitors_config_changed(QXLRom *rom, + VDAgentMonitorsConfig *monitors_config, + unsigned int max_outputs) +{ + int i; + unsigned int monitors_count; + + monitors_count = MIN(monitors_config->num_of_monitors, max_outputs); + + if (rom->client_monitors_config.count != monitors_count) { + return true; + } + + for (i = 0 ; i < rom->client_monitors_config.count ; ++i) { + VDAgentMonConfig *monitor = &monitors_config->monitors[i]; + QXLURect *rect = &rom->client_monitors_config.heads[i]; + /* monitor->depth ignored */ + if ((rect->left != monitor->x) || + (rect->top != monitor->y) || + (rect->right != monitor->x + monitor->width) || + (rect->bottom != monitor->y + monitor->height)) { + return true; + } + } + + return false; +} + /* called from main context only */ static int interface_client_monitors_config(QXLInstance *sin, VDAgentMonitorsConfig *monitors_config) @@ -1000,6 +1028,7 @@ static int interface_client_monitors_config(QXLInstance *sin, QXLRom *rom = memory_region_get_ram_ptr(&qxl->rom_bar); int i; unsigned max_outputs = ARRAY_SIZE(rom->client_monitors_config.heads); + bool config_changed = false; if (qxl->revision < 4) { trace_qxl_client_monitors_config_unsupported_by_device(qxl->id, @@ -1030,6 +1059,10 @@ static int interface_client_monitors_config(QXLInstance *sin, } #endif + config_changed = qxl_rom_monitors_config_changed(rom, + monitors_config, + max_outputs); + memset(&rom->client_monitors_config, 0, sizeof(rom->client_monitors_config)); rom->client_monitors_config.count = monitors_config->num_of_monitors; @@ -1059,7 +1092,9 @@ static int interface_client_monitors_config(QXLInstance *sin, trace_qxl_interrupt_client_monitors_config(qxl->id, rom->client_monitors_config.count, rom->client_monitors_config.heads); - qxl_send_events(qxl, QXL_INTERRUPT_CLIENT_MONITORS_CONFIG); + if (config_changed) { + qxl_send_events(qxl, QXL_INTERRUPT_CLIENT_MONITORS_CONFIG); + } return 1; } diff --git a/hw/display/ssd0323.c b/hw/display/ssd0323.c index 6d1faf44af..e182893157 100644 --- a/hw/display/ssd0323.c +++ b/hw/display/ssd0323.c @@ -48,18 +48,18 @@ typedef struct { SSISlave ssidev; QemuConsole *con; - int cmd_len; - int cmd; - int cmd_data[8]; - int row; - int row_start; - int row_end; - int col; - int col_start; - int col_end; - int redraw; - int remap; - enum ssd0323_mode mode; + uint32_t cmd_len; + int32_t cmd; + int32_t cmd_data[8]; + int32_t row; + int32_t row_start; + int32_t row_end; + int32_t col; + int32_t col_start; + int32_t col_end; + int32_t redraw; + int32_t remap; + uint32_t mode; uint8_t framebuffer[128 * 80 / 2]; } ssd0323_state; @@ -279,83 +279,62 @@ static void 
ssd0323_cd(void *opaque, int n, int level) s->mode = level ? SSD0323_DATA : SSD0323_CMD; } -static void ssd0323_save(QEMUFile *f, void *opaque) +static int ssd0323_post_load(void *opaque, int version_id) { - SSISlave *ss = SSI_SLAVE(opaque); ssd0323_state *s = (ssd0323_state *)opaque; - int i; - - qemu_put_be32(f, s->cmd_len); - qemu_put_be32(f, s->cmd); - for (i = 0; i < 8; i++) - qemu_put_be32(f, s->cmd_data[i]); - qemu_put_be32(f, s->row); - qemu_put_be32(f, s->row_start); - qemu_put_be32(f, s->row_end); - qemu_put_be32(f, s->col); - qemu_put_be32(f, s->col_start); - qemu_put_be32(f, s->col_end); - qemu_put_be32(f, s->redraw); - qemu_put_be32(f, s->remap); - qemu_put_be32(f, s->mode); - qemu_put_buffer(f, s->framebuffer, sizeof(s->framebuffer)); - - qemu_put_be32(f, ss->cs); -} - -static int ssd0323_load(QEMUFile *f, void *opaque, int version_id) -{ - SSISlave *ss = SSI_SLAVE(opaque); - ssd0323_state *s = (ssd0323_state *)opaque; - int i; - if (version_id != 1) - return -EINVAL; - - s->cmd_len = qemu_get_be32(f); - if (s->cmd_len < 0 || s->cmd_len > ARRAY_SIZE(s->cmd_data)) { + if (s->cmd_len > ARRAY_SIZE(s->cmd_data)) { return -EINVAL; } - s->cmd = qemu_get_be32(f); - for (i = 0; i < 8; i++) - s->cmd_data[i] = qemu_get_be32(f); - s->row = qemu_get_be32(f); if (s->row < 0 || s->row >= 80) { return -EINVAL; } - s->row_start = qemu_get_be32(f); if (s->row_start < 0 || s->row_start >= 80) { return -EINVAL; } - s->row_end = qemu_get_be32(f); if (s->row_end < 0 || s->row_end >= 80) { return -EINVAL; } - s->col = qemu_get_be32(f); if (s->col < 0 || s->col >= 64) { return -EINVAL; } - s->col_start = qemu_get_be32(f); if (s->col_start < 0 || s->col_start >= 64) { return -EINVAL; } - s->col_end = qemu_get_be32(f); if (s->col_end < 0 || s->col_end >= 64) { return -EINVAL; } - s->redraw = qemu_get_be32(f); - s->remap = qemu_get_be32(f); - s->mode = qemu_get_be32(f); if (s->mode != SSD0323_CMD && s->mode != SSD0323_DATA) { return -EINVAL; } - qemu_get_buffer(f, s->framebuffer, sizeof(s->framebuffer)); - - ss->cs = qemu_get_be32(f); return 0; } +static const VMStateDescription vmstate_ssd0323 = { + .name = "ssd0323_oled", + .version_id = 2, + .minimum_version_id = 2, + .post_load = ssd0323_post_load, + .fields = (VMStateField []) { + VMSTATE_UINT32(cmd_len, ssd0323_state), + VMSTATE_INT32(cmd, ssd0323_state), + VMSTATE_INT32_ARRAY(cmd_data, ssd0323_state, 8), + VMSTATE_INT32(row, ssd0323_state), + VMSTATE_INT32(row_start, ssd0323_state), + VMSTATE_INT32(row_end, ssd0323_state), + VMSTATE_INT32(col, ssd0323_state), + VMSTATE_INT32(col_start, ssd0323_state), + VMSTATE_INT32(col_end, ssd0323_state), + VMSTATE_INT32(redraw, ssd0323_state), + VMSTATE_INT32(remap, ssd0323_state), + VMSTATE_UINT32(mode, ssd0323_state), + VMSTATE_BUFFER(framebuffer, ssd0323_state), + VMSTATE_SSI_SLAVE(ssidev, ssd0323_state), + VMSTATE_END_OF_LIST() + } +}; + static const GraphicHwOps ssd0323_ops = { .invalidate = ssd0323_invalidate_display, .gfx_update = ssd0323_update_display, @@ -372,18 +351,17 @@ static void ssd0323_realize(SSISlave *d, Error **errp) qemu_console_resize(s->con, 128 * MAGNIFY, 64 * MAGNIFY); qdev_init_gpio_in(dev, ssd0323_cd, 1); - - register_savevm(dev, "ssd0323_oled", -1, 1, - ssd0323_save, ssd0323_load, s); } static void ssd0323_class_init(ObjectClass *klass, void *data) { + DeviceClass *dc = DEVICE_CLASS(klass); SSISlaveClass *k = SSI_SLAVE_CLASS(klass); k->realize = ssd0323_realize; k->transfer = ssd0323_transfer; k->cs_polarity = SSI_CS_HIGH; + dc->vmsd = &vmstate_ssd0323; } static const TypeInfo 
ssd0323_info = { diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c index f5aff1cbe0..1af95562f2 100644 --- a/hw/display/vga-isa.c +++ b/hw/display/vga-isa.c @@ -39,6 +39,8 @@ typedef struct ISAVGAState { ISADevice parent_obj; struct VGACommonState state; + PortioList portio_vga; + PortioList portio_vbe; } ISAVGAState; static void vga_isa_reset(DeviceState *dev) @@ -60,9 +62,11 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) vga_common_init(s, OBJECT(dev), true); s->legacy_address_space = isa_address_space(isadev); vga_io_memory = vga_init_io(s, OBJECT(dev), &vga_ports, &vbe_ports); - isa_register_portio_list(isadev, 0x3b0, vga_ports, s, "vga"); + isa_register_portio_list(isadev, &d->portio_vga, + 0x3b0, vga_ports, s, "vga"); if (vbe_ports) { - isa_register_portio_list(isadev, 0x1ce, vbe_ports, s, "vbe"); + isa_register_portio_list(isadev, &d->portio_vbe, + 0x1ce, vbe_ports, s, "vbe"); } memory_region_add_subregion_overlap(isa_address_space(isadev), 0x000a0000, diff --git a/hw/display/virtio-gpu-3d.c b/hw/display/virtio-gpu-3d.c index 758d33a09d..23f39de94d 100644 --- a/hw/display/virtio-gpu-3d.c +++ b/hw/display/virtio-gpu-3d.c @@ -347,6 +347,7 @@ static void virgl_cmd_get_capset_info(VirtIOGPU *g, VIRTIO_GPU_FILL_CMD(info); + memset(&resp, 0, sizeof(resp)); if (info.capset_index == 0) { resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL; virgl_renderer_get_cap_set(resp.capset_id, diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c index 34a724c754..ef92c4ad6f 100644 --- a/hw/display/virtio-gpu-pci.c +++ b/hw/display/virtio-gpu-pci.c @@ -48,6 +48,7 @@ static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); dc->props = virtio_gpu_pci_properties; + dc->hotpluggable = false; k->realize = virtio_gpu_pci_realize; pcidev_k->class_id = PCI_CLASS_DISPLAY_OTHER; } diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 7fe6ed8bf0..5f32e1aae9 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -84,6 +84,7 @@ static void update_cursor_data_virgl(VirtIOGPU *g, if (width != s->current_cursor->width || height != s->current_cursor->height) { + free(data); return; } @@ -333,6 +334,7 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g, qemu_log_mask(LOG_GUEST_ERROR, "%s: host couldn't handle guest format %d\n", __func__, c2d.format); + g_free(res); cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; return; } @@ -990,12 +992,9 @@ static const VMStateDescription vmstate_virtio_gpu_scanouts = { static void virtio_gpu_save(QEMUFile *f, void *opaque, size_t size) { VirtIOGPU *g = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(g); struct virtio_gpu_simple_resource *res; int i; - virtio_save(vdev, f); - /* in 2d mode we should never find unprocessed commands here */ assert(QTAILQ_EMPTY(&g->cmdq)); @@ -1020,16 +1019,10 @@ static void virtio_gpu_save(QEMUFile *f, void *opaque, size_t size) static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size) { VirtIOGPU *g = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(g); struct virtio_gpu_simple_resource *res; struct virtio_gpu_scanout *scanout; uint32_t resource_id, pformat; - int i, ret; - - ret = virtio_load(vdev, f, VIRTIO_GPU_VM_VERSION); - if (ret) { - return ret; - } + int i; resource_id = qemu_get_be32(f); while (resource_id != 0) { @@ -1219,8 +1212,32 @@ static void virtio_gpu_reset(VirtIODevice *vdev) #endif } -VMSTATE_VIRTIO_DEVICE(gpu, VIRTIO_GPU_VM_VERSION, virtio_gpu_load, - virtio_gpu_save); +/* + * For historical 
reasons virtio_gpu does not adhere to virtio migration + * scheme as described in doc/virtio-migration.txt, in a sense that no + * save/load callback are provided to the core. Instead the device data + * is saved/loaded after the core data. + * + * Because of this we need a special vmsd. + */ +static const VMStateDescription vmstate_virtio_gpu = { + .name = "virtio-gpu", + .minimum_version_id = VIRTIO_GPU_VM_VERSION, + .version_id = VIRTIO_GPU_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE /* core */, + { + .name = "virtio-gpu", + .info = &(const VMStateInfo) { + .name = "virtio-gpu", + .get = virtio_gpu_load, + .put = virtio_gpu_save, + }, + .flags = VMS_SINGLE, + } /* device */, + VMSTATE_END_OF_LIST() + }, +}; static Property virtio_gpu_properties[] = { DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1), diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c index 5b510a17fd..f9b017d86b 100644 --- a/hw/display/virtio-vga.c +++ b/hw/display/virtio-vga.c @@ -120,8 +120,19 @@ static void virtio_vga_realize(VirtIOPCIProxy *vpci_dev, Error **errp) * virtio regions are moved to the end of bar #2, to make room for * the stdvga mmio registers at the start of bar #2. */ - vpci_dev->modern_mem_bar = 2; - vpci_dev->msix_bar = 4; + vpci_dev->modern_mem_bar_idx = 2; + vpci_dev->msix_bar_idx = 4; + + if (!(vpci_dev->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ)) { + /* + * with page-per-vq=off there is no padding space we can use + * for the stdvga registers. Make the common and isr regions + * smaller then. + */ + vpci_dev->common.size /= 2; + vpci_dev->isr.size /= 2; + } + offset = memory_region_size(&vpci_dev->modern_bar); offset -= vpci_dev->notify.size; vpci_dev->notify.offset = offset; diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index e51a05ea7e..6599cf078d 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -676,11 +676,13 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s) cursor.bpp = vmsvga_fifo_read(s); args = SVGA_BITMAP_SIZE(x, y) + SVGA_PIXMAP_SIZE(x, y, cursor.bpp); - if (cursor.width > 256 || - cursor.height > 256 || - cursor.bpp > 32 || - SVGA_BITMAP_SIZE(x, y) > sizeof cursor.mask || - SVGA_PIXMAP_SIZE(x, y, cursor.bpp) > sizeof cursor.image) { + if (cursor.width > 256 + || cursor.height > 256 + || cursor.bpp > 32 + || SVGA_BITMAP_SIZE(x, y) + > sizeof(cursor.mask) / sizeof(cursor.mask[0]) + || SVGA_PIXMAP_SIZE(x, y, cursor.bpp) + > sizeof(cursor.image) / sizeof(cursor.image[0])) { goto badcmd; } diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c index 46b7d5eded..7a8727aa21 100644 --- a/hw/display/xenfb.c +++ b/hw/display/xenfb.c @@ -90,28 +90,29 @@ static int common_bind(struct common *c) xen_pfn_t mfn; if (xenstore_read_fe_uint64(&c->xendev, "page-ref", &val) == -1) - return -1; + return -1; mfn = (xen_pfn_t)val; assert(val == mfn); if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1) - return -1; + return -1; c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom, PROT_READ | PROT_WRITE, 1, &mfn, NULL); if (c->page == NULL) - return -1; + return -1; xen_be_bind_evtchn(&c->xendev); - xen_be_printf(&c->xendev, 1, "ring mfn %"PRI_xen_pfn", remote-port %d, local-port %d\n", - mfn, c->xendev.remote_port, c->xendev.local_port); + xen_pv_printf(&c->xendev, 1, + "ring mfn %"PRI_xen_pfn", remote-port %d, local-port %d\n", + mfn, c->xendev.remote_port, c->xendev.local_port); return 0; } static void common_unbind(struct common *c) { - xen_be_unbind_evtchn(&c->xendev); + 
xen_pv_unbind_evtchn(&c->xendev); if (c->page) { xenforeignmemory_unmap(xen_fmem, c->page, 1); c->page = NULL; @@ -214,7 +215,7 @@ static int xenfb_kbd_event(struct XenInput *xenfb, XENKBD_IN_RING_REF(page, prod) = *event; xen_wmb(); /* ensure ring contents visible */ page->in_prod = prod + 1; - return xen_be_send_notify(&xenfb->c.xendev); + return xen_pv_send_notify(&xenfb->c.xendev); } /* Send a keyboard (or mouse button) event */ @@ -345,7 +346,7 @@ static int input_initialise(struct XenDevice *xendev) int rc; if (!in->c.con) { - xen_be_printf(xendev, 1, "ds not set (yet)\n"); + xen_pv_printf(xendev, 1, "ds not set (yet)\n"); return -1; } @@ -396,7 +397,7 @@ static void input_event(struct XenDevice *xendev) if (page->out_prod == page->out_cons) return; page->out_cons = page->out_prod; - xen_be_send_notify(&xenfb->c.xendev); + xen_pv_send_notify(&xenfb->c.xendev); } /* -------------------------------------------------------------------- */ @@ -500,8 +501,8 @@ static int xenfb_map_fb(struct XenFB *xenfb) } static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, - int width, int height, int depth, - size_t fb_len, int offset, int row_stride) + int width, int height, int depth, + size_t fb_len, int offset, int row_stride) { size_t mfn_sz = sizeof(*((struct xenfb_page *)0)->pd); size_t pd_len = sizeof(((struct xenfb_page *)0)->pd) / mfn_sz; @@ -510,40 +511,47 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, int max_width, max_height; if (fb_len_lim > fb_len_max) { - xen_be_printf(&xenfb->c.xendev, 0, "fb size limit %zu exceeds %zu, corrected\n", - fb_len_lim, fb_len_max); - fb_len_lim = fb_len_max; + xen_pv_printf(&xenfb->c.xendev, 0, + "fb size limit %zu exceeds %zu, corrected\n", + fb_len_lim, fb_len_max); + fb_len_lim = fb_len_max; } if (fb_len_lim && fb_len > fb_len_lim) { - xen_be_printf(&xenfb->c.xendev, 0, "frontend fb size %zu limited to %zu\n", - fb_len, fb_len_lim); - fb_len = fb_len_lim; + xen_pv_printf(&xenfb->c.xendev, 0, + "frontend fb size %zu limited to %zu\n", + fb_len, fb_len_lim); + fb_len = fb_len_lim; } if (depth != 8 && depth != 16 && depth != 24 && depth != 32) { - xen_be_printf(&xenfb->c.xendev, 0, "can't handle frontend fb depth %d\n", - depth); - return -1; + xen_pv_printf(&xenfb->c.xendev, 0, + "can't handle frontend fb depth %d\n", + depth); + return -1; } if (row_stride <= 0 || row_stride > fb_len) { - xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend stride %d\n", row_stride); - return -1; + xen_pv_printf(&xenfb->c.xendev, 0, "invalid frontend stride %d\n", + row_stride); + return -1; } max_width = row_stride / (depth / 8); if (width < 0 || width > max_width) { - xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend width %d limited to %d\n", - width, max_width); - width = max_width; + xen_pv_printf(&xenfb->c.xendev, 0, + "invalid frontend width %d limited to %d\n", + width, max_width); + width = max_width; } if (offset < 0 || offset >= fb_len) { - xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend offset %d (max %zu)\n", - offset, fb_len - 1); - return -1; + xen_pv_printf(&xenfb->c.xendev, 0, + "invalid frontend offset %d (max %zu)\n", + offset, fb_len - 1); + return -1; } max_height = (fb_len - offset) / row_stride; if (height < 0 || height > max_height) { - xen_be_printf(&xenfb->c.xendev, 0, "invalid frontend height %d limited to %d\n", - height, max_height); - height = max_height; + xen_pv_printf(&xenfb->c.xendev, 0, + "invalid frontend height %d limited to %d\n", + height, max_height); + height = max_height; } 
xenfb->fb_len = fb_len; xenfb->row_stride = row_stride; @@ -553,8 +561,9 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim, xenfb->offset = offset; xenfb->up_fullscreen = 1; xenfb->do_resize = 1; - xen_be_printf(&xenfb->c.xendev, 1, "framebuffer %dx%dx%d offset %d stride %d\n", - width, height, depth, offset, row_stride); + xen_pv_printf(&xenfb->c.xendev, 1, + "framebuffer %dx%dx%d offset %d stride %d\n", + width, height, depth, offset, row_stride); return 0; } @@ -631,7 +640,7 @@ static void xenfb_guest_copy(struct XenFB *xenfb, int x, int y, int w, int h) } } if (oops) /* should not happen */ - xen_be_printf(&xenfb->c.xendev, 0, "%s: oops: convert %d -> %d bpp?\n", + xen_pv_printf(&xenfb->c.xendev, 0, "%s: oops: convert %d -> %d bpp?\n", __FUNCTION__, xenfb->depth, bpp); dpy_gfx_update(xenfb->c.con, x, y, w, h); @@ -663,7 +672,7 @@ static void xenfb_send_event(struct XenFB *xenfb, union xenfb_in_event *event) xen_wmb(); /* ensure ring contents visible */ page->in_prod = prod + 1; - xen_be_send_notify(&xenfb->c.xendev); + xen_pv_send_notify(&xenfb->c.xendev); } static void xenfb_send_refresh_period(struct XenFB *xenfb, int period) @@ -696,9 +705,9 @@ static void xenfb_update(void *opaque) return; if (!xenfb->feature_update) { - /* we don't get update notifications, thus use the - * sledge hammer approach ... */ - xenfb->up_fullscreen = 1; + /* we don't get update notifications, thus use the + * sledge hammer approach ... */ + xenfb->up_fullscreen = 1; } /* resize if needed */ @@ -721,7 +730,8 @@ static void xenfb_update(void *opaque) break; } dpy_gfx_replace_surface(xenfb->c.con, surface); - xen_be_printf(&xenfb->c.xendev, 1, "update: resizing: %dx%d @ %d bpp%s\n", + xen_pv_printf(&xenfb->c.xendev, 1, + "update: resizing: %dx%d @ %d bpp%s\n", xenfb->width, xenfb->height, xenfb->depth, is_buffer_shared(surface) ? 
" (shared)" : ""); xenfb->up_fullscreen = 1; @@ -729,18 +739,19 @@ static void xenfb_update(void *opaque) /* run queued updates */ if (xenfb->up_fullscreen) { - xen_be_printf(&xenfb->c.xendev, 3, "update: fullscreen\n"); - xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height); + xen_pv_printf(&xenfb->c.xendev, 3, "update: fullscreen\n"); + xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height); } else if (xenfb->up_count) { - xen_be_printf(&xenfb->c.xendev, 3, "update: %d rects\n", xenfb->up_count); - for (i = 0; i < xenfb->up_count; i++) - xenfb_guest_copy(xenfb, - xenfb->up_rects[i].x, - xenfb->up_rects[i].y, - xenfb->up_rects[i].w, - xenfb->up_rects[i].h); + xen_pv_printf(&xenfb->c.xendev, 3, "update: %d rects\n", + xenfb->up_count); + for (i = 0; i < xenfb->up_count; i++) + xenfb_guest_copy(xenfb, + xenfb->up_rects[i].x, + xenfb->up_rects[i].y, + xenfb->up_rects[i].w, + xenfb->up_rects[i].h); } else { - xen_be_printf(&xenfb->c.xendev, 3, "update: nothing\n"); + xen_pv_printf(&xenfb->c.xendev, 3, "update: nothing\n"); } xenfb->up_count = 0; xenfb->up_fullscreen = 0; @@ -794,14 +805,14 @@ static void xenfb_handle_events(struct XenFB *xenfb) w = MIN(event->update.width, xenfb->width - x); h = MIN(event->update.height, xenfb->height - y); if (w < 0 || h < 0) { - xen_be_printf(&xenfb->c.xendev, 1, "bogus update ignored\n"); + xen_pv_printf(&xenfb->c.xendev, 1, "bogus update ignored\n"); break; } if (x != event->update.x || y != event->update.y || w != event->update.width || h != event->update.height) { - xen_be_printf(&xenfb->c.xendev, 1, "bogus update clipped\n"); + xen_pv_printf(&xenfb->c.xendev, 1, "bogus update clipped\n"); } if (w == xenfb->width && h > xenfb->height / 2) { /* scroll detector: updated more than 50% of the lines, @@ -883,7 +894,7 @@ static int fb_initialise(struct XenDevice *xendev) if (fb->feature_update) xenstore_write_be_int(xendev, "request-update", 1); - xen_be_printf(xendev, 1, "feature-update=%d, videoram=%d\n", + xen_pv_printf(xendev, 1, "feature-update=%d, videoram=%d\n", fb->feature_update, videoram); return 0; } @@ -902,7 +913,7 @@ static void fb_disconnect(struct XenDevice *xendev) PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); if (fb->pixels == MAP_FAILED) { - xen_be_printf(xendev, 0, + xen_pv_printf(xendev, 0, "Couldn't replace the framebuffer with anonymous memory errno=%d\n", errno); } @@ -923,7 +934,7 @@ static void fb_frontend_changed(struct XenDevice *xendev, const char *node) if (fb->bug_trigger == 0 && strcmp(node, "state") == 0 && xendev->fe_state == XenbusStateConnected && xendev->be_state == XenbusStateConnected) { - xen_be_printf(xendev, 2, "re-trigger connected (frontend bug)\n"); + xen_pv_printf(xendev, 2, "re-trigger connected (frontend bug)\n"); xen_be_set_state(xendev, XenbusStateConnected); fb->bug_trigger = 1; /* only once */ } @@ -934,7 +945,7 @@ static void fb_event(struct XenDevice *xendev) struct XenFB *xenfb = container_of(xendev, struct XenFB, c.xendev); xenfb_handle_events(xenfb); - xen_be_send_notify(&xenfb->c.xendev); + xen_pv_send_notify(&xenfb->c.xendev); } /* -------------------------------------------------------------------- */ @@ -977,14 +988,14 @@ void xen_init_display(int domid) wait_more: i++; main_loop_wait(true); - xfb = xen_be_find_xendev("vfb", domid, 0); - xin = xen_be_find_xendev("vkbd", domid, 0); + xfb = xen_pv_find_xendev("vfb", domid, 0); + xin = xen_pv_find_xendev("vkbd", domid, 0); if (!xfb || !xin) { if (i < 256) { usleep(10000); goto wait_more; } - xen_be_printf(NULL, 1, "displaystate setup 
failed\n"); + xen_pv_printf(NULL, 1, "displaystate setup failed\n"); return; } diff --git a/hw/dma/i8257.c b/hw/dma/i8257.c index f345c54762..8bd82e8bc8 100644 --- a/hw/dma/i8257.c +++ b/hw/dma/i8257.c @@ -553,10 +553,12 @@ static void i8257_realize(DeviceState *dev, Error **errp) memory_region_add_subregion(isa_address_space_io(isa), d->base, &d->channel_io); - isa_register_portio_list(isa, d->page_base, page_portio_list, d, + isa_register_portio_list(isa, &d->portio_page, + d->page_base, page_portio_list, d, "dma-page"); if (d->pageh_base >= 0) { - isa_register_portio_list(isa, d->pageh_base, pageh_portio_list, d, + isa_register_portio_list(isa, &d->portio_pageh, + d->pageh_base, pageh_portio_list, d, "dma-pageh"); } @@ -598,6 +600,8 @@ static void i8257_class_init(ObjectClass *klass, void *data) idc->release_DREQ = i8257_dma_release_DREQ; idc->schedule = i8257_dma_schedule; idc->register_channel = i8257_dma_register_channel; + /* Reason: needs to be wired up by isa_bus_dma() to work */ + dc->cannot_instantiate_with_device_add_yet = true; } static const TypeInfo i8257_info = { diff --git a/hw/dma/omap_dma.c b/hw/dma/omap_dma.c index 700cd6b43e..f6f86f9639 100644 --- a/hw/dma/omap_dma.c +++ b/hw/dma/omap_dma.c @@ -1975,7 +1975,7 @@ static void omap_dma4_write(void *opaque, hwaddr addr, ch->endian[1] =(value >> 19) & 1; ch->endian_lock[1] =(value >> 18) & 1; if (ch->endian[0] != ch->endian[1]) - fprintf(stderr, "%s: DMA endiannes conversion enable attempt\n", + fprintf(stderr, "%s: DMA endianness conversion enable attempt\n", __FUNCTION__); ch->write_mode = (value >> 16) & 3; ch->burst[1] = (value & 0xc000) >> 14; diff --git a/hw/dma/pl080.c b/hw/dma/pl080.c index 3bed5c3390..7724c93b8f 100644 --- a/hw/dma/pl080.c +++ b/hw/dma/pl080.c @@ -351,7 +351,7 @@ static void pl080_write(void *opaque, hwaddr offset, break; case 12: /* Configuration */ s->conf = value; - if (s->conf & (PL080_CONF_M1 | PL080_CONF_M1)) { + if (s->conf & (PL080_CONF_M1 | PL080_CONF_M2)) { qemu_log_mask(LOG_UNIMP, "pl080_write: Big-endian DMA not implemented\n"); } diff --git a/hw/dma/rc4030.c b/hw/dma/rc4030.c index 2f2576fafb..17c8518fea 100644 --- a/hw/dma/rc4030.c +++ b/hw/dma/rc4030.c @@ -616,34 +616,9 @@ static void rc4030_reset(DeviceState *dev) qemu_irq_lower(s->jazz_bus_irq); } -static int rc4030_load(QEMUFile *f, void *opaque, int version_id) +static int rc4030_post_load(void *opaque, int version_id) { rc4030State* s = opaque; - int i, j; - - if (version_id != 2) - return -EINVAL; - - s->config = qemu_get_be32(f); - s->invalid_address_register = qemu_get_be32(f); - for (i = 0; i < 8; i++) - for (j = 0; j < 4; j++) - s->dma_regs[i][j] = qemu_get_be32(f); - s->dma_tl_base = qemu_get_be32(f); - s->dma_tl_limit = qemu_get_be32(f); - s->cache_maint = qemu_get_be32(f); - s->remote_failed_address = qemu_get_be32(f); - s->memory_failed_address = qemu_get_be32(f); - s->cache_ptag = qemu_get_be32(f); - s->cache_ltag = qemu_get_be32(f); - s->cache_bmask = qemu_get_be32(f); - s->memory_refresh_rate = qemu_get_be32(f); - s->nvram_protect = qemu_get_be32(f); - for (i = 0; i < 15; i++) - s->rem_speed[i] = qemu_get_be32(f); - s->imr_jazz = qemu_get_be32(f); - s->isr_jazz = qemu_get_be32(f); - s->itr = qemu_get_be32(f); set_next_tick(s); update_jazz_irq(s); @@ -651,32 +626,31 @@ static int rc4030_load(QEMUFile *f, void *opaque, int version_id) return 0; } -static void rc4030_save(QEMUFile *f, void *opaque) -{ - rc4030State* s = opaque; - int i, j; - - qemu_put_be32(f, s->config); - qemu_put_be32(f, s->invalid_address_register); 
- for (i = 0; i < 8; i++) - for (j = 0; j < 4; j++) - qemu_put_be32(f, s->dma_regs[i][j]); - qemu_put_be32(f, s->dma_tl_base); - qemu_put_be32(f, s->dma_tl_limit); - qemu_put_be32(f, s->cache_maint); - qemu_put_be32(f, s->remote_failed_address); - qemu_put_be32(f, s->memory_failed_address); - qemu_put_be32(f, s->cache_ptag); - qemu_put_be32(f, s->cache_ltag); - qemu_put_be32(f, s->cache_bmask); - qemu_put_be32(f, s->memory_refresh_rate); - qemu_put_be32(f, s->nvram_protect); - for (i = 0; i < 15; i++) - qemu_put_be32(f, s->rem_speed[i]); - qemu_put_be32(f, s->imr_jazz); - qemu_put_be32(f, s->isr_jazz); - qemu_put_be32(f, s->itr); -} +static const VMStateDescription vmstate_rc4030 = { + .name = "rc4030", + .version_id = 3, + .post_load = rc4030_post_load, + .fields = (VMStateField []) { + VMSTATE_UINT32(config, rc4030State), + VMSTATE_UINT32(invalid_address_register, rc4030State), + VMSTATE_UINT32_2DARRAY(dma_regs, rc4030State, 8, 4), + VMSTATE_UINT32(dma_tl_base, rc4030State), + VMSTATE_UINT32(dma_tl_limit, rc4030State), + VMSTATE_UINT32(cache_maint, rc4030State), + VMSTATE_UINT32(remote_failed_address, rc4030State), + VMSTATE_UINT32(memory_failed_address, rc4030State), + VMSTATE_UINT32(cache_ptag, rc4030State), + VMSTATE_UINT32(cache_ltag, rc4030State), + VMSTATE_UINT32(cache_bmask, rc4030State), + VMSTATE_UINT32(memory_refresh_rate, rc4030State), + VMSTATE_UINT32(nvram_protect, rc4030State), + VMSTATE_UINT32_ARRAY(rem_speed, rc4030State, 16), + VMSTATE_UINT32(imr_jazz, rc4030State), + VMSTATE_UINT32(isr_jazz, rc4030State), + VMSTATE_UINT32(itr, rc4030State), + VMSTATE_END_OF_LIST() + } +}; static void rc4030_do_dma(void *opaque, int n, uint8_t *buf, int len, int is_write) { @@ -753,8 +727,6 @@ static void rc4030_initfn(Object *obj) sysbus_init_irq(sysbus, &s->timer_irq); sysbus_init_irq(sysbus, &s->jazz_bus_irq); - register_savevm(NULL, "rc4030", 0, 2, rc4030_save, rc4030_load, s); - sysbus_init_mmio(sysbus, &s->iomem_chipset); sysbus_init_mmio(sysbus, &s->iomem_jazzio); } @@ -813,6 +785,7 @@ static void rc4030_class_init(ObjectClass *klass, void *class_data) dc->realize = rc4030_realize; dc->unrealize = rc4030_unrealize; dc->reset = rc4030_reset; + dc->vmsd = &vmstate_rc4030; } static const TypeInfo rc4030_info = { diff --git a/hw/dma/xilinx_axidma.c b/hw/dma/xilinx_axidma.c index a4753e55a2..6065689ad1 100644 --- a/hw/dma/xilinx_axidma.c +++ b/hw/dma/xilinx_axidma.c @@ -111,6 +111,7 @@ struct Stream { unsigned int complete_cnt; uint32_t regs[R_MAX]; uint8_t app[20]; + unsigned char txbuf[16 * 1024]; }; struct XilinxAXIDMAStreamSlave { @@ -256,7 +257,6 @@ static void stream_process_mem2s(struct Stream *s, StreamSlave *tx_data_dev, StreamSlave *tx_control_dev) { uint32_t prev_d; - unsigned char txbuf[16 * 1024]; unsigned int txlen; if (!stream_running(s) || stream_idle(s)) { @@ -277,17 +277,17 @@ static void stream_process_mem2s(struct Stream *s, StreamSlave *tx_data_dev, } txlen = s->desc.control & SDESC_CTRL_LEN_MASK; - if ((txlen + s->pos) > sizeof txbuf) { + if ((txlen + s->pos) > sizeof s->txbuf) { hw_error("%s: too small internal txbuf! 
%d\n", __func__, txlen + s->pos); } cpu_physical_memory_read(s->desc.buffer_address, - txbuf + s->pos, txlen); + s->txbuf + s->pos, txlen); s->pos += txlen; if (stream_desc_eof(&s->desc)) { - stream_push(tx_data_dev, txbuf, s->pos); + stream_push(tx_data_dev, s->txbuf, s->pos); s->pos = 0; stream_complete(s); } @@ -548,7 +548,7 @@ static void xilinx_axidma_realize(DeviceState *dev, Error **errp) st->nr = i; st->bh = qemu_bh_new(timer_hit, st); - st->ptimer = ptimer_init(st->bh); + st->ptimer = ptimer_init(st->bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(st->ptimer, s->freqhz); } return; diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c index f3574aa8f3..c36c394fda 100644 --- a/hw/gpio/imx_gpio.c +++ b/hw/gpio/imx_gpio.c @@ -237,7 +237,7 @@ static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value, break; case ISR_ADDR: - s->isr |= ~value; + s->isr &= ~value; imx_gpio_set_all_int_lines(s); break; diff --git a/hw/i2c/bitbang_i2c.c b/hw/i2c/bitbang_i2c.c index d3a29891f8..8be88ee265 100644 --- a/hw/i2c/bitbang_i2c.c +++ b/hw/i2c/bitbang_i2c.c @@ -130,14 +130,25 @@ int bitbang_i2c_set(bitbang_i2c_interface *i2c, int line, int level) return bitbang_i2c_ret(i2c, 1); case WAITING_FOR_ACK: + { + int ret; + if (i2c->current_addr < 0) { i2c->current_addr = i2c->buffer; DPRINTF("Address 0x%02x\n", i2c->current_addr); - i2c_start_transfer(i2c->bus, i2c->current_addr >> 1, - i2c->current_addr & 1); + ret = i2c_start_transfer(i2c->bus, i2c->current_addr >> 1, + i2c->current_addr & 1); } else { DPRINTF("Sent 0x%02x\n", i2c->buffer); - i2c_send(i2c->bus, i2c->buffer); + ret = i2c_send(i2c->bus, i2c->buffer); + } + if (ret) { + /* NACK (either addressing a nonexistent device, or the + * device we were sending to decided to NACK us). + */ + DPRINTF("Got NACK\n"); + bitbang_i2c_enter_stop(i2c); + return bitbang_i2c_ret(i2c, 1); } if (i2c->current_addr & 1) { i2c->state = RECEIVING_BIT7; @@ -145,7 +156,7 @@ int bitbang_i2c_set(bitbang_i2c_interface *i2c, int line, int level) i2c->state = SENDING_BIT7; } return bitbang_i2c_ret(i2c, 0); - + } case RECEIVING_BIT7: i2c->buffer = i2c_recv(i2c->bus); DPRINTF("RX byte 0x%02x\n", i2c->buffer); diff --git a/hw/i2c/core.c b/hw/i2c/core.c index 4afbe0bde5..abd4c4cddb 100644 --- a/hw/i2c/core.c +++ b/hw/i2c/core.c @@ -88,7 +88,12 @@ int i2c_bus_busy(I2CBus *bus) return !QLIST_EMPTY(&bus->current_devs); } -/* Returns non-zero if the address is not valid. */ +/* + * Returns non-zero if the address is not valid. If this is called + * again without an intervening i2c_end_transfer(), like in the SMBus + * case where the operation is switched from write to read, this + * function will not rescan the bus and thus cannot fail. + */ /* TODO: Make this handle multiple masters. */ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) { @@ -104,15 +109,25 @@ int i2c_start_transfer(I2CBus *bus, uint8_t address, int recv) bus->broadcast = true; } - QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { - DeviceState *qdev = kid->child; - I2CSlave *candidate = I2C_SLAVE(qdev); - if ((candidate->address == address) || (bus->broadcast)) { - node = g_malloc(sizeof(struct I2CNode)); - node->elt = candidate; - QLIST_INSERT_HEAD(&bus->current_devs, node, next); - if (!bus->broadcast) { - break; + /* + * If there are already devices in the list, that means we are in + * the middle of a transaction and we shouldn't rescan the bus. + * + * This happens with any SMBus transaction, even on a pure I2C + * device. 
The interface does a transaction start without + * terminating the previous transaction. + */ + if (QLIST_EMPTY(&bus->current_devs)) { + QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) { + DeviceState *qdev = kid->child; + I2CSlave *candidate = I2C_SLAVE(qdev); + if ((candidate->address == address) || (bus->broadcast)) { + node = g_malloc(sizeof(struct I2CNode)); + node->elt = candidate; + QLIST_INSERT_HEAD(&bus->current_devs, node, next); + if (!bus->broadcast) { + break; + } } } } @@ -137,10 +152,6 @@ void i2c_end_transfer(I2CBus *bus) I2CSlaveClass *sc; I2CNode *node, *next; - if (QLIST_EMPTY(&bus->current_devs)) { - return; - } - QLIST_FOREACH_SAFE(node, &bus->current_devs, next, next) { sc = I2C_SLAVE_GET_CLASS(node->elt); if (sc->event) { diff --git a/hw/i2c/smbus.c b/hw/i2c/smbus.c index 3979b3dad7..5b4dd3eba4 100644 --- a/hw/i2c/smbus.c +++ b/hw/i2c/smbus.c @@ -248,7 +248,9 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command) return -1; } i2c_send(bus, command); - i2c_start_transfer(bus, addr, 1); + if (i2c_start_transfer(bus, addr, 1)) { + assert(0); + } data = i2c_recv(bus); i2c_nack(bus); i2c_end_transfer(bus); @@ -273,7 +275,9 @@ int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command) return -1; } i2c_send(bus, command); - i2c_start_transfer(bus, addr, 1); + if (i2c_start_transfer(bus, addr, 1)) { + assert(0); + } data = i2c_recv(bus); data |= i2c_recv(bus) << 8; i2c_nack(bus); @@ -302,7 +306,9 @@ int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data) return -1; } i2c_send(bus, command); - i2c_start_transfer(bus, addr, 1); + if (i2c_start_transfer(bus, addr, 1)) { + assert(0); + } len = i2c_recv(bus); if (len > 32) { len = 0; diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs index 90e94ffefd..909ead6a77 100644 --- a/hw/i386/Makefile.objs +++ b/hw/i386/Makefile.objs @@ -3,6 +3,7 @@ obj-y += multiboot.o obj-y += pc.o pc_piix.o pc_q35.o obj-y += pc_sysfw.o obj-y += x86-iommu.o intel_iommu.o +obj-y += amd_iommu.o obj-$(CONFIG_XEN) += ../xenpv/ xen/ obj-y += kvmvapic.o diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a26a4bb03f..9708cdc463 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -53,13 +53,13 @@ #include "hw/pci/pci_bus.h" #include "hw/pci-host/q35.h" #include "hw/i386/x86-iommu.h" -#include "hw/timer/hpet.h" #include "hw/acpi/aml-build.h" #include "qapi/qmp/qint.h" #include "qom/qom-qobject.h" -#include "hw/i386/x86-iommu.h" +#include "hw/i386/amd_iommu.h" +#include "hw/i386/intel_iommu.h" #include "hw/acpi/ipmi.h" @@ -339,24 +339,38 @@ build_fadt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, CPUArchIdList *apic_ids, GArray *entry) { - int apic_id; - AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); + uint32_t apic_id = apic_ids->cpus[uid].arch_id; - apic_id = apic_ids->cpus[uid].arch_id; - apic->type = ACPI_APIC_PROCESSOR; - apic->length = sizeof(*apic); - apic->processor_id = uid; - apic->local_apic_id = apic_id; - if (apic_ids->cpus[uid].cpu != NULL) { - apic->flags = cpu_to_le32(1); + /* ACPI spec says that LAPIC entry for non present + * CPU may be omitted from MADT or it must be marked + * as disabled. However omitting non present CPU from + * MADT breaks hotplug on linux. So possible CPUs + * should be put in MADT but kept disabled. 
+ */ + if (apic_id < 255) { + AcpiMadtProcessorApic *apic = acpi_data_push(entry, sizeof *apic); + + apic->type = ACPI_APIC_PROCESSOR; + apic->length = sizeof(*apic); + apic->processor_id = uid; + apic->local_apic_id = apic_id; + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } } else { - /* ACPI spec says that LAPIC entry for non present - * CPU may be omitted from MADT or it must be marked - * as disabled. However omitting non present CPU from - * MADT breaks hotplug on linux. So possible CPUs - * should be put in MADT but kept disabled. - */ - apic->flags = cpu_to_le32(0); + AcpiMadtProcessorX2Apic *apic = acpi_data_push(entry, sizeof *apic); + + apic->type = ACPI_APIC_LOCAL_X2APIC; + apic->length = sizeof(*apic); + apic->uid = cpu_to_le32(uid); + apic->x2apic_id = cpu_to_le32(apic_id); + if (apic_ids->cpus[uid].cpu != NULL) { + apic->flags = cpu_to_le32(1); + } else { + apic->flags = cpu_to_le32(0); + } } } @@ -368,11 +382,11 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) int madt_start = table_data->len; AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(pcms->acpi_dev); AcpiDeviceIf *adev = ACPI_DEVICE_IF(pcms->acpi_dev); + bool x2apic_mode = false; AcpiMultipleApicTable *madt; AcpiMadtIoApic *io_apic; AcpiMadtIntsrcovr *intsrcovr; - AcpiMadtLocalNmi *local_nmi; int i; madt = acpi_data_push(table_data, sizeof *madt); @@ -381,6 +395,9 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) for (i = 0; i < apic_ids->len; i++) { adevc->madt_cpu(adev, i, apic_ids, table_data); + if (apic_ids->cpus[i].arch_id > 254) { + x2apic_mode = true; + } } g_free(apic_ids); @@ -413,12 +430,25 @@ build_madt(GArray *table_data, BIOSLinker *linker, PCMachineState *pcms) intsrcovr->flags = cpu_to_le16(0xd); /* active high, level triggered */ } - local_nmi = acpi_data_push(table_data, sizeof *local_nmi); - local_nmi->type = ACPI_APIC_LOCAL_NMI; - local_nmi->length = sizeof(*local_nmi); - local_nmi->processor_id = 0xff; /* all processors */ - local_nmi->flags = cpu_to_le16(0); - local_nmi->lint = 1; /* ACPI_LINT1 */ + if (x2apic_mode) { + AcpiMadtLocalX2ApicNmi *local_nmi; + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_X2APIC_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->uid = 0xFFFFFFFF; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + } else { + AcpiMadtLocalNmi *local_nmi; + + local_nmi = acpi_data_push(table_data, sizeof *local_nmi); + local_nmi->type = ACPI_APIC_LOCAL_NMI; + local_nmi->length = sizeof(*local_nmi); + local_nmi->processor_id = 0xff; /* all processors */ + local_nmi->flags = cpu_to_le16(0); + local_nmi->lint = 1; /* ACPI_LINT1 */ + } build_header(linker, table_data, (void *)(table_data->data + madt_start), "APIC", @@ -789,7 +819,7 @@ static gint crs_range_compare(gconstpointer a, gconstpointer b) static void crs_replace_with_free_ranges(GPtrArray *ranges, uint64_t start, uint64_t end) { - GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free); + GPtrArray *free_ranges = g_ptr_array_new(); uint64_t free_base = start; int i; @@ -813,7 +843,7 @@ static void crs_replace_with_free_ranges(GPtrArray *ranges, g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i)); } - g_ptr_array_free(free_ranges, false); + g_ptr_array_free(free_ranges, true); } /* @@ -2038,6 +2068,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, method = 
aml_method("_E03", 0, AML_NOTSERIALIZED); aml_append(method, aml_call0(MEMORY_HOTPLUG_HANDLER_PATH)); aml_append(scope, method); + + if (pcms->acpi_nvdimm_state.is_enabled) { + method = aml_method("_E04", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), + aml_int(0x80))); + aml_append(scope, method); + } } aml_append(dsdt, scope); @@ -2390,7 +2427,6 @@ static void build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) { AcpiSystemResourceAffinityTable *srat; - AcpiSratProcessorAffinity *core; AcpiSratMemoryAffinity *numamem; int i; @@ -2409,22 +2445,34 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) srat->reserved1 = cpu_to_le32(1); for (i = 0; i < apic_ids->len; i++) { - int j; - int apic_id = apic_ids->cpus[i].arch_id; - - core = acpi_data_push(table_data, sizeof *core); - core->type = ACPI_SRAT_PROCESSOR_APIC; - core->length = sizeof(*core); - core->local_apic_id = apic_id; - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, numa_info[j].node_cpu)) { + int j = numa_get_node_for_cpu(i); + uint32_t apic_id = apic_ids->cpus[i].arch_id; + + if (apic_id < 255) { + AcpiSratProcessorAffinity *core; + + core = acpi_data_push(table_data, sizeof *core); + core->type = ACPI_SRAT_PROCESSOR_APIC; + core->length = sizeof(*core); + core->local_apic_id = apic_id; + if (j < nb_numa_nodes) { core->proximity_lo = j; - break; } + memset(core->proximity_hi, 0, 3); + core->local_sapic_eid = 0; + core->flags = cpu_to_le32(1); + } else { + AcpiSratProcessorX2ApicAffinity *core; + + core = acpi_data_push(table_data, sizeof *core); + core->type = ACPI_SRAT_PROCESSOR_x2APIC; + core->length = sizeof(*core); + core->x2apic_id = cpu_to_le32(apic_id); + if (j < nb_numa_nodes) { + core->proximity_domain = cpu_to_le32(j); + } + core->flags = cpu_to_le32(1); } - memset(core->proximity_hi, 0, 3); - core->local_sapic_eid = 0; - core->flags = cpu_to_le32(1); } @@ -2557,11 +2605,68 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) scope->length = ioapic_scope_size; scope->enumeration_id = ACPI_BUILD_IOAPIC_ID; scope->bus = Q35_PSEUDO_BUS_PLATFORM; - scope->path[0] = cpu_to_le16(Q35_PSEUDO_DEVFN_IOAPIC); + scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC); + scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC); build_header(linker, table_data, (void *)(table_data->data + dmar_start), "DMAR", table_data->len - dmar_start, 1, NULL, NULL); } +/* + * IVRS table as specified in AMD IOMMU Specification v2.62, Section 5.2 + * accessible here http://support.amd.com/TechDocs/48882_IOMMU.pdf + */ +static void +build_amd_iommu(GArray *table_data, BIOSLinker *linker) +{ + int iommu_start = table_data->len; + AMDVIState *s = AMD_IOMMU_DEVICE(x86_iommu_get_default()); + + /* IVRS header */ + acpi_data_push(table_data, sizeof(AcpiTableHeader)); + /* IVinfo - IO virtualization information common to all + * IOMMU units in a system + */ + build_append_int_noprefix(table_data, 40UL << 8/* PASize */, 4); + /* reserved */ + build_append_int_noprefix(table_data, 0, 8); + + /* IVHD definition - type 10h */ + build_append_int_noprefix(table_data, 0x10, 1); + /* virtualization flags */ + build_append_int_noprefix(table_data, + (1UL << 0) | /* HtTunEn */ + (1UL << 4) | /* iotblSup */ + (1UL << 6) | /* PrefSup */ + (1UL << 7), /* PPRSup */ + 1); + /* IVHD length */ + build_append_int_noprefix(table_data, 0x24, 2); + /* DeviceID */ + build_append_int_noprefix(table_data, s->devid, 2); + /* Capability offset */ + 
build_append_int_noprefix(table_data, s->capab_offset, 2); + /* IOMMU base address */ + build_append_int_noprefix(table_data, s->mmio.addr, 8); + /* PCI Segment Group */ + build_append_int_noprefix(table_data, 0, 2); + /* IOMMU info */ + build_append_int_noprefix(table_data, 0, 2); + /* IOMMU Feature Reporting */ + build_append_int_noprefix(table_data, + (48UL << 30) | /* HATS */ + (48UL << 28) | /* GATS */ + (1UL << 2), /* GTSup */ + 4); + /* + * Type 1 device entry reporting all devices + * These are 4-byte device entries currently reporting the range of + * Refer to Spec - Table 95:IVHD Device Entry Type Codes(4-byte) + */ + build_append_int_noprefix(table_data, 0x0000001, 4); + + build_header(linker, table_data, (void *)(table_data->data + iommu_start), + "IVRS", table_data->len - iommu_start, 1, NULL, NULL); +} static GArray * build_rsdp(GArray *rsdp_table, BIOSLinker *linker, unsigned rsdt_tbl_offset) @@ -2622,11 +2727,6 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) return true; } -static bool acpi_has_iommu(void) -{ - return !!x86_iommu_get_default(); -} - static void acpi_build(AcpiBuildTables *tables, MachineState *machine) { @@ -2706,13 +2806,19 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); build_mcfg_q35(tables_blob, tables->linker, &mcfg); } - if (acpi_has_iommu()) { - acpi_add_table(table_offsets, tables_blob); - build_dmar_q35(tables_blob, tables->linker); + if (x86_iommu_get_default()) { + IommuType IOMMUType = x86_iommu_get_type(); + if (IOMMUType == TYPE_AMD) { + acpi_add_table(table_offsets, tables_blob); + build_amd_iommu(tables_blob, tables->linker); + } else if (IOMMUType == TYPE_INTEL) { + acpi_add_table(table_offsets, tables_blob); + build_dmar_q35(tables_blob, tables->linker); + } } if (pcms->acpi_nvdimm_state.is_enabled) { nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, - pcms->acpi_nvdimm_state.dsm_mem); + &pcms->acpi_nvdimm_state, machine->ram_slots); } /* Add tables supplied by user (if any) */ @@ -2754,7 +2860,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) */ int legacy_aml_len = pcmc->legacy_acpi_table_size + - ACPI_BUILD_LEGACY_CPU_AML_SIZE * max_cpus; + ACPI_BUILD_LEGACY_CPU_AML_SIZE * pcms->apic_id_limit; int legacy_table_size = ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, ACPI_BUILD_ALIGN_SIZE); @@ -2830,7 +2936,7 @@ static MemoryRegion *acpi_add_rom_blob(AcpiBuildState *build_state, uint64_t max_size) { return rom_add_blob(name, blob->data, acpi_data_len(blob), max_size, -1, - name, acpi_build_update, build_state); + name, acpi_build_update, build_state, NULL); } static const VMStateDescription vmstate_acpi_build = { @@ -2855,7 +2961,7 @@ void acpi_setup(void) return; } - if (!pcmc->has_acpi_build) { + if (!pcms->acpi_build_enabled) { ACPI_BUILD_DPRINTF("ACPI build disabled. Bailing out.\n"); return; } diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c new file mode 100644 index 0000000000..47b79d9112 --- /dev/null +++ b/hw/i386/amd_iommu.c @@ -0,0 +1,1212 @@ +/* + * QEMU emulation of AMD IOMMU (AMD-Vi) + * + * Copyright (C) 2011 Eduard - Gabriel Munteanu + * Copyright (C) 2015 David Kiarie, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Cache implementation inspired by hw/i386/intel_iommu.c + */ +#include "qemu/osdep.h" +#include "hw/i386/amd_iommu.h" +#include "qemu/error-report.h" +#include "trace.h" + +/* used AMD-Vi MMIO registers */ +const char *amdvi_mmio_low[] = { + "AMDVI_MMIO_DEVTAB_BASE", + "AMDVI_MMIO_CMDBUF_BASE", + "AMDVI_MMIO_EVTLOG_BASE", + "AMDVI_MMIO_CONTROL", + "AMDVI_MMIO_EXCL_BASE", + "AMDVI_MMIO_EXCL_LIMIT", + "AMDVI_MMIO_EXT_FEATURES", + "AMDVI_MMIO_PPR_BASE", + "UNHANDLED" +}; +const char *amdvi_mmio_high[] = { + "AMDVI_MMIO_COMMAND_HEAD", + "AMDVI_MMIO_COMMAND_TAIL", + "AMDVI_MMIO_EVTLOG_HEAD", + "AMDVI_MMIO_EVTLOG_TAIL", + "AMDVI_MMIO_STATUS", + "AMDVI_MMIO_PPR_HEAD", + "AMDVI_MMIO_PPR_TAIL", + "UNHANDLED" +}; + +struct AMDVIAddressSpace { + uint8_t bus_num; /* bus number */ + uint8_t devfn; /* device function */ + AMDVIState *iommu_state; /* AMDVI - one per machine */ + MemoryRegion iommu; /* Device's address translation region */ + MemoryRegion iommu_ir; /* Device's interrupt remapping region */ + AddressSpace as; /* device's corresponding address space */ +}; + +/* AMDVI cache entry */ +typedef struct AMDVIIOTLBEntry { + uint16_t domid; /* assigned domain id */ + uint16_t devid; /* device owning entry */ + uint64_t perms; /* access permissions */ + uint64_t translated_addr; /* translated address */ + uint64_t page_mask; /* physical page size */ +} AMDVIIOTLBEntry; + +/* configure MMIO registers at startup/reset */ +static void amdvi_set_quad(AMDVIState *s, hwaddr addr, uint64_t val, + uint64_t romask, uint64_t w1cmask) +{ + stq_le_p(&s->mmior[addr], val); + stq_le_p(&s->romask[addr], romask); + stq_le_p(&s->w1cmask[addr], w1cmask); +} + +static uint16_t amdvi_readw(AMDVIState *s, hwaddr addr) +{ + return lduw_le_p(&s->mmior[addr]); +} + +static uint32_t amdvi_readl(AMDVIState *s, hwaddr addr) +{ + return ldl_le_p(&s->mmior[addr]); +} + +static uint64_t amdvi_readq(AMDVIState *s, hwaddr addr) +{ + return ldq_le_p(&s->mmior[addr]); +} + +/* internal write */ +static void amdvi_writeq_raw(AMDVIState *s, uint64_t val, hwaddr addr) +{ + stq_le_p(&s->mmior[addr], val); +} + +/* external write */ +static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val) +{ + uint16_t romask = lduw_le_p(&s->romask[addr]); + uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]); + uint16_t oldval = lduw_le_p(&s->mmior[addr]); + stw_le_p(&s->mmior[addr], + ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); +} + +static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val) +{ + uint32_t romask = ldl_le_p(&s->romask[addr]); + uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]); + uint32_t oldval = ldl_le_p(&s->mmior[addr]); + stl_le_p(&s->mmior[addr], + ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); +} + +static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) +{ + uint64_t romask = ldq_le_p(&s->romask[addr]); + uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); + uint32_t oldval = ldq_le_p(&s->mmior[addr]); + stq_le_p(&s->mmior[addr], + ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); +} + +/* OR a 64-bit register with a 64-bit value */ +static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val) +{ + 
return amdvi_readq(s, addr) | val; +} + +/* OR a 64-bit register with a 64-bit value storing result in the register */ +static void amdvi_assign_orq(AMDVIState *s, hwaddr addr, uint64_t val) +{ + amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) | val); +} + +/* AND a 64-bit register with a 64-bit value storing result in the register */ +static void amdvi_assign_andq(AMDVIState *s, hwaddr addr, uint64_t val) +{ + amdvi_writeq_raw(s, addr, amdvi_readq(s, addr) & val); +} + +static void amdvi_generate_msi_interrupt(AMDVIState *s) +{ + MSIMessage msg = {}; + MemTxAttrs attrs = { + .requester_id = pci_requester_id(&s->pci.dev) + }; + + if (msi_enabled(&s->pci.dev)) { + msg = msi_get_message(&s->pci.dev, 0); + address_space_stl_le(&address_space_memory, msg.address, msg.data, + attrs, NULL); + } +} + +static void amdvi_log_event(AMDVIState *s, uint64_t *evt) +{ + /* event logging not enabled */ + if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS, + AMDVI_MMIO_STATUS_EVT_OVF)) { + return; + } + + /* event log buffer full */ + if (s->evtlog_tail >= s->evtlog_len) { + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF); + /* generate interrupt */ + amdvi_generate_msi_interrupt(s); + return; + } + + if (dma_memory_write(&address_space_memory, s->evtlog + s->evtlog_tail, + &evt, AMDVI_EVENT_LEN)) { + trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail); + } + + s->evtlog_tail += AMDVI_EVENT_LEN; + amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT); + amdvi_generate_msi_interrupt(s); +} + +static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start, + int length) +{ + int index = start / 64, bitpos = start % 64; + uint64_t mask = MAKE_64BIT_MASK(start, length); + buffer[index] &= ~mask; + buffer[index] |= (value << bitpos) & mask; +} +/* + * AMDVi event structure + * 0:15 -> DeviceID + * 55:63 -> event type + miscellaneous info + * 63:127 -> related address + */ +static void amdvi_encode_event(uint64_t *evt, uint16_t devid, uint64_t addr, + uint16_t info) +{ + amdvi_setevent_bits(evt, devid, 0, 16); + amdvi_setevent_bits(evt, info, 55, 8); + amdvi_setevent_bits(evt, addr, 63, 64); +} +/* log an error encountered during a page walk + * + * @addr: virtual address in translation request + */ +static void amdvi_page_fault(AMDVIState *s, uint16_t devid, + hwaddr addr, uint16_t info) +{ + uint64_t evt[4]; + + info |= AMDVI_EVENT_IOPF_I | AMDVI_EVENT_IOPF; + amdvi_encode_event(evt, devid, addr, info); + amdvi_log_event(s, evt); + pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + PCI_STATUS_SIG_TARGET_ABORT); +} +/* + * log a master abort accessing device table + * @devtab : address of device table entry + * @info : error flags + */ +static void amdvi_log_devtab_error(AMDVIState *s, uint16_t devid, + hwaddr devtab, uint16_t info) +{ + uint64_t evt[4]; + + info |= AMDVI_EVENT_DEV_TAB_HW_ERROR; + + amdvi_encode_event(evt, devid, devtab, info); + amdvi_log_event(s, evt); + pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + PCI_STATUS_SIG_TARGET_ABORT); +} +/* log an event trying to access command buffer + * @addr : address that couldn't be accessed + */ +static void amdvi_log_command_error(AMDVIState *s, hwaddr addr) +{ + uint64_t evt[4], info = AMDVI_EVENT_COMMAND_HW_ERROR; + + amdvi_encode_event(evt, 0, addr, info); + amdvi_log_event(s, evt); + pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + PCI_STATUS_SIG_TARGET_ABORT); +} +/* log an illegal command event + * @addr : address of illegal command + */ +static void
amdvi_log_illegalcom_error(AMDVIState *s, uint16_t info, + hwaddr addr) +{ + uint64_t evt[4]; + + info |= AMDVI_EVENT_ILLEGAL_COMMAND_ERROR; + amdvi_encode_event(evt, 0, addr, info); + amdvi_log_event(s, evt); +} +/* log an error accessing device table + * + * @devid : device owning the table entry + * @devtab : address of device table entry + * @info : error flags + */ +static void amdvi_log_illegaldevtab_error(AMDVIState *s, uint16_t devid, + hwaddr addr, uint16_t info) +{ + uint64_t evt[4]; + + info |= AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY; + amdvi_encode_event(evt, devid, addr, info); + amdvi_log_event(s, evt); +} +/* log an error accessing a PTE entry + * @addr : address that couldn't be accessed + */ +static void amdvi_log_pagetab_error(AMDVIState *s, uint16_t devid, + hwaddr addr, uint16_t info) +{ + uint64_t evt[4]; + + info |= AMDVI_EVENT_PAGE_TAB_HW_ERROR; + amdvi_encode_event(evt, devid, addr, info); + amdvi_log_event(s, evt); + pci_word_test_and_set_mask(s->pci.dev.config + PCI_STATUS, + PCI_STATUS_SIG_TARGET_ABORT); +} + +static gboolean amdvi_uint64_equal(gconstpointer v1, gconstpointer v2) +{ + return *((const uint64_t *)v1) == *((const uint64_t *)v2); +} + +static guint amdvi_uint64_hash(gconstpointer v) +{ + return (guint)*(const uint64_t *)v; +} + +static AMDVIIOTLBEntry *amdvi_iotlb_lookup(AMDVIState *s, hwaddr addr, + uint64_t devid) +{ + uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | + ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); + return g_hash_table_lookup(s->iotlb, &key); +} + +static void amdvi_iotlb_reset(AMDVIState *s) +{ + assert(s->iotlb); + trace_amdvi_iotlb_reset(); + g_hash_table_remove_all(s->iotlb); +} + +static gboolean amdvi_iotlb_remove_by_devid(gpointer key, gpointer value, + gpointer user_data) +{ + AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; + uint16_t devid = *(uint16_t *)user_data; + return entry->devid == devid; +} + +static void amdvi_iotlb_remove_page(AMDVIState *s, hwaddr addr, + uint64_t devid) +{ + uint64_t key = (addr >> AMDVI_PAGE_SHIFT_4K) | + ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); + g_hash_table_remove(s->iotlb, &key); +} + +static void amdvi_update_iotlb(AMDVIState *s, uint16_t devid, + uint64_t gpa, IOMMUTLBEntry to_cache, + uint16_t domid) +{ + AMDVIIOTLBEntry *entry = g_new(AMDVIIOTLBEntry, 1); + uint64_t *key = g_new(uint64_t, 1); + uint64_t gfn = gpa >> AMDVI_PAGE_SHIFT_4K; + + /* don't cache erroneous translations */ + if (to_cache.perm != IOMMU_NONE) { + trace_amdvi_cache_update(domid, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), gpa, to_cache.translated_addr); + + if (g_hash_table_size(s->iotlb) >= AMDVI_IOTLB_MAX_SIZE) { + amdvi_iotlb_reset(s); + } + + entry->domid = domid; + entry->perms = to_cache.perm; + entry->translated_addr = to_cache.translated_addr; + entry->page_mask = to_cache.addr_mask; + *key = gfn | ((uint64_t)(devid) << AMDVI_DEVID_SHIFT); + g_hash_table_replace(s->iotlb, key, entry); + } +} + +static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd) +{ + /* pad the last 3 bits */ + hwaddr addr = cpu_to_le64(extract64(cmd[0], 3, 49)) << 3; + uint64_t data = cpu_to_le64(cmd[1]); + + if (extract64(cmd[0], 51, 8)) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } + if (extract64(cmd[0], 0, 1)) { + if (dma_memory_write(&address_space_memory, addr, &data, + AMDVI_COMPLETION_DATA_SIZE)) { + trace_amdvi_completion_wait_fail(addr); + } + } + /* set completion interrupt */ + if (extract64(cmd[0], 1, 1)) { + amdvi_test_mask(s, AMDVI_MMIO_STATUS, 
AMDVI_MMIO_STATUS_COMP_INT); + /* generate interrupt */ + amdvi_generate_msi_interrupt(s); + } + trace_amdvi_completion_wait(addr, data); +} + +/* log error without aborting since linux seems to be using reserved bits */ +static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd) +{ + uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16)); + + /* This command should invalidate internal caches of which there isn't */ + if (extract64(cmd[0], 15, 16) || cmd[1]) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } + trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid)); +} + +static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd) +{ + if (extract64(cmd[0], 15, 16) || extract64(cmd[0], 19, 8) || + extract64(cmd[1], 0, 2) || extract64(cmd[1], 3, 29) + || extract64(cmd[1], 47, 16)) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } + trace_amdvi_ppr_exec(); +} + +static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd) +{ + if (extract64(cmd[0], 0, 60) || cmd[1]) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } + + amdvi_iotlb_reset(s); + trace_amdvi_all_inval(); +} + +static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value, + gpointer user_data) +{ + AMDVIIOTLBEntry *entry = (AMDVIIOTLBEntry *)value; + uint16_t domid = *(uint16_t *)user_data; + return entry->domid == domid; +} + +/* we don't have devid - we can't remove pages by address */ +static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd) +{ + uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16)); + + if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 16, 12) || + extract64(cmd[0], 3, 10)) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } + + g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid, + &domid); + trace_amdvi_pages_inval(domid); +} + +static void amdvi_prefetch_pages(AMDVIState *s, uint64_t *cmd) +{ + if (extract64(cmd[0], 16, 8) || extract64(cmd[0], 20, 8) || + extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || + extract64(cmd[1], 5, 7)) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } + + trace_amdvi_prefetch_pages(); +} + +static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) +{ + if (extract64(cmd[0], 16, 16) || cmd[1]) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + return; + } + + trace_amdvi_intr_inval(); +} + +/* FIXME: Try to work with the specified size instead of all the pages + * when the S bit is on + */ +static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) +{ + + uint16_t devid = extract64(cmd[0], 0, 16); + if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 9)) { + amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), + s->cmdbuf + s->cmdbuf_head); + return; + } + + if (extract64(cmd[1], 0, 1)) { + g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_devid, + &devid); + } else { + amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12, + cpu_to_le16(extract64(cmd[1], 0, 16))); + } + trace_amdvi_iotlb_inval(); +} + +/* not honouring reserved bits is regarded as an illegal command */ +static void amdvi_cmdbuf_exec(AMDVIState *s) +{ + uint64_t cmd[2]; + + if (dma_memory_read(&address_space_memory, s->cmdbuf + s->cmdbuf_head, + cmd, AMDVI_COMMAND_SIZE)) { + trace_amdvi_command_read_fail(s->cmdbuf, s->cmdbuf_head); 
+ amdvi_log_command_error(s, s->cmdbuf + s->cmdbuf_head); + return; + } + + switch (extract64(cmd[0], 60, 4)) { + case AMDVI_CMD_COMPLETION_WAIT: + amdvi_completion_wait(s, cmd); + break; + case AMDVI_CMD_INVAL_DEVTAB_ENTRY: + amdvi_inval_devtab_entry(s, cmd); + break; + case AMDVI_CMD_INVAL_AMDVI_PAGES: + amdvi_inval_pages(s, cmd); + break; + case AMDVI_CMD_INVAL_IOTLB_PAGES: + iommu_inval_iotlb(s, cmd); + break; + case AMDVI_CMD_INVAL_INTR_TABLE: + amdvi_inval_inttable(s, cmd); + break; + case AMDVI_CMD_PREFETCH_AMDVI_PAGES: + amdvi_prefetch_pages(s, cmd); + break; + case AMDVI_CMD_COMPLETE_PPR_REQUEST: + amdvi_complete_ppr(s, cmd); + break; + case AMDVI_CMD_INVAL_AMDVI_ALL: + amdvi_inval_all(s, cmd); + break; + default: + trace_amdvi_unhandled_command(extract64(cmd[1], 60, 4)); + /* log illegal command */ + amdvi_log_illegalcom_error(s, extract64(cmd[1], 60, 4), + s->cmdbuf + s->cmdbuf_head); + } +} + +static void amdvi_cmdbuf_run(AMDVIState *s) +{ + if (!s->cmdbuf_enabled) { + trace_amdvi_command_error(amdvi_readq(s, AMDVI_MMIO_CONTROL)); + return; + } + + /* check if there is work to do. */ + while (s->cmdbuf_head != s->cmdbuf_tail) { + trace_amdvi_command_exec(s->cmdbuf_head, s->cmdbuf_tail, s->cmdbuf); + amdvi_cmdbuf_exec(s); + s->cmdbuf_head += AMDVI_COMMAND_SIZE; + amdvi_writeq_raw(s, s->cmdbuf_head, AMDVI_MMIO_COMMAND_HEAD); + + /* wrap head pointer */ + if (s->cmdbuf_head >= s->cmdbuf_len * AMDVI_COMMAND_SIZE) { + s->cmdbuf_head = 0; + } + } +} + +static void amdvi_mmio_trace(hwaddr addr, unsigned size) +{ + uint8_t index = (addr & ~0x2000) / 8; + + if ((addr & 0x2000)) { + /* high table */ + index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index; + trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07); + } else { + index = index >= AMDVI_MMIO_REGS_LOW ? 
AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
+        trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
+    }
+}
+
+static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    AMDVIState *s = opaque;
+
+    uint64_t val = -1;
+    if (addr + size > AMDVI_MMIO_SIZE) {
+        trace_amdvi_mmio_read("error: addr outside region: max ",
+                              (uint64_t)AMDVI_MMIO_SIZE, addr, size);
+        return (uint64_t)-1;
+    }
+
+    if (size == 2) {
+        val = amdvi_readw(s, addr);
+    } else if (size == 4) {
+        val = amdvi_readl(s, addr);
+    } else if (size == 8) {
+        val = amdvi_readq(s, addr);
+    }
+    amdvi_mmio_trace(addr, size);
+
+    return val;
+}
+
+static void amdvi_handle_control_write(AMDVIState *s)
+{
+    unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
+    s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
+
+    s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
+    s->evtlog_enabled = s->enabled && !!(control &
+                        AMDVI_MMIO_CONTROL_EVENTLOGEN);
+
+    s->evtlog_intr = !!(control & AMDVI_MMIO_CONTROL_EVENTINTEN);
+    s->completion_wait_intr = !!(control & AMDVI_MMIO_CONTROL_COMWAITINTEN);
+    s->cmdbuf_enabled = s->enabled && !!(control &
+                        AMDVI_MMIO_CONTROL_CMDBUFLEN);
+
+    /* update the flags depending on the control register */
+    if (s->cmdbuf_enabled) {
+        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN);
+    } else {
+        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_CMDBUF_RUN);
+    }
+    if (s->evtlog_enabled) {
+        amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_RUN);
+    } else {
+        amdvi_assign_andq(s, AMDVI_MMIO_STATUS, ~AMDVI_MMIO_STATUS_EVT_RUN);
+    }
+
+    trace_amdvi_control_status(control);
+    amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_devtab_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
+    s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
+
+    /* set device table length */
+    s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 *
+                     (AMDVI_MMIO_DEVTAB_SIZE_UNIT /
+                      AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
+}
+
+static inline void amdvi_handle_cmdhead_write(AMDVIState *s)
+{
+    s->cmdbuf_head = amdvi_readq(s, AMDVI_MMIO_COMMAND_HEAD)
+                     & AMDVI_MMIO_CMDBUF_HEAD_MASK;
+    amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_cmdbase_write(AMDVIState *s)
+{
+    s->cmdbuf = amdvi_readq(s, AMDVI_MMIO_COMMAND_BASE)
+                & AMDVI_MMIO_CMDBUF_BASE_MASK;
+    s->cmdbuf_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_CMDBUF_SIZE_BYTE)
+                    & AMDVI_MMIO_CMDBUF_SIZE_MASK);
+    s->cmdbuf_head = s->cmdbuf_tail = 0;
+}
+
+static inline void amdvi_handle_cmdtail_write(AMDVIState *s)
+{
+    s->cmdbuf_tail = amdvi_readq(s, AMDVI_MMIO_COMMAND_TAIL)
+                     & AMDVI_MMIO_CMDBUF_TAIL_MASK;
+    amdvi_cmdbuf_run(s);
+}
+
+static inline void amdvi_handle_excllim_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EXCL_LIMIT);
+    s->excl_limit = (val & AMDVI_MMIO_EXCL_LIMIT_MASK) |
+                    AMDVI_MMIO_EXCL_LIMIT_LOW;
+}
+
+static inline void amdvi_handle_evtbase_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
+    s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
+    s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
+                    & AMDVI_MMIO_EVTLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_evttail_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_TAIL);
+    s->evtlog_tail = val & AMDVI_MMIO_EVTLOG_TAIL_MASK;
+}
+
+static inline void amdvi_handle_evthead_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_HEAD);
+    s->evtlog_head = val & AMDVI_MMIO_EVTLOG_HEAD_MASK;
+}
+
+static inline void 
amdvi_handle_pprbase_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_BASE);
+    s->ppr_log = val & AMDVI_MMIO_PPRLOG_BASE_MASK;
+    s->pprlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_PPRLOG_SIZE_BYTE)
+                    & AMDVI_MMIO_PPRLOG_SIZE_MASK);
+}
+
+static inline void amdvi_handle_pprhead_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_HEAD);
+    s->pprlog_head = val & AMDVI_MMIO_PPRLOG_HEAD_MASK;
+}
+
+static inline void amdvi_handle_pprtail_write(AMDVIState *s)
+{
+    uint64_t val = amdvi_readq(s, AMDVI_MMIO_PPR_TAIL);
+    s->pprlog_tail = val & AMDVI_MMIO_PPRLOG_TAIL_MASK;
+}
+
+/* FIXME: something may go wrong if System Software writes these registers
+ * in chunks of one byte. Linux writes in chunks of 4 bytes, so this
+ * currently works with Linux, but it will break if software reads or
+ * writes in 8-byte chunks.
+ */
+static void amdvi_mmio_reg_write(AMDVIState *s, unsigned size, uint64_t val,
+                                 hwaddr addr)
+{
+    if (size == 2) {
+        amdvi_writew(s, addr, val);
+    } else if (size == 4) {
+        amdvi_writel(s, addr, val);
+    } else if (size == 8) {
+        amdvi_writeq(s, addr, val);
+    }
+}
+
+static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
+                             unsigned size)
+{
+    AMDVIState *s = opaque;
+    unsigned long offset = addr & 0x07;
+
+    if (addr + size > AMDVI_MMIO_SIZE) {
+        trace_amdvi_mmio_write("error: addr outside region: max ",
+                               (uint64_t)AMDVI_MMIO_SIZE, size, val, offset);
+        return;
+    }
+
+    amdvi_mmio_trace(addr, size);
+    switch (addr & ~0x07) {
+    case AMDVI_MMIO_CONTROL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_control_write(s);
+        break;
+    case AMDVI_MMIO_DEVICE_TABLE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        /* set device table address
+         * This also suffers from the inability to tell whether software
+         * is done writing.
+         */
+        if (offset || (size == 8)) {
+            amdvi_handle_devtab_write(s);
+        }
+        break;
+    case AMDVI_MMIO_COMMAND_HEAD:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_cmdhead_write(s);
+        break;
+    case AMDVI_MMIO_COMMAND_BASE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        /* FIXME - robustly make sure System Software has finished writing
+         * in case it writes in chunks of less than 8 bytes. For now, this
+         * hack works for the Linux driver.
+         */
+        if (offset || (size == 8)) {
+            amdvi_handle_cmdbase_write(s);
+        }
+        break;
+    case AMDVI_MMIO_COMMAND_TAIL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_cmdtail_write(s);
+        break;
+    case AMDVI_MMIO_EVENT_BASE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_evtbase_write(s);
+        break;
+    case AMDVI_MMIO_EVENT_HEAD:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_evthead_write(s);
+        break;
+    case AMDVI_MMIO_EVENT_TAIL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_evttail_write(s);
+        break;
+    case AMDVI_MMIO_EXCL_LIMIT:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_excllim_write(s);
+        break;
+    /* PPR log base - unused for now */
+    case AMDVI_MMIO_PPR_BASE:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_pprbase_write(s);
+        break;
+    /* PPR log head - also unused for now */
+    case AMDVI_MMIO_PPR_HEAD:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_pprhead_write(s);
+        break;
+    /* PPR log tail - unused for now */
+    case AMDVI_MMIO_PPR_TAIL:
+        amdvi_mmio_reg_write(s, size, val, addr);
+        amdvi_handle_pprtail_write(s);
+        break;
+    }
+}
+
+static inline uint64_t amdvi_get_perms(uint64_t entry)
+{
+    return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
+           AMDVI_DEV_PERM_SHIFT;
+}
+
+/* a 
valid entry should have V = 1 and reserved bits honoured */ +static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, + uint64_t *dte) +{ + if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED) + || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) + || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { + amdvi_log_illegaldevtab_error(s, devid, + s->devtab + + devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); + return false; + } + + return dte[0] & AMDVI_DEV_VALID; +} + +/* get a device table entry given the devid */ +static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry) +{ + uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE; + + if (dma_memory_read(&address_space_memory, s->devtab + offset, entry, + AMDVI_DEVTAB_ENTRY_SIZE)) { + trace_amdvi_dte_get_fail(s->devtab, offset); + /* log error accessing dte */ + amdvi_log_devtab_error(s, devid, s->devtab + offset, 0); + return false; + } + + *entry = le64_to_cpu(*entry); + if (!amdvi_validate_dte(s, devid, entry)) { + trace_amdvi_invalid_dte(entry[0]); + return false; + } + + return true; +} + +/* get pte translation mode */ +static inline uint8_t get_pte_translation_mode(uint64_t pte) +{ + return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK; +} + +static inline uint64_t pte_override_page_mask(uint64_t pte) +{ + uint8_t page_mask = 12; + uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) ^ AMDVI_DEV_PT_ROOT_MASK; + /* find the first zero bit */ + while (addr & 1) { + page_mask++; + addr = addr >> 1; + } + + return ~((1ULL << page_mask) - 1); +} + +static inline uint64_t pte_get_page_mask(uint64_t oldlevel) +{ + return ~((1UL << ((oldlevel * 9) + 3)) - 1); +} + +static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr, + uint16_t devid) +{ + uint64_t pte; + + if (dma_memory_read(&address_space_memory, pte_addr, &pte, sizeof(pte))) { + trace_amdvi_get_pte_hwerror(pte_addr); + amdvi_log_pagetab_error(s, devid, pte_addr, 0); + pte = 0; + return pte; + } + + pte = le64_to_cpu(pte); + return pte; +} + +static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte, + IOMMUTLBEntry *ret, unsigned perms, + hwaddr addr) +{ + unsigned level, present, pte_perms, oldlevel; + uint64_t pte = dte[0], pte_addr, page_mask; + + /* make sure the DTE has TV = 1 */ + if (pte & AMDVI_DEV_TRANSLATION_VALID) { + level = get_pte_translation_mode(pte); + if (level >= 7) { + trace_amdvi_mode_invalid(level, addr); + return; + } + if (level == 0) { + goto no_remap; + } + + /* we are at the leaf page table or page table encodes a huge page */ + while (level > 0) { + pte_perms = amdvi_get_perms(pte); + present = pte & 1; + if (!present || perms != (perms & pte_perms)) { + amdvi_page_fault(as->iommu_state, as->devfn, addr, perms); + trace_amdvi_page_fault(addr); + return; + } + + /* go to the next lower level */ + pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK; + /* add offset and load pte */ + pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3; + pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn); + if (!pte) { + return; + } + oldlevel = level; + level = get_pte_translation_mode(pte); + if (level == 0x7) { + break; + } + } + + if (level == 0x7) { + page_mask = pte_override_page_mask(pte); + } else { + page_mask = pte_get_page_mask(oldlevel); + } + + /* get access permissions from pte */ + ret->iova = addr & page_mask; + ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask; + ret->addr_mask = ~page_mask; + ret->perm = amdvi_get_perms(pte); + return; + } +no_remap: + ret->iova = addr & AMDVI_PAGE_MASK_4K; + ret->translated_addr = addr & 
AMDVI_PAGE_MASK_4K; + ret->addr_mask = ~AMDVI_PAGE_MASK_4K; + ret->perm = amdvi_get_perms(pte); +} + +static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr, + bool is_write, IOMMUTLBEntry *ret) +{ + AMDVIState *s = as->iommu_state; + uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn); + AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid); + uint64_t entry[4]; + + if (iotlb_entry) { + trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), addr, iotlb_entry->translated_addr); + ret->iova = addr & ~iotlb_entry->page_mask; + ret->translated_addr = iotlb_entry->translated_addr; + ret->addr_mask = iotlb_entry->page_mask; + ret->perm = iotlb_entry->perms; + return; + } + + /* devices with V = 0 are not translated */ + if (!amdvi_get_dte(s, devid, entry)) { + goto out; + } + + amdvi_page_walk(as, entry, ret, + is_write ? AMDVI_PERM_WRITE : AMDVI_PERM_READ, addr); + + amdvi_update_iotlb(s, devid, addr, *ret, + entry[1] & AMDVI_DEV_DOMID_ID_MASK); + return; + +out: + ret->iova = addr & AMDVI_PAGE_MASK_4K; + ret->translated_addr = addr & AMDVI_PAGE_MASK_4K; + ret->addr_mask = ~AMDVI_PAGE_MASK_4K; + ret->perm = IOMMU_RW; +} + +static inline bool amdvi_is_interrupt_addr(hwaddr addr) +{ + return addr >= AMDVI_INT_ADDR_FIRST && addr <= AMDVI_INT_ADDR_LAST; +} + +static IOMMUTLBEntry amdvi_translate(MemoryRegion *iommu, hwaddr addr, + bool is_write) +{ + AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); + AMDVIState *s = as->iommu_state; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE + }; + + if (!s->enabled) { + /* AMDVI disabled - corresponds to iommu=off not + * failure to provide any parameter + */ + ret.iova = addr & AMDVI_PAGE_MASK_4K; + ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; + ret.addr_mask = ~AMDVI_PAGE_MASK_4K; + ret.perm = IOMMU_RW; + return ret; + } else if (amdvi_is_interrupt_addr(addr)) { + ret.iova = addr & AMDVI_PAGE_MASK_4K; + ret.translated_addr = addr & AMDVI_PAGE_MASK_4K; + ret.addr_mask = ~AMDVI_PAGE_MASK_4K; + ret.perm = IOMMU_WO; + return ret; + } + + amdvi_do_translate(as, addr, is_write, &ret); + trace_amdvi_translation_result(as->bus_num, PCI_SLOT(as->devfn), + PCI_FUNC(as->devfn), addr, ret.translated_addr); + return ret; +} + +static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + AMDVIState *s = opaque; + AMDVIAddressSpace **iommu_as; + int bus_num = pci_bus_num(bus); + + iommu_as = s->address_spaces[bus_num]; + + /* allocate memory during the first run */ + if (!iommu_as) { + iommu_as = g_malloc0(sizeof(AMDVIAddressSpace *) * PCI_DEVFN_MAX); + s->address_spaces[bus_num] = iommu_as; + } + + /* set up AMD-Vi region */ + if (!iommu_as[devfn]) { + iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace)); + iommu_as[devfn]->bus_num = (uint8_t)bus_num; + iommu_as[devfn]->devfn = (uint8_t)devfn; + iommu_as[devfn]->iommu_state = s; + + memory_region_init_iommu(&iommu_as[devfn]->iommu, OBJECT(s), + &s->iommu_ops, "amd-iommu", UINT64_MAX); + address_space_init(&iommu_as[devfn]->as, &iommu_as[devfn]->iommu, + "amd-iommu"); + } + return &iommu_as[devfn]->as; +} + +static const MemoryRegionOps mmio_mem_ops = { + .read = amdvi_mmio_read, + .write = amdvi_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + } +}; + +static void 
amdvi_iommu_notify_flag_changed(MemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new) +{ + AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu); + + if (new & IOMMU_NOTIFIER_MAP) { + error_report("device %02x.%02x.%x requires iommu notifier which is not " + "currently supported", as->bus_num, PCI_SLOT(as->devfn), + PCI_FUNC(as->devfn)); + exit(1); + } +} + +static void amdvi_init(AMDVIState *s) +{ + amdvi_iotlb_reset(s); + + s->iommu_ops.translate = amdvi_translate; + s->iommu_ops.notify_flag_changed = amdvi_iommu_notify_flag_changed; + s->devtab_len = 0; + s->cmdbuf_len = 0; + s->cmdbuf_head = 0; + s->cmdbuf_tail = 0; + s->evtlog_head = 0; + s->evtlog_tail = 0; + s->excl_enabled = false; + s->excl_allow = false; + s->mmio_enabled = false; + s->enabled = false; + s->ats_enabled = false; + s->cmdbuf_enabled = false; + + /* reset MMIO */ + memset(s->mmior, 0, AMDVI_MMIO_SIZE); + amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES, + 0xffffffffffffffef, 0); + amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); + + /* reset device ident */ + pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD); + pci_config_set_prog_interface(s->pci.dev.config, 00); + pci_config_set_device_id(s->pci.dev.config, s->devid); + pci_config_set_class(s->pci.dev.config, 0x0806); + + /* reset AMDVI specific capabilities, all r/o */ + pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES); + pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, + s->mmio.addr & ~(0xffff0000)); + pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, + (s->mmio.addr & ~(0xffff)) >> 16); + pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE, + 0xff000000); + pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0); + pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, + AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); +} + +static void amdvi_reset(DeviceState *dev) +{ + AMDVIState *s = AMD_IOMMU_DEVICE(dev); + + msi_reset(&s->pci.dev); + amdvi_init(s); +} + +static void amdvi_realize(DeviceState *dev, Error **err) +{ + int ret = 0; + AMDVIState *s = AMD_IOMMU_DEVICE(dev); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); + PCIBus *bus = PC_MACHINE(qdev_get_machine())->bus; + s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, + amdvi_uint64_equal, g_free, g_free); + + /* This device should take care of IOMMU PCI properties */ + x86_iommu->type = TYPE_AMD; + qdev_set_parent_bus(DEVICE(&s->pci), &bus->qbus); + object_property_set_bool(OBJECT(&s->pci), true, "realized", err); + s->capab_offset = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0, + AMDVI_CAPAB_SIZE); + assert(s->capab_offset > 0); + ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE); + assert(ret > 0); + ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, AMDVI_CAPAB_REG_SIZE); + assert(ret > 0); + + /* set up MMIO */ + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, "amdvi-mmio", + AMDVI_MMIO_SIZE); + + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); + sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); + pci_setup_iommu(bus, amdvi_host_dma_iommu, s); + s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err); + msi_init(&s->pci.dev, 0, 1, true, false, err); + amdvi_init(s); +} + +static const VMStateDescription vmstate_amdvi = { + .name = "amd-iommu", + .unmigratable = 1 +}; + +static void amdvi_instance_init(Object *klass) +{ + AMDVIState *s = 
AMD_IOMMU_DEVICE(klass); + + object_initialize(&s->pci, sizeof(s->pci), TYPE_AMD_IOMMU_PCI); +} + +static void amdvi_class_init(ObjectClass *klass, void* data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + X86IOMMUClass *dc_class = X86_IOMMU_CLASS(klass); + + dc->reset = amdvi_reset; + dc->vmsd = &vmstate_amdvi; + dc->hotpluggable = false; + dc_class->realize = amdvi_realize; +} + +static const TypeInfo amdvi = { + .name = TYPE_AMD_IOMMU_DEVICE, + .parent = TYPE_X86_IOMMU_DEVICE, + .instance_size = sizeof(AMDVIState), + .instance_init = amdvi_instance_init, + .class_init = amdvi_class_init +}; + +static const TypeInfo amdviPCI = { + .name = "AMDVI-PCI", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(AMDVIPCIState), +}; + +static void amdviPCI_register_types(void) +{ + type_register_static(&amdviPCI); + type_register_static(&amdvi); +} + +type_init(amdviPCI_register_types); diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h new file mode 100644 index 0000000000..884926e9e7 --- /dev/null +++ b/hw/i386/amd_iommu.h @@ -0,0 +1,289 @@ +/* + * QEMU emulation of an AMD IOMMU (AMD-Vi) + * + * Copyright (C) 2011 Eduard - Gabriel Munteanu + * Copyright (C) 2015 David Kiarie, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef AMD_IOMMU_H_ +#define AMD_IOMMU_H_ + +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "hw/sysbus.h" +#include "sysemu/dma.h" +#include "hw/i386/pc.h" +#include "hw/pci/pci_bus.h" +#include "hw/i386/x86-iommu.h" + +/* Capability registers */ +#define AMDVI_CAPAB_BAR_LOW 0x04 +#define AMDVI_CAPAB_BAR_HIGH 0x08 +#define AMDVI_CAPAB_RANGE 0x0C +#define AMDVI_CAPAB_MISC 0x10 + +#define AMDVI_CAPAB_SIZE 0x18 +#define AMDVI_CAPAB_REG_SIZE 0x04 + +/* Capability header data */ +#define AMDVI_CAPAB_ID_SEC 0xf +#define AMDVI_CAPAB_FLAT_EXT (1 << 28) +#define AMDVI_CAPAB_EFR_SUP (1 << 27) +#define AMDVI_CAPAB_FLAG_NPCACHE (1 << 26) +#define AMDVI_CAPAB_FLAG_HTTUNNEL (1 << 25) +#define AMDVI_CAPAB_FLAG_IOTLBSUP (1 << 24) +#define AMDVI_CAPAB_INIT_TYPE (3 << 16) + +/* No. 
of used MMIO registers */ +#define AMDVI_MMIO_REGS_HIGH 8 +#define AMDVI_MMIO_REGS_LOW 7 + +/* MMIO registers */ +#define AMDVI_MMIO_DEVICE_TABLE 0x0000 +#define AMDVI_MMIO_COMMAND_BASE 0x0008 +#define AMDVI_MMIO_EVENT_BASE 0x0010 +#define AMDVI_MMIO_CONTROL 0x0018 +#define AMDVI_MMIO_EXCL_BASE 0x0020 +#define AMDVI_MMIO_EXCL_LIMIT 0x0028 +#define AMDVI_MMIO_EXT_FEATURES 0x0030 +#define AMDVI_MMIO_COMMAND_HEAD 0x2000 +#define AMDVI_MMIO_COMMAND_TAIL 0x2008 +#define AMDVI_MMIO_EVENT_HEAD 0x2010 +#define AMDVI_MMIO_EVENT_TAIL 0x2018 +#define AMDVI_MMIO_STATUS 0x2020 +#define AMDVI_MMIO_PPR_BASE 0x0038 +#define AMDVI_MMIO_PPR_HEAD 0x2030 +#define AMDVI_MMIO_PPR_TAIL 0x2038 + +#define AMDVI_MMIO_SIZE 0x4000 + +#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) +#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \ + AMDVI_MMIO_DEVTAB_SIZE_MASK) +#define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32 +#define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096 + +/* some of this are similar but just for readability */ +#define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7) +#define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f +#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK +#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) +#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK + +#define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) +#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK +#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK +#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) +#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK + +#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) +#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK +#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK + +#define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0) +#define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1) +#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK +#define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff + +/* mmio control register flags */ +#define AMDVI_MMIO_CONTROL_AMDVIEN (1ULL << 0) +#define AMDVI_MMIO_CONTROL_HTTUNEN (1ULL << 1) +#define AMDVI_MMIO_CONTROL_EVENTLOGEN (1ULL << 2) +#define AMDVI_MMIO_CONTROL_EVENTINTEN (1ULL << 3) +#define AMDVI_MMIO_CONTROL_COMWAITINTEN (1ULL << 4) +#define AMDVI_MMIO_CONTROL_CMDBUFLEN (1ULL << 12) + +/* MMIO status register bits */ +#define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4) +#define AMDVI_MMIO_STATUS_EVT_RUN (1 << 3) +#define AMDVI_MMIO_STATUS_COMP_INT (1 << 2) +#define AMDVI_MMIO_STATUS_EVT_OVF (1 << 0) + +#define AMDVI_CMDBUF_ID_BYTE 0x07 +#define AMDVI_CMDBUF_ID_RSHIFT 4 + +#define AMDVI_CMD_COMPLETION_WAIT 0x01 +#define AMDVI_CMD_INVAL_DEVTAB_ENTRY 0x02 +#define AMDVI_CMD_INVAL_AMDVI_PAGES 0x03 +#define AMDVI_CMD_INVAL_IOTLB_PAGES 0x04 +#define AMDVI_CMD_INVAL_INTR_TABLE 0x05 +#define AMDVI_CMD_PREFETCH_AMDVI_PAGES 0x06 +#define AMDVI_CMD_COMPLETE_PPR_REQUEST 0x07 +#define AMDVI_CMD_INVAL_AMDVI_ALL 0x08 + +#define AMDVI_DEVTAB_ENTRY_SIZE 32 + +/* Device table entry bits 0:63 */ +#define AMDVI_DEV_VALID (1ULL << 0) +#define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1) +#define AMDVI_DEV_MODE_MASK 0x7 +#define AMDVI_DEV_MODE_RSHIFT 9 +#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000 +#define AMDVI_DEV_PT_ROOT_RSHIFT 12 +#define AMDVI_DEV_PERM_SHIFT 61 +#define AMDVI_DEV_PERM_READ (1ULL << 61) +#define AMDVI_DEV_PERM_WRITE 
(1ULL << 62) + +/* Device table entry bits 64:127 */ +#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1) + +/* Event codes and flags, as stored in the info field */ +#define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12) +#define AMDVI_EVENT_IOPF (0x2U << 12) +#define AMDVI_EVENT_IOPF_I (1U << 3) +#define AMDVI_EVENT_DEV_TAB_HW_ERROR (0x3U << 12) +#define AMDVI_EVENT_PAGE_TAB_HW_ERROR (0x4U << 12) +#define AMDVI_EVENT_ILLEGAL_COMMAND_ERROR (0x5U << 12) +#define AMDVI_EVENT_COMMAND_HW_ERROR (0x6U << 12) + +#define AMDVI_EVENT_LEN 16 +#define AMDVI_PERM_READ (1 << 0) +#define AMDVI_PERM_WRITE (1 << 1) + +#define AMDVI_FEATURE_PREFETCH (1ULL << 0) /* page prefetch */ +#define AMDVI_FEATURE_PPR (1ULL << 1) /* PPR Support */ +#define AMDVI_FEATURE_GT (1ULL << 4) /* Guest Translation */ +#define AMDVI_FEATURE_IA (1ULL << 6) /* inval all support */ +#define AMDVI_FEATURE_GA (1ULL << 7) /* guest VAPIC support */ +#define AMDVI_FEATURE_HE (1ULL << 8) /* hardware error regs */ +#define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */ + +/* reserved DTE bits */ +#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc +#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100 +#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000 + +/* AMDVI paging mode */ +#define AMDVI_GATS_MODE (6ULL << 12) +#define AMDVI_HATS_MODE (6ULL << 10) + +/* IOTLB */ +#define AMDVI_IOTLB_MAX_SIZE 1024 +#define AMDVI_DEVID_SHIFT 36 + +/* extended feature support */ +#define AMDVI_EXT_FEATURES (AMDVI_FEATURE_PREFETCH | AMDVI_FEATURE_PPR | \ + AMDVI_FEATURE_IA | AMDVI_FEATURE_GT | AMDVI_FEATURE_HE | \ + AMDVI_GATS_MODE | AMDVI_HATS_MODE) + +/* capabilities header */ +#define AMDVI_CAPAB_FEATURES (AMDVI_CAPAB_FLAT_EXT | \ + AMDVI_CAPAB_FLAG_NPCACHE | AMDVI_CAPAB_FLAG_IOTLBSUP \ + | AMDVI_CAPAB_ID_SEC | AMDVI_CAPAB_INIT_TYPE | \ + AMDVI_CAPAB_FLAG_HTTUNNEL | AMDVI_CAPAB_EFR_SUP) + +/* AMDVI default address */ +#define AMDVI_BASE_ADDR 0xfed80000 + +/* page management constants */ +#define AMDVI_PAGE_SHIFT 12 +#define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT) + +#define AMDVI_PAGE_SHIFT_4K 12 +#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1)) + +#define AMDVI_MAX_VA_ADDR (48UL << 5) +#define AMDVI_MAX_PH_ADDR (40UL << 8) +#define AMDVI_MAX_GVA_ADDR (48UL << 15) + +/* Completion Wait data size */ +#define AMDVI_COMPLETION_DATA_SIZE 8 + +#define AMDVI_COMMAND_SIZE 16 +/* Completion Wait data size */ +#define AMDVI_COMPLETION_DATA_SIZE 8 + +#define AMDVI_COMMAND_SIZE 16 + +#define AMDVI_INT_ADDR_FIRST 0xfee00000 +#define AMDVI_INT_ADDR_LAST 0xfeefffff + +#define TYPE_AMD_IOMMU_DEVICE "amd-iommu" +#define AMD_IOMMU_DEVICE(obj)\ + OBJECT_CHECK(AMDVIState, (obj), TYPE_AMD_IOMMU_DEVICE) + +#define TYPE_AMD_IOMMU_PCI "AMDVI-PCI" + +typedef struct AMDVIAddressSpace AMDVIAddressSpace; + +/* functions to steal PCI config space */ +typedef struct AMDVIPCIState { + PCIDevice dev; /* The PCI device itself */ +} AMDVIPCIState; + +typedef struct AMDVIState { + X86IOMMUState iommu; /* IOMMU bus device */ + AMDVIPCIState pci; /* IOMMU PCI device */ + + uint32_t version; + uint32_t capab_offset; /* capability offset pointer */ + + uint64_t mmio_addr; + + uint32_t devid; /* auto-assigned devid */ + + bool enabled; /* IOMMU enabled */ + bool ats_enabled; /* address translation enabled */ + bool cmdbuf_enabled; /* command buffer enabled */ + bool evtlog_enabled; /* event log enabled */ + bool excl_enabled; + + hwaddr devtab; /* base address device table */ + size_t devtab_len; /* device table length */ + + hwaddr cmdbuf; /* command 
buffer base address */ + uint64_t cmdbuf_len; /* command buffer length */ + uint32_t cmdbuf_head; /* current IOMMU read position */ + uint32_t cmdbuf_tail; /* next Software write position */ + bool completion_wait_intr; + + hwaddr evtlog; /* base address event log */ + bool evtlog_intr; + uint32_t evtlog_len; /* event log length */ + uint32_t evtlog_head; /* current IOMMU write position */ + uint32_t evtlog_tail; /* current Software read position */ + + /* unused for now */ + hwaddr excl_base; /* base DVA - IOMMU exclusion range */ + hwaddr excl_limit; /* limit of IOMMU exclusion range */ + bool excl_allow; /* translate accesses to the exclusion range */ + bool excl_enable; /* exclusion range enabled */ + + hwaddr ppr_log; /* base address ppr log */ + uint32_t pprlog_len; /* ppr log len */ + uint32_t pprlog_head; /* ppr log head */ + uint32_t pprlog_tail; /* ppr log tail */ + + MemoryRegion mmio; /* MMIO region */ + uint8_t mmior[AMDVI_MMIO_SIZE]; /* read/write MMIO */ + uint8_t w1cmask[AMDVI_MMIO_SIZE]; /* read/write 1 clear mask */ + uint8_t romask[AMDVI_MMIO_SIZE]; /* MMIO read/only mask */ + bool mmio_enabled; + + /* IOMMU function */ + MemoryRegionIOMMUOps iommu_ops; + + /* for each served device */ + AMDVIAddressSpace **address_spaces[PCI_BUS_MAX]; + + /* IOTLB */ + GHashTable *iotlb; +} AMDVIState; + +#endif diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 28c31a2cdf..5f3e35123d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -21,16 +21,20 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "exec/address-spaces.h" #include "intel_iommu_internal.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/i386/pc.h" +#include "hw/i386/apic-msidef.h" #include "hw/boards.h" #include "hw/i386/x86-iommu.h" #include "hw/pci-host/q35.h" #include "sysemu/kvm.h" +#include "hw/i386/apic_internal.h" +#include "kvm_i386.h" /*#define DEBUG_INTEL_IOMMU*/ #ifdef DEBUG_INTEL_IOMMU @@ -214,7 +218,7 @@ static void vtd_reset_iotlb(IntelIOMMUState *s) g_hash_table_remove_all(s->iotlb); } -static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint8_t source_id, +static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id, uint32_t level) { return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | @@ -279,18 +283,17 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg, hwaddr mesg_data_reg) { - hwaddr addr; - uint32_t data; + MSIMessage msi; assert(mesg_data_reg < DMAR_REG_SIZE); assert(mesg_addr_reg < DMAR_REG_SIZE); - addr = vtd_get_long_raw(s, mesg_addr_reg); - data = vtd_get_long_raw(s, mesg_data_reg); + msi.address = vtd_get_long_raw(s, mesg_addr_reg); + msi.data = vtd_get_long_raw(s, mesg_data_reg); - VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data); - address_space_stl_le(&address_space_memory, addr, data, - MEMTXATTRS_UNSPECIFIED, NULL); + VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, + msi.address, msi.data); + apic_get_class()->send_msi(&msi); } /* Generate a fault event to software via MSI if conditions are met. 
@@ -985,6 +988,7 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, mask = 7; /* Mask bit 2:0 in the SID field */ break; } + mask = ~mask; VTD_DPRINTF(INV, "device-selective invalidation source 0x%"PRIx16 " mask %"PRIu16, source_id, mask); vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); @@ -1974,14 +1978,20 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion *iommu, hwaddr addr, return ret; } -static void vtd_iommu_notify_started(MemoryRegion *iommu) +static void vtd_iommu_notify_flag_changed(MemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new) { VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu); - hw_error("Device at bus %s addr %02x.%d requires iommu notifier which " - "is currently not supported by intel-iommu emulation", - vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn), - PCI_FUNC(vtd_as->devfn)); + if (new & IOMMU_NOTIFIER_MAP) { + error_report("Device at bus %s addr %02x.%d requires iommu " + "notifier which is currently not supported by " + "intel-iommu emulation", + vtd_as->bus->qbus.name, PCI_SLOT(vtd_as->devfn), + PCI_FUNC(vtd_as->devfn)); + exit(1); + } } static const VMStateDescription vtd_vmstate = { @@ -2005,6 +2015,9 @@ static const MemoryRegionOps vtd_mem_ops = { static Property vtd_properties[] = { DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0), + DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), DEFINE_PROP_END_OF_LIST(), }; @@ -2127,6 +2140,7 @@ static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out) msg.dest_mode = irq->dest_mode; msg.redir_hint = irq->redir_hint; msg.dest = irq->dest; + msg.__addr_hi = irq->dest & 0xffffff00; msg.__addr_head = cpu_to_le32(0xfee); /* Keep this from original MSI address bits */ msg.__not_used = irq->msi_addr_last_bits; @@ -2167,7 +2181,7 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, } addr.data = origin->address & VTD_MSI_ADDR_LO_MASK; - if (le16_to_cpu(addr.addr.__head) != 0xfee) { + if (addr.addr.__head != 0xfee) { VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: " "0x%"PRIx32, addr.data); return -VTD_FR_IR_REQ_RSVD; @@ -2203,6 +2217,8 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, } } else { uint8_t vector = origin->data & 0xff; + uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + VTD_DPRINTF(IR, "received IOAPIC interrupt"); /* IOAPIC entry vector should be aligned with IRTE vector * (see vt-d spec 5.1.5.1). */ @@ -2211,6 +2227,15 @@ static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu, "entry: %d, IRTE: %d, index: %d", vector, irq.vector, index); } + + /* The Trigger Mode field must match the Trigger Mode in the IRTE. + * (see vt-d spec 5.1.5.1). 
*/ + if (trigger_mode != irq.trigger_mode) { + VTD_DPRINTF(GENERAL, "IOAPIC trigger mode inconsistent: " + "entry: %u, IRTE: %u, index: %d", + trigger_mode, irq.trigger_mode, index); + } + } /* @@ -2275,11 +2300,7 @@ static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr, " for device sid 0x%04x", to.address, to.data, sid); - if (dma_memory_write(&address_space_memory, to.address, - &to.data, size)) { - VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64 - " value 0x%"PRIx32, to.address, to.data); - } + apic_get_class()->send_msi(&to); return MEMTX_OK; } @@ -2348,7 +2369,7 @@ static void vtd_init(IntelIOMMUState *s) memset(s->womask, 0, DMAR_REG_SIZE); s->iommu_ops.translate = vtd_iommu_translate; - s->iommu_ops.notify_started = vtd_iommu_notify_started; + s->iommu_ops.notify_flag_changed = vtd_iommu_notify_flag_changed; s->root = 0; s->root_extended = false; s->dmar_enabled = false; @@ -2364,7 +2385,11 @@ static void vtd_init(IntelIOMMUState *s) s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; if (x86_iommu->intr_supported) { - s->ecap |= VTD_ECAP_IR | VTD_ECAP_EIM | VTD_ECAP_MHMV; + s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; + if (s->intr_eim == ON_OFF_AUTO_ON) { + s->ecap |= VTD_ECAP_EIM; + } + assert(s->intr_eim != ON_OFF_AUTO_AUTO); } vtd_reset_context_cache(s); @@ -2439,12 +2464,48 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) IntelIOMMUState *s = opaque; VTDAddressSpace *vtd_as; - assert(0 <= devfn && devfn <= X86_IOMMU_PCI_DEVFN_MAX); + assert(0 <= devfn && devfn < X86_IOMMU_PCI_DEVFN_MAX); vtd_as = vtd_find_add_as(s, bus, devfn); return &vtd_as->as; } +static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) +{ + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */ + if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && + !kvm_irqchip_is_split()) { + error_setg(errp, "Intel Interrupt Remapping cannot work with " + "kernel-irqchip=on, please use 'split|off'."); + return false; + } + if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) { + error_setg(errp, "eim=on cannot be selected without intremap=on"); + return false; + } + + if (s->intr_eim == ON_OFF_AUTO_AUTO) { + s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim) + && x86_iommu->intr_supported ? + ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; + } + if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) { + if (!kvm_irqchip_in_kernel()) { + error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split"); + return false; + } + if (!kvm_enable_x2apic()) { + error_setg(errp, "eim=on requires support on the KVM side" + "(X2APIC_API, first shipped in v4.7)"); + return false; + } + } + + return true; +} + static void vtd_realize(DeviceState *dev, Error **errp) { PCMachineState *pcms = PC_MACHINE(qdev_get_machine()); @@ -2453,6 +2514,12 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); VTD_DPRINTF(GENERAL, ""); + x86_iommu->type = TYPE_INTEL; + + if (!vtd_decide_config(s, errp)) { + return; + } + memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); @@ -2467,14 +2534,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) pci_setup_iommu(bus, vtd_host_dma_iommu, dev); /* Pseudo address space under root PCI bus. 
*/ pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC); - - /* Currently Intel IOMMU IR only support "kernel-irqchip={off|split}" */ - if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() && - !kvm_irqchip_is_split()) { - error_report("Intel Interrupt Remapping cannot work with " - "kernel-irqchip=on, please use 'split|off'."); - exit(1); - } } static void vtd_class_init(ObjectClass *klass, void *data) diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0829a5064f..11abfa2233 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -115,7 +115,7 @@ /* The shift of source_id in the key of IOTLB hash table */ #define VTD_IOTLB_SID_SHIFT 36 -#define VTD_IOTLB_LVL_SHIFT 44 +#define VTD_IOTLB_LVL_SHIFT 52 #define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ /* IOTLB_REG */ diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 2bd0de82b4..01cbaa88d2 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -15,6 +15,7 @@ #include "hw/i386/apic_internal.h" #include "hw/pci/msi.h" #include "sysemu/kvm.h" +#include "target-i386/kvm_i386.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, int reg_id, uint32_t val) @@ -28,13 +29,16 @@ static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, return *((uint32_t *)(kapic->regs + (reg_id << 4))); } -void kvm_put_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) +static void kvm_put_apic_state(APICCommonState *s, struct kvm_lapic_state *kapic) { - APICCommonState *s = APIC_COMMON(dev); int i; memset(kapic, 0, sizeof(*kapic)); - kvm_apic_set_reg(kapic, 0x2, s->id << 24); + if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) { + kvm_apic_set_reg(kapic, 0x2, s->initial_apic_id); + } else { + kvm_apic_set_reg(kapic, 0x2, s->id << 24); + } kvm_apic_set_reg(kapic, 0x8, s->tpr); kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24); kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); @@ -59,7 +63,11 @@ void kvm_get_apic_state(DeviceState *dev, struct kvm_lapic_state *kapic) APICCommonState *s = APIC_COMMON(dev); int i, v; - s->id = kvm_apic_get_reg(kapic, 0x2) >> 24; + if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) { + assert(kvm_apic_get_reg(kapic, 0x2) == s->initial_apic_id); + } else { + s->id = kvm_apic_get_reg(kapic, 0x2) >> 24; + } s->tpr = kvm_apic_get_reg(kapic, 0x8); s->arb_id = kvm_apic_get_reg(kapic, 0x9); s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24; @@ -125,10 +133,30 @@ static void kvm_apic_vapic_base_update(APICCommonState *s) } } -static void do_inject_external_nmi(void *data) +static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + struct kvm_lapic_state kapic; + int ret; + + kvm_put_apicbase(s->cpu, s->apicbase); + kvm_put_apic_state(s, &kapic); + + ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_LAPIC, &kapic); + if (ret < 0) { + fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(ret)); + abort(); + } +} + +static void kvm_apic_post_load(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void do_inject_external_nmi(CPUState *cpu, run_on_cpu_data data) { - APICCommonState *s = data; - CPUState *cpu = CPU(s->cpu); + APICCommonState *s = data.host_ptr; uint32_t lvt; int ret; @@ -146,7 +174,18 @@ static void do_inject_external_nmi(void *data) static void kvm_apic_external_nmi(APICCommonState *s) { - run_on_cpu(CPU(s->cpu), do_inject_external_nmi, s); + 
run_on_cpu(CPU(s->cpu), do_inject_external_nmi, RUN_ON_CPU_HOST_PTR(s)); +} + +static void kvm_send_msi(MSIMessage *msg) +{ + int ret; + + ret = kvm_irqchip_send_msi(kvm_state, *msg); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", + strerror(-ret)); + } } static uint64_t kvm_apic_mem_read(void *opaque, hwaddr addr, @@ -159,13 +198,8 @@ static void kvm_apic_mem_write(void *opaque, hwaddr addr, uint64_t data, unsigned size) { MSIMessage msg = { .address = addr, .data = data }; - int ret; - ret = kvm_irqchip_send_msi(kvm_state, msg); - if (ret < 0) { - fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", - strerror(-ret)); - } + kvm_send_msi(&msg); } static const MemoryRegionOps kvm_apic_io_ops = { @@ -178,6 +212,8 @@ static void kvm_apic_reset(APICCommonState *s) { /* Not used by KVM, which uses the CPU mp_state instead. */ s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s)); } static void kvm_apic_realize(DeviceState *dev, Error **errp) @@ -206,9 +242,11 @@ static void kvm_apic_class_init(ObjectClass *klass, void *data) k->set_base = kvm_apic_set_base; k->set_tpr = kvm_apic_set_tpr; k->get_tpr = kvm_apic_get_tpr; + k->post_load = kvm_apic_post_load; k->enable_tpr_reporting = kvm_apic_enable_tpr_reporting; k->vapic_base_update = kvm_apic_vapic_base_update; k->external_nmi = kvm_apic_external_nmi; + k->send_msi = kvm_send_msi; } static const TypeInfo kvm_apic_info = { diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c index 2b207de01b..11d1b726b6 100644 --- a/hw/i386/kvm/i8259.c +++ b/hw/i386/kvm/i8259.c @@ -92,7 +92,7 @@ static void kvm_pic_put(PICCommonState *s) ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); if (ret < 0) { - fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(ret)); + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(ret)); abort(); } } diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index 8238fbc630..87dcbdd51a 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -1251,6 +1251,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) error_propagate(errp, local_err); return -ENOTSUP; } + dev->dev.cap_present |= QEMU_PCI_CAP_MSI; dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; /* Only 32-bit/no-mask currently supported */ ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSI, pos, 10, @@ -1285,6 +1286,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) error_propagate(errp, local_err); return -ENOTSUP; } + dev->dev.cap_present |= QEMU_PCI_CAP_MSIX; dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; ret = pci_add_capability2(pci_dev, PCI_CAP_ID_MSIX, pos, 12, &local_err); @@ -1648,6 +1650,7 @@ static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) dev->msix_table = NULL; return; } + dev->dev.msix_table = (uint8_t *)dev->msix_table; assigned_dev_msix_reset(dev); @@ -1665,6 +1668,7 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) error_report("error unmapping msix_table! 
%s", strerror(errno)); } dev->msix_table = NULL; + dev->dev.msix_table = NULL; } static const VMStateDescription vmstate_assigned_device = { diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index 3bf1ddd976..b30d1b90c6 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -17,6 +17,7 @@ #include "sysemu/kvm.h" #include "hw/i386/apic_internal.h" #include "hw/sysbus.h" +#include "tcg/tcg.h" #define VAPIC_IO_PORT 0x7e @@ -449,6 +450,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) resume_all_vcpus(); if (!kvm_enabled()) { + /* tb_lock will be reset when cpu_loop_exit_noexc longjmps + * back into the cpu_exec loop. */ + tb_lock(); tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1); cpu_loop_exit_noexc(cs); } @@ -483,10 +487,9 @@ typedef struct VAPICEnableTPRReporting { bool enable; } VAPICEnableTPRReporting; -static void vapic_do_enable_tpr_reporting(void *data) +static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data) { - VAPICEnableTPRReporting *info = data; - + VAPICEnableTPRReporting *info = data.host_ptr; apic_enable_tpr_access_reporting(info->apic, info->enable); } @@ -501,7 +504,7 @@ static void vapic_enable_tpr_reporting(bool enable) CPU_FOREACH(cs) { cpu = X86_CPU(cs); info.apic = cpu->apic_state; - run_on_cpu(cs, vapic_do_enable_tpr_reporting, &info); + run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info)); } } @@ -734,10 +737,10 @@ static void vapic_realize(DeviceState *dev, Error **errp) nb_option_roms++; } -static void do_vapic_enable(void *data) +static void do_vapic_enable(CPUState *cs, run_on_cpu_data data) { - VAPICROMState *s = data; - X86CPU *cpu = X86_CPU(first_cpu); + VAPICROMState *s = data.host_ptr; + X86CPU *cpu = X86_CPU(cs); static const uint8_t enabled = 1; cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled), @@ -758,7 +761,7 @@ static void kvmvapic_vm_state_change(void *opaque, int running, if (s->state == VAPIC_ACTIVE) { if (smp_cpus == 1) { - run_on_cpu(first_cpu, do_vapic_enable, s); + run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s)); } else { zero = g_malloc0(s->rom_state.vapic_size); cpu_physical_memory_write(s->vapic_paddr, zero, @@ -768,6 +771,7 @@ static void kvmvapic_vm_state_change(void *opaque, int running, } qemu_del_vm_change_state_handler(s->vmsentry); + s->vmsentry = NULL; } static int vapic_post_load(void *opaque, int version_id) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 022dd1b205..a9e64a88e5 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -68,6 +68,7 @@ #include "qapi-visit.h" #include "qom/cpu.h" #include "hw/nmi.h" +#include "hw/i386/intel_iommu.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -161,13 +162,15 @@ int cpu_get_pic_interrupt(CPUX86State *env) X86CPU *cpu = x86_env_get_cpu(env); int intno; - intno = apic_get_interrupt(cpu->apic_state); - if (intno >= 0) { - return intno; - } - /* read the irq from the PIC */ - if (!apic_accept_pic_intr(cpu->apic_state)) { - return -1; + if (!kvm_irqchip_in_kernel()) { + intno = apic_get_interrupt(cpu->apic_state); + if (intno >= 0) { + return intno; + } + /* read the irq from the PIC */ + if (!apic_accept_pic_intr(cpu->apic_state)) { + return -1; + } } intno = pic_read_irq(isa_pic); @@ -180,7 +183,7 @@ static void pic_irq_request(void *opaque, int irq, int level) X86CPU *cpu = X86_CPU(cs); DPRINTF("pic_irqs: %s irq %d\n", level? 
"raise" : "lower", irq); - if (cpu->apic_state) { + if (cpu->apic_state && !kvm_irqchip_in_kernel()) { CPU_FOREACH(cs) { cpu = X86_CPU(cs); if (apic_accept_pic_intr(cpu->apic_state)) { @@ -530,9 +533,9 @@ static uint64_t port92_read(void *opaque, hwaddr addr, return ret; } -static void port92_init(ISADevice *dev, qemu_irq *a20_out) +static void port92_init(ISADevice *dev, qemu_irq a20_out) { - qdev_connect_gpio_out_named(DEVICE(dev), PORT92_A20_LINE, 0, *a20_out); + qdev_connect_gpio_out_named(DEVICE(dev), PORT92_A20_LINE, 0, a20_out); } static const VMStateDescription vmstate_port92_isa = { @@ -741,20 +744,19 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) int i, j; fw_cfg = fw_cfg_init_io_dma(FW_CFG_IO_BASE, FW_CFG_IO_BASE + 4, as); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86: * - * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug - * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC - * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the - * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS - * may see". + * For machine types prior to 1.8, SeaBIOS needs FW_CFG_MAX_CPUS for + * building MPTable, ACPI MADT, ACPI CPU hotplug and ACPI SRAT table, + * that tables are based on xAPIC ID and QEMU<->SeaBIOS interface + * for CPU hotplug also uses APIC ID and not "CPU index". + * This means that FW_CFG_MAX_CPUS is not the "maximum number of CPUs", + * but the "limit to the APIC ID values SeaBIOS may see". * - * So, this means we must not use max_cpus, here, but the maximum possible - * APIC ID value, plus one. - * - * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is - * the APIC ID, not the "CPU index" + * So for compatibility reasons with old BIOSes we are stuck with + * "etc/max-cpus" actually being apic_id_limit */ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)pcms->apic_id_limit); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); @@ -777,11 +779,9 @@ static FWCfgState *bochs_bios_init(AddressSpace *as, PCMachineState *pcms) for (i = 0; i < max_cpus; i++) { unsigned int apic_id = x86_cpu_apic_id_from_index(i); assert(apic_id < pcms->apic_id_limit); - for (j = 0; j < nb_numa_nodes; j++) { - if (test_bit(i, numa_info[j].node_cpu)) { - numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); - break; - } + j = numa_get_node_for_cpu(i); + if (j < nb_numa_nodes) { + numa_fw_cfg[apic_id + 1] = cpu_to_le64(j); } } for (i = 0; i < nb_numa_nodes; i++) { @@ -1087,17 +1087,6 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level) } } -static int pc_present_cpus_count(PCMachineState *pcms) -{ - int i, boot_cpus = 0; - for (i = 0; i < pcms->possible_cpus->len; i++) { - if (pcms->possible_cpus->cpus[i].cpu) { - boot_cpus++; - } - } - return boot_cpus; -} - static X86CPU *pc_new_cpu(const char *typename, int64_t apic_id, Error **errp) { @@ -1190,12 +1179,6 @@ void pc_cpus_init(PCMachineState *pcms) * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init(). */ pcms->apic_id_limit = x86_cpu_apic_id_from_index(max_cpus - 1) + 1; - if (pcms->apic_id_limit > ACPI_CPU_HOTPLUG_ID_LIMIT) { - error_report("max_cpus is too large. 
APIC ID of last CPU is %u", - pcms->apic_id_limit - 1); - exit(1); - } - pcms->possible_cpus = g_malloc0(sizeof(CPUArchIdList) + sizeof(CPUArchId) * max_cpus); for (i = 0; i < max_cpus; i++) { @@ -1240,6 +1223,19 @@ static void pc_build_feature_control_file(PCMachineState *pcms) fw_cfg_add_file(pcms->fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); } +static void rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count) +{ + if (cpus_count > 0xff) { + /* If the number of CPUs can't be represented in 8 bits, the + * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just + * to make old BIOSes fail more predictably. + */ + rtc_set_memory(rtc, 0x5f, 0); + } else { + rtc_set_memory(rtc, 0x5f, cpus_count - 1); + } +} + static void pc_machine_done(Notifier *notifier, void *data) { @@ -1248,7 +1244,7 @@ void pc_machine_done(Notifier *notifier, void *data) PCIBus *bus = pcms->bus; /* set the number of CPUs */ - rtc_set_memory(pcms->rtc, 0x5f, pc_present_cpus_count(pcms) - 1); + rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); if (bus) { int extra_hosts = 0; @@ -1271,6 +1267,21 @@ void pc_machine_done(Notifier *notifier, void *data) if (pcms->fw_cfg) { pc_build_smbios(pcms->fw_cfg); pc_build_feature_control_file(pcms); + /* update FW_CFG_NB_CPUS to account for -device added CPUs */ + fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); + } + + if (pcms->apic_id_limit > 255) { + IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); + + if (!iommu || !iommu->x86_iommu.intr_supported || + iommu->intr_eim != ON_OFF_AUTO_ON) { + error_report("current -smp configuration requires " + "Extended Interrupt Mode enabled. " + "You can add an IOMMU using: " + "-device intel-iommu,intremap=on,eim=on"); + exit(EXIT_FAILURE); + } } } @@ -1335,6 +1346,7 @@ void xen_load_linux(PCMachineState *pcms) assert(MACHINE(pcms)->kernel_filename != NULL); fw_cfg = fw_cfg_init_io(FW_CFG_IO_BASE); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); rom_set_fw(fw_cfg); load_linux(pcms, fw_cfg); @@ -1589,12 +1601,12 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, pcspk_init(isa_bus, pit); } - serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); + serial_hds_isa_init(isa_bus, 0, MAX_SERIAL_PORTS); parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS); a20_line = qemu_allocate_irqs(handle_a20_line_change, first_cpu, 2); i8042 = isa_create_simple(isa_bus, "i8042"); - i8042_setup_a20_line(i8042, &a20_line[0]); + i8042_setup_a20_line(i8042, a20_line[0]); if (!no_vmport) { vmport_init(isa_bus); vmmouse = isa_try_create(isa_bus, "vmmouse"); @@ -1607,7 +1619,8 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, qdev_init_nofail(dev); } port92 = isa_create_simple(isa_bus, "port92"); - port92_init(port92, &a20_line[1]); + port92_init(port92, a20_line[1]); + g_free(a20_line); DMA_init(isa_bus, 0); @@ -1699,6 +1712,10 @@ static void pc_dimm_plug(HotplugHandler *hotplug_dev, goto out; } + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + nvdimm_plug(&pcms->acpi_nvdimm_state); + } + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); out: @@ -1718,6 +1735,12 @@ static void pc_dimm_unplug_request(HotplugHandler *hotplug_dev, goto out; } + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + error_setg(&local_err, + "nvdimm device hot unplug is not supported yet."); + goto out; + } + hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); @@ -1793,9 +1816,11 @@ 
static void pc_cpu_plug(HotplugHandler *hotplug_dev, } } + /* increment the number of CPUs */ + pcms->boot_cpus++; if (dev->hotplugged) { - /* increment the number of CPUs */ - rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) + 1); + rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); + fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); } found_cpu = pc_find_cpu_slot(pcms, CPU(dev), NULL); @@ -1849,7 +1874,11 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, found_cpu->cpu = NULL; object_unparent(OBJECT(dev)); - rtc_set_memory(pcms->rtc, 0x5f, rtc_get_memory(pcms->rtc, 0x5f) - 1); + /* decrement the number of CPUs */ + pcms->boot_cpus--; + /* Update the number of CPUs in CMOS */ + rtc_set_cpus_count(pcms->rtc, pcms->boot_cpus); + fw_cfg_modify_i16(pcms->fw_cfg, FW_CFG_NB_CPUS, pcms->boot_cpus); out: error_propagate(errp, local_err); } @@ -2133,41 +2162,13 @@ static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); - object_property_add(obj, PC_MACHINE_MEMHP_REGION_SIZE, "int", - pc_machine_get_hotplug_memory_region_size, - NULL, NULL, NULL, &error_abort); - pcms->max_ram_below_4g = 0; /* use default */ - object_property_add(obj, PC_MACHINE_MAX_RAM_BELOW_4G, "size", - pc_machine_get_max_ram_below_4g, - pc_machine_set_max_ram_below_4g, - NULL, NULL, &error_abort); - object_property_set_description(obj, PC_MACHINE_MAX_RAM_BELOW_4G, - "Maximum ram below the 4G boundary (32bit boundary)", - &error_abort); - pcms->smm = ON_OFF_AUTO_AUTO; - object_property_add(obj, PC_MACHINE_SMM, "OnOffAuto", - pc_machine_get_smm, - pc_machine_set_smm, - NULL, NULL, &error_abort); - object_property_set_description(obj, PC_MACHINE_SMM, - "Enable SMM (pc & q35)", - &error_abort); - pcms->vmport = ON_OFF_AUTO_AUTO; - object_property_add(obj, PC_MACHINE_VMPORT, "OnOffAuto", - pc_machine_get_vmport, - pc_machine_set_vmport, - NULL, NULL, &error_abort); - object_property_set_description(obj, PC_MACHINE_VMPORT, - "Enable vmport (pc & q35)", - &error_abort); - /* nvdimm is disabled on default. 
*/ pcms->acpi_nvdimm_state.is_enabled = false; - object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, - pc_machine_set_nvdimm, &error_abort); + /* acpi build is enabled by default if machine supports it */ + pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; } static void pc_machine_reset(void) @@ -2302,6 +2303,32 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) hc->unplug_request = pc_machine_device_unplug_request_cb; hc->unplug = pc_machine_device_unplug_cb; nc->nmi_monitor_handler = x86_nmi; + + object_class_property_add(oc, PC_MACHINE_MEMHP_REGION_SIZE, "int", + pc_machine_get_hotplug_memory_region_size, NULL, + NULL, NULL, &error_abort); + + object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", + pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, + NULL, NULL, &error_abort); + + object_class_property_set_description(oc, PC_MACHINE_MAX_RAM_BELOW_4G, + "Maximum ram below the 4G boundary (32bit boundary)", &error_abort); + + object_class_property_add(oc, PC_MACHINE_SMM, "OnOffAuto", + pc_machine_get_smm, pc_machine_set_smm, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, PC_MACHINE_SMM, + "Enable SMM (pc & q35)", &error_abort); + + object_class_property_add(oc, PC_MACHINE_VMPORT, "OnOffAuto", + pc_machine_get_vmport, pc_machine_set_vmport, + NULL, NULL, &error_abort); + object_class_property_set_description(oc, PC_MACHINE_VMPORT, + "Enable vmport (pc & q35)", &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, + pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a07dc816bf..a54a468c0a 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -74,7 +74,6 @@ static void pc_init1(MachineState *machine, ISABus *isa_bus; PCII440FXState *i440fx_state; int piix3_devfn = -1; - qemu_irq *gsi; qemu_irq *i8259; qemu_irq smi_irq; GSIState *gsi_state; @@ -185,16 +184,16 @@ static void pc_init1(MachineState *machine, gsi_state = g_malloc0(sizeof(*gsi_state)); if (kvm_ioapic_in_kernel()) { kvm_pc_setup_irq_routing(pcmc->pci_enabled); - gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, - GSI_NUM_PINS); + pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, + GSI_NUM_PINS); } else { - gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); } if (pcmc->pci_enabled) { pci_bus = i440fx_init(host_type, pci_type, - &i440fx_state, &piix3_devfn, &isa_bus, gsi, + &i440fx_state, &piix3_devfn, &isa_bus, pcms->gsi, system_memory, system_io, machine->ram_size, pcms->below_4g_mem_size, pcms->above_4g_mem_size, @@ -207,7 +206,7 @@ static void pc_init1(MachineState *machine, &error_abort); no_hpet = 1; } - isa_bus_irqs(isa_bus, gsi); + isa_bus_irqs(isa_bus, pcms->gsi); if (kvm_pic_in_kernel()) { i8259 = kvm_i8259_init(isa_bus); @@ -225,7 +224,7 @@ static void pc_init1(MachineState *machine, ioapic_init_gsi(gsi_state, "i440fx"); } - pc_register_ferr_irq(gsi[13]); + pc_register_ferr_irq(pcms->gsi[13]); pc_vga_init(isa_bus, pcmc->pci_enabled ? 
pci_bus : NULL); @@ -235,7 +234,7 @@ static void pc_init1(MachineState *machine, } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, true, + pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true, (pcms->vmport != ON_OFF_AUTO_ON), 0x4); pc_nic_init(isa_bus, pci_bus); @@ -279,7 +278,7 @@ static void pc_init1(MachineState *machine, smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0); /* TODO: Populate SPD eeprom data. */ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, - gsi[9], smi_irq, + pcms->gsi[9], smi_irq, pc_machine_is_smm_enabled(pcms), &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); @@ -438,13 +437,25 @@ static void pc_i440fx_machine_options(MachineClass *m) m->default_display = "std"; } -static void pc_i440fx_2_7_machine_options(MachineClass *m) +static void pc_i440fx_2_8_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = 1; } +DEFINE_I440FX_MACHINE(v2_8, "pc-i440fx-2.8", NULL, + pc_i440fx_2_8_machine_options); + + +static void pc_i440fx_2_7_machine_options(MachineClass *m) +{ + pc_i440fx_2_8_machine_options(m); + m->is_default = 0; + m->alias = NULL; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_7); +} + DEFINE_I440FX_MACHINE(v2_7, "pc-i440fx-2.7", NULL, pc_i440fx_2_7_machine_options); @@ -453,8 +464,6 @@ static void pc_i440fx_2_6_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_2_7_machine_options(m); - m->is_default = 0; - m->alias = NULL; pcmc->legacy_cpu_hotplug = true; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c0b9961928..b40d19ee00 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -69,7 +69,6 @@ static void pc_q35_init(MachineState *machine) MemoryRegion *ram_memory; GSIState *gsi_state; ISABus *isa_bus; - qemu_irq *gsi; qemu_irq *i8259; int i; ICH9LPCState *ich9_lpc; @@ -153,10 +152,10 @@ static void pc_q35_init(MachineState *machine) gsi_state = g_malloc0(sizeof(*gsi_state)); if (kvm_ioapic_in_kernel()) { kvm_pc_setup_irq_routing(pcmc->pci_enabled); - gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, - GSI_NUM_PINS); + pcms->gsi = qemu_allocate_irqs(kvm_pc_gsi_handler, gsi_state, + GSI_NUM_PINS); } else { - gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); + pcms->gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS); } /* create pci host bus */ @@ -195,7 +194,7 @@ static void pc_q35_init(MachineState *machine) ich9_lpc = ICH9_LPC_DEVICE(lpc); lpc_dev = DEVICE(lpc); for (i = 0; i < GSI_NUM_PINS; i++) { - qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, gsi[i]); + qdev_connect_gpio_out_named(lpc_dev, ICH9_GPIO_GSI, i, pcms->gsi[i]); } pci_bus_irqs(host_bus, ich9_lpc_set_irq, ich9_lpc_map_irq, ich9_lpc, ICH9_LPC_NB_PIRQS); @@ -213,11 +212,13 @@ static void pc_q35_init(MachineState *machine) for (i = 0; i < ISA_NUM_IRQS; i++) { gsi_state->i8259_irq[i] = i8259[i]; } + g_free(i8259); + if (pcmc->pci_enabled) { ioapic_init_gsi(gsi_state, "q35"); } - pc_register_ferr_irq(gsi[13]); + pc_register_ferr_irq(pcms->gsi[13]); assert(pcms->vmport != ON_OFF_AUTO__MAX); if (pcms->vmport == ON_OFF_AUTO_AUTO) { @@ -225,7 +226,7 @@ static void pc_q35_init(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(isa_bus, gsi, &rtc_state, !mc->no_floppy, + pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy, (pcms->vmport != ON_OFF_AUTO_ON), 0xff0104); /* connect pm stuff to lpc */ @@ -290,14 +291,26 @@ static void 
pc_q35_machine_options(MachineClass *m) m->default_display = "std"; m->no_floppy = 1; m->has_dynamic_sysbus = true; + m->max_cpus = 288; } -static void pc_q35_2_7_machine_options(MachineClass *m) +static void pc_q35_2_8_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v2_8, "pc-q35-2.8", NULL, + pc_q35_2_8_machine_options); + +static void pc_q35_2_7_machine_options(MachineClass *m) +{ + pc_q35_2_8_machine_options(m); + m->alias = NULL; + m->max_cpus = 255; + SET_MACHINE_COMPAT(m, PC_COMPAT_2_7); +} + DEFINE_Q35_MACHINE(v2_7, "pc-q35-2.7", NULL, pc_q35_2_7_machine_options); @@ -305,7 +318,6 @@ static void pc_q35_2_6_machine_options(MachineClass *m) { PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_2_7_machine_options(m); - m->alias = NULL; pcmc->legacy_cpu_hotplug = true; SET_MACHINE_COMPAT(m, PC_COMPAT_2_6); } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index 7735e46eaf..d2b497327e 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -7,9 +7,34 @@ xen_platform_log(char *s) "xen platform: %s" xen_pv_mmio_read(uint64_t addr) "WARNING: read from Xen PV Device MMIO space (address %"PRIx64")" xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (address %"PRIx64")" -# hw/i386/pc.c -mhp_pc_dimm_assigned_slot(int slot) "0x%d" -mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64 - # hw/i386/x86-iommu.c x86_iommu_iec_notify(bool global, uint32_t index, uint32_t mask) "Notify IEC invalidation: global=%d index=%" PRIu32 " mask=%" PRIu32 + +# hw/i386/amd_iommu.c +amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 +amdvi_cache_update(uint16_t domid, uint8_t bus, uint8_t slot, uint8_t func, uint64_t gpa, uint64_t txaddr) " update iotlb domid 0x%"PRIx16" devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 +amdvi_completion_wait_fail(uint64_t addr) "error: fail to write at address 0x%"PRIx64 +amdvi_mmio_write(const char *reg, uint64_t addr, unsigned size, uint64_t val, uint64_t offset) "%s write addr 0x%"PRIx64", size %u, val 0x%"PRIx64", offset 0x%"PRIx64 +amdvi_mmio_read(const char *reg, uint64_t addr, unsigned size, uint64_t offset) "%s read addr 0x%"PRIx64", size %u offset 0x%"PRIx64 +amdvi_command_error(uint64_t status) "error: Executing commands with command buffer disabled 0x%"PRIx64 +amdvi_command_read_fail(uint64_t addr, uint32_t head) "error: fail to access memory at 0x%"PRIx64" + 0x%"PRIx32 +amdvi_command_exec(uint32_t head, uint32_t tail, uint64_t buf) "command buffer head at 0x%"PRIx32" command buffer tail at 0x%"PRIx32" command buffer base at 0x%"PRIx64 +amdvi_unhandled_command(uint8_t type) "unhandled command 0x%"PRIx8 +amdvi_intr_inval(void) "Interrupt table invalidated" +amdvi_iotlb_inval(void) "IOTLB pages invalidated" +amdvi_prefetch_pages(void) "Pre-fetch of AMD-Vi pages requested" +amdvi_pages_inval(uint16_t domid) "AMD-Vi pages for domain 0x%"PRIx16 " invalidated" +amdvi_all_inval(void) "Invalidation of all AMD-Vi cache requested " +amdvi_ppr_exec(void) "Execution of PPR queue requested " +amdvi_devtab_inval(uint8_t bus, uint8_t slot, uint8_t func) "device table entry for devid: %02x:%02x.%x invalidated" +amdvi_completion_wait(uint64_t addr, uint64_t data) "completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64 +amdvi_control_status(uint64_t val) "MMIO_STATUS state 0x%"PRIx64 +amdvi_iotlb_reset(void) "IOTLB exceed size limit - reset " +amdvi_completion_wait_exec(uint64_t addr, uint64_t data) 
"completion wait requested with store address 0x%"PRIx64" and store data 0x%"PRIx64 +amdvi_dte_get_fail(uint64_t addr, uint32_t offset) "error: failed to access Device Entry devtab 0x%"PRIx64" offset 0x%"PRIx32 +amdvi_invalid_dte(uint64_t addr) "PTE entry at 0x%"PRIx64" is invalid " +amdvi_get_pte_hwerror(uint64_t addr) "hardware error eccessing PTE at addr 0x%"PRIx64 +amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: translation level 0x%"PRIx8" translating addr 0x%"PRIx64 +amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical address 0x%"PRIx64 +amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 +amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64 diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index ce26b2a71d..2278af7c32 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -71,6 +71,11 @@ X86IOMMUState *x86_iommu_get_default(void) return x86_iommu_default; } +IommuType x86_iommu_get_type(void) +{ + return x86_iommu_default->type; +} + static void x86_iommu_realize(DeviceState *dev, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); @@ -79,6 +84,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) if (x86_class->realize) { x86_class->realize(dev, errp); } + x86_iommu_set_default(X86_IOMMU_DEVICE(dev)); } diff --git a/hw/i386/xen/xen_apic.c b/hw/i386/xen/xen_apic.c index 21d68ee04b..55769eba7e 100644 --- a/hw/i386/xen/xen_apic.c +++ b/hw/i386/xen/xen_apic.c @@ -68,6 +68,11 @@ static void xen_apic_external_nmi(APICCommonState *s) { } +static void xen_send_msi(MSIMessage *msi) +{ + xen_hvm_inject_msi(msi->address, msi->data); +} + static void xen_apic_class_init(ObjectClass *klass, void *data) { APICCommonClass *k = APIC_COMMON_CLASS(klass); @@ -78,6 +83,7 @@ static void xen_apic_class_init(ObjectClass *klass, void *data) k->get_tpr = xen_apic_get_tpr; k->vapic_base_update = xen_apic_vapic_base_update; k->external_nmi = xen_apic_external_nmi; + k->send_msi = xen_send_msi; } static const TypeInfo xen_apic_info = { diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index aa7839324c..2e1e543881 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -114,6 +114,10 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void *o) PCI_CLASS_STORAGE_IDE && strcmp(d->name, "xen-pci-passthrough") != 0) { pci_piix3_xen_ide_unplug(DEVICE(d)); + } else if (pci_get_word(d->config + PCI_CLASS_DEVICE) == + PCI_CLASS_STORAGE_SCSI + && strcmp(d->name, "xen-pci-passthrough") != 0) { + object_unparent(OBJECT(d)); } } @@ -134,8 +138,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v devices, and bit 2 the non-primary-master IDE devices. 
*/ if (val & UNPLUG_ALL_IDE_DISKS) { DPRINTF("unplug disks\n"); - blk_drain_all(); - blk_flush_all(); pci_unplug_disks(pci_dev->bus); } if (val & UNPLUG_ALL_NICS) { @@ -309,13 +311,38 @@ static void xen_platform_ioport_writeb(void *opaque, hwaddr addr, uint64_t val, unsigned int size) { PCIXenPlatformState *s = opaque; + PCIDevice *pci_dev = PCI_DEVICE(s); switch (addr) { case 0: /* Platform flags */ platform_fixed_ioport_writeb(opaque, 0, (uint32_t)val); break; + case 4: + if (val == 1) { + /* + * SUSE unplug for Xenlinux + * xen-kmp used this since xen-3.0.4, instead of the official protocol + * from xen-3.3+. It did an unconditional "outl(1, (ioaddr + 4));" + * Pre VMDP 1.7 used 4 and 8 depending on how VMDP was configured. + * If VMDP was to control both disk and LAN it would use 4. + * If it controlled just disk or just LAN, it would use 8 below. + */ + pci_unplug_disks(pci_dev->bus); + pci_unplug_nics(pci_dev->bus); + } + break; case 8: - log_writeb(s, (uint32_t)val); + switch (val) { + case 1: + pci_unplug_disks(pci_dev->bus); + break; + case 2: + pci_unplug_nics(pci_dev->bus); + break; + default: + log_writeb(s, (uint32_t)val); + break; + } break; default: break; diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index f3438ad78a..3c19bdadc5 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -948,6 +948,7 @@ static void ncq_cb(void *opaque, int ret) NCQTransferState *ncq_tfs = (NCQTransferState *)opaque; IDEState *ide_state = &ncq_tfs->drive->port.ifs[0]; + ncq_tfs->aiocb = NULL; if (ret == -ECANCELED) { return; } @@ -1008,6 +1009,7 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs) &ncq_tfs->sglist, BLOCK_ACCT_READ); ncq_tfs->aiocb = dma_blk_read(ide_state->blk, &ncq_tfs->sglist, ncq_tfs->lba << BDRV_SECTOR_BITS, + BDRV_SECTOR_SIZE, ncq_cb, ncq_tfs); break; case WRITE_FPDMA_QUEUED: @@ -1021,6 +1023,7 @@ static void execute_ncq_command(NCQTransferState *ncq_tfs) &ncq_tfs->sglist, BLOCK_ACCT_WRITE); ncq_tfs->aiocb = dma_blk_write(ide_state->blk, &ncq_tfs->sglist, ncq_tfs->lba << BDRV_SECTOR_BITS, + BDRV_SECTOR_SIZE, ncq_cb, ncq_tfs); break; default: diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c index 6189675036..fc1d19c6d4 100644 --- a/hw/ide/atapi.c +++ b/hw/ide/atapi.c @@ -637,6 +637,23 @@ static unsigned int event_status_media(IDEState *s, return 8; /* We wrote to 4 extra bytes from the header */ } +/* + * Before transferring data or otherwise signalling acceptance of a command + * marked CONDDATA, we must check the validity of the byte_count_limit. 
+ */ +static bool validate_bcl(IDEState *s) +{ + /* TODO: Check IDENTIFY data word 125 for default BCL (currently 0) */ + if (s->atapi_dma || atapi_byte_count_limit(s)) { + return true; + } + + /* TODO: Move abort back into core.c and introduce proper error flow between + * ATAPI layer and IDE core layer */ + ide_abort_command(s); + return false; +} + static void cmd_get_event_status_notification(IDEState *s, uint8_t *buf) { @@ -1028,12 +1045,19 @@ static void cmd_read_cd(IDEState *s, uint8_t* buf) return; } - transfer_request = buf[9]; - switch(transfer_request & 0xf8) { - case 0x00: + transfer_request = buf[9] & 0xf8; + if (transfer_request == 0x00) { /* nothing */ ide_atapi_cmd_ok(s); - break; + return; + } + + /* Check validity of BCL before transferring data */ + if (!validate_bcl(s)) { + return; + } + + switch (transfer_request) { case 0x10: /* normal read */ ide_atapi_cmd_read(s, lba, nb_sectors, 2048); @@ -1266,6 +1290,14 @@ enum { * See ATA8-ACS3 "7.21.5 Byte Count Limit" */ NONDATA = 0x04, + + /* + * CONDDATA implies a command that transfers data only conditionally based + * on the presence of suboptions. It should be exempt from the BCL check at + * command validation time, but it needs to be checked at the command + * handler level instead. + */ + CONDDATA = 0x08, }; static const struct AtapiCmd { @@ -1289,7 +1321,7 @@ static const struct AtapiCmd { [ 0xad ] = { cmd_read_dvd_structure, CHECK_READY }, [ 0xbb ] = { cmd_set_speed, NONDATA }, [ 0xbd ] = { cmd_mechanism_status, 0 }, - [ 0xbe ] = { cmd_read_cd, CHECK_READY }, + [ 0xbe ] = { cmd_read_cd, CHECK_READY | CONDDATA }, /* [1] handler detects and reports not ready condition itself */ }; @@ -1348,15 +1380,12 @@ void ide_atapi_cmd(IDEState *s) return; } - /* Nondata commands permit the byte_count_limit to be 0. + /* Commands that don't transfer DATA permit the byte_count_limit to be 0. * If this is a data-transferring PIO command and BCL is 0, * we abort at the /ATA/ level, not the ATAPI level. 
* See ATA8 ACS3 section 7.17.6.49 and 7.21.5 */ - if (cmd->handler && !(cmd->flags & NONDATA)) { - /* TODO: Check IDENTIFY data word 125 for default BCL (currently 0) */ - if (!(atapi_byte_count_limit(s) || s->atapi_dma)) { - /* TODO: Move abort back into core.c and make static inline again */ - ide_abort_command(s); + if (cmd->handler && !(cmd->flags & (NONDATA | CONDDATA))) { + if (!validate_bcl(s)) { return; } } diff --git a/hw/ide/core.c b/hw/ide/core.c index 45b6df132c..43709e545f 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -882,15 +882,15 @@ static void ide_dma_cb(void *opaque, int ret) switch (s->dma_cmd) { case IDE_DMA_READ: s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, offset, - ide_dma_cb, s); + BDRV_SECTOR_SIZE, ide_dma_cb, s); break; case IDE_DMA_WRITE: s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, offset, - ide_dma_cb, s); + BDRV_SECTOR_SIZE, ide_dma_cb, s); break; case IDE_DMA_TRIM: s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), - &s->sg, offset, + &s->sg, offset, BDRV_SECTOR_SIZE, ide_issue_trim, s->blk, ide_dma_cb, s, DMA_DIRECTION_TO_DEVICE); break; @@ -908,7 +908,7 @@ static void ide_dma_cb(void *opaque, int ret) static void ide_sector_start_dma(IDEState *s, enum ide_dma_cmd dma_cmd) { - s->status = READY_STAT | SEEK_STAT | DRQ_STAT | BUSY_STAT; + s->status = READY_STAT | SEEK_STAT | DRQ_STAT; s->io_buffer_size = 0; s->dma_cmd = dma_cmd; @@ -2582,7 +2582,7 @@ static void ide_restart_cb(void *opaque, int running, RunState state) void ide_register_restart_cb(IDEBus *bus) { if (bus->dma->ops->restart_dma) { - qemu_add_vm_change_state_handler(ide_restart_cb, bus); + bus->vmstate = qemu_add_vm_change_state_handler(ide_restart_cb, bus); } } @@ -2619,10 +2619,12 @@ void ide_init_ioport(IDEBus *bus, ISADevice *dev, int iobase, int iobase2) { /* ??? Assume only ISA and PCI configurations, and that the PCI-ISA bridge has been setup properly to always register with ISA. */ - isa_register_portio_list(dev, iobase, ide_portio_list, bus, "ide"); + isa_register_portio_list(dev, &bus->portio_list, + iobase, ide_portio_list, bus, "ide"); if (iobase2) { - isa_register_portio_list(dev, iobase2, ide_portio2_list, bus, "ide"); + isa_register_portio_list(dev, &bus->portio2_list, + iobase2, ide_portio2_list, bus, "ide"); } } diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 76f97c2539..9742c005d1 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -52,187 +52,6 @@ static const int debug_macio = 0; #define MACIO_PAGE_SIZE 4096 -/* - * Unaligned DMA read/write access functions required for OS X/Darwin which - * don't perform DMA transactions on sector boundaries. These functions are - * modelled on bdrv_co_preadv()/bdrv_co_pwritev() and so should be easy to - * remove if the unaligned block APIs are ever exposed. 
- */ - -static void pmac_dma_read(BlockBackend *blk, - int64_t offset, unsigned int bytes, - void (*cb)(void *opaque, int ret), void *opaque) -{ - DBDMA_io *io = opaque; - MACIOIDEState *m = io->opaque; - IDEState *s = idebus_active_if(&m->bus); - dma_addr_t dma_addr; - int64_t sector_num; - int nsector; - uint64_t align = BDRV_SECTOR_SIZE; - size_t head_bytes, tail_bytes; - - qemu_iovec_destroy(&io->iov); - qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1); - - sector_num = (offset >> 9); - nsector = (io->len >> 9); - - MACIO_DPRINTF("--- DMA read transfer (0x%" HWADDR_PRIx ",0x%x): " - "sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len, - sector_num, nsector); - - dma_addr = io->addr; - io->dir = DMA_DIRECTION_FROM_DEVICE; - io->dma_len = io->len; - io->dma_mem = dma_memory_map(&address_space_memory, dma_addr, &io->dma_len, - io->dir); - - if (offset & (align - 1)) { - head_bytes = offset & (align - 1); - - MACIO_DPRINTF("--- DMA unaligned head: sector %" PRId64 ", " - "discarding %zu bytes\n", sector_num, head_bytes); - - qemu_iovec_add(&io->iov, &io->head_remainder, head_bytes); - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - } - - qemu_iovec_add(&io->iov, io->dma_mem, io->len); - - if ((offset + bytes) & (align - 1)) { - tail_bytes = (offset + bytes) & (align - 1); - - MACIO_DPRINTF("--- DMA unaligned tail: sector %" PRId64 ", " - "discarding bytes %zu\n", sector_num, tail_bytes); - - qemu_iovec_add(&io->iov, &io->tail_remainder, align - tail_bytes); - bytes = ROUND_UP(bytes, align); - } - - s->io_buffer_size -= io->len; - s->io_buffer_index += io->len; - - io->len = 0; - - MACIO_DPRINTF("--- Block read transfer - sector_num: %" PRIx64 " " - "nsector: %x\n", (offset >> 9), (bytes >> 9)); - - s->bus->dma->aiocb = blk_aio_preadv(blk, offset, &io->iov, 0, cb, io); -} - -static void pmac_dma_write(BlockBackend *blk, - int64_t offset, int bytes, - void (*cb)(void *opaque, int ret), void *opaque) -{ - DBDMA_io *io = opaque; - MACIOIDEState *m = io->opaque; - IDEState *s = idebus_active_if(&m->bus); - dma_addr_t dma_addr; - int64_t sector_num; - int nsector; - uint64_t align = BDRV_SECTOR_SIZE; - size_t head_bytes, tail_bytes; - bool unaligned_head = false, unaligned_tail = false; - - qemu_iovec_destroy(&io->iov); - qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1); - - sector_num = (offset >> 9); - nsector = (io->len >> 9); - - MACIO_DPRINTF("--- DMA write transfer (0x%" HWADDR_PRIx ",0x%x): " - "sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len, - sector_num, nsector); - - dma_addr = io->addr; - io->dir = DMA_DIRECTION_TO_DEVICE; - io->dma_len = io->len; - io->dma_mem = dma_memory_map(&address_space_memory, dma_addr, &io->dma_len, - io->dir); - - if (offset & (align - 1)) { - head_bytes = offset & (align - 1); - sector_num = ((offset & ~(align - 1)) >> 9); - - MACIO_DPRINTF("--- DMA unaligned head: pre-reading head sector %" - PRId64 "\n", sector_num); - - blk_pread(s->blk, (sector_num << 9), &io->head_remainder, align); - - qemu_iovec_add(&io->iov, &io->head_remainder, head_bytes); - qemu_iovec_add(&io->iov, io->dma_mem, io->len); - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - - unaligned_head = true; - } - - if ((offset + bytes) & (align - 1)) { - tail_bytes = (offset + bytes) & (align - 1); - sector_num = (((offset + bytes) & ~(align - 1)) >> 9); - - MACIO_DPRINTF("--- DMA unaligned tail: pre-reading tail sector %" - PRId64 "\n", sector_num); - - blk_pread(s->blk, (sector_num << 9), &io->tail_remainder, 
align); - - if (!unaligned_head) { - qemu_iovec_add(&io->iov, io->dma_mem, io->len); - } - - qemu_iovec_add(&io->iov, &io->tail_remainder + tail_bytes, - align - tail_bytes); - - bytes = ROUND_UP(bytes, align); - - unaligned_tail = true; - } - - if (!unaligned_head && !unaligned_tail) { - qemu_iovec_add(&io->iov, io->dma_mem, io->len); - } - - s->io_buffer_size -= io->len; - s->io_buffer_index += io->len; - - io->len = 0; - - MACIO_DPRINTF("--- Block write transfer - sector_num: %" PRIx64 " " - "nsector: %x\n", (offset >> 9), (bytes >> 9)); - - s->bus->dma->aiocb = blk_aio_pwritev(blk, offset, &io->iov, 0, cb, io); -} - -static void pmac_dma_trim(BlockBackend *blk, - int64_t offset, int bytes, - void (*cb)(void *opaque, int ret), void *opaque) -{ - DBDMA_io *io = opaque; - MACIOIDEState *m = io->opaque; - IDEState *s = idebus_active_if(&m->bus); - dma_addr_t dma_addr; - - qemu_iovec_destroy(&io->iov); - qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1); - - dma_addr = io->addr; - io->dir = DMA_DIRECTION_TO_DEVICE; - io->dma_len = io->len; - io->dma_mem = dma_memory_map(&address_space_memory, dma_addr, &io->dma_len, - io->dir); - - qemu_iovec_add(&io->iov, io->dma_mem, io->len); - s->io_buffer_size -= io->len; - s->io_buffer_index += io->len; - io->len = 0; - - s->bus->dma->aiocb = ide_issue_trim(offset, &io->iov, cb, io, blk); -} - static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) { DBDMA_io *io = opaque; @@ -244,6 +63,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) if (ret < 0) { MACIO_DPRINTF("DMA error: %d\n", ret); + qemu_sglist_destroy(&s->sg); ide_atapi_io_error(s, ret); goto done; } @@ -258,6 +78,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) if (s->io_buffer_size <= 0) { MACIO_DPRINTF("End of IDE transfer\n"); + qemu_sglist_destroy(&s->sg); ide_atapi_cmd_ok(s); m->dma_active = false; goto done; @@ -282,7 +103,15 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret) /* Calculate current offset */ offset = ((int64_t)s->lba << 11) + s->io_buffer_index; - pmac_dma_read(s->blk, offset, io->len, pmac_ide_atapi_transfer_cb, io); + qemu_sglist_init(&s->sg, DEVICE(m), io->len / MACIO_PAGE_SIZE + 1, + &address_space_memory); + qemu_sglist_add(&s->sg, io->addr, io->len); + s->io_buffer_size -= io->len; + s->io_buffer_index += io->len; + io->len = 0; + + s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, offset, 0x1, + pmac_ide_atapi_transfer_cb, io); return; done: @@ -310,6 +139,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) if (ret < 0) { MACIO_DPRINTF("DMA error: %d\n", ret); + qemu_sglist_destroy(&s->sg); ide_dma_error(s); goto done; } @@ -324,6 +154,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) if (s->io_buffer_size <= 0) { MACIO_DPRINTF("End of IDE transfer\n"); + qemu_sglist_destroy(&s->sg); s->status = READY_STAT | SEEK_STAT; ide_set_irq(s->bus); m->dma_active = false; @@ -338,15 +169,27 @@ static void pmac_ide_transfer_cb(void *opaque, int ret) /* Calculate number of sectors */ offset = (ide_get_sector(s) << 9) + s->io_buffer_index; + qemu_sglist_init(&s->sg, DEVICE(m), io->len / MACIO_PAGE_SIZE + 1, + &address_space_memory); + qemu_sglist_add(&s->sg, io->addr, io->len); + s->io_buffer_size -= io->len; + s->io_buffer_index += io->len; + io->len = 0; + switch (s->dma_cmd) { case IDE_DMA_READ: - pmac_dma_read(s->blk, offset, io->len, pmac_ide_transfer_cb, io); + s->bus->dma->aiocb = dma_blk_read(s->blk, &s->sg, offset, 0x1, + pmac_ide_atapi_transfer_cb, io); break; case IDE_DMA_WRITE: - 
pmac_dma_write(s->blk, offset, io->len, pmac_ide_transfer_cb, io); + s->bus->dma->aiocb = dma_blk_write(s->blk, &s->sg, offset, 0x1, + pmac_ide_transfer_cb, io); break; case IDE_DMA_TRIM: - pmac_dma_trim(s->blk, offset, io->len, pmac_ide_transfer_cb, io); + s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), &s->sg, + offset, 0x1, ide_issue_trim, s->blk, + pmac_ide_transfer_cb, io, + DMA_DIRECTION_TO_DEVICE); break; default: abort(); diff --git a/hw/ide/piix.c b/hw/ide/piix.c index c190fcaa3c..d5777fd0b3 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -179,6 +179,10 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev) if (di != NULL && !di->media_cd) { BlockBackend *blk = blk_by_legacy_dinfo(di); DeviceState *ds = blk_get_attached_dev(blk); + + blk_drain(blk); + blk_flush(blk); + if (ds) { blk_detach_dev(blk, ds); } diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index 67c76bfcd6..dbaa75cf59 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -31,6 +31,7 @@ /* --------------------------------- */ static char *idebus_get_fw_dev_path(DeviceState *dev); +static void idebus_unrealize(DeviceState *qdev, Error **errp); static Property ide_props[] = { DEFINE_PROP_UINT32("unit", IDEDevice, unit, -1), @@ -44,6 +45,15 @@ static void ide_bus_class_init(ObjectClass *klass, void *data) k->get_fw_dev_path = idebus_get_fw_dev_path; } +static void idebus_unrealize(DeviceState *qdev, Error **errp) +{ + IDEBus *bus = DO_UPCAST(IDEBus, qbus, qdev->parent_bus); + + if (bus->vmstate) { + qemu_del_vm_change_state_handler(bus->vmstate); + } +} + static const TypeInfo ide_bus_info = { .name = TYPE_IDE_BUS, .parent = TYPE_BUS, @@ -75,10 +85,6 @@ static int ide_qdev_init(DeviceState *qdev) IDEDeviceClass *dc = IDE_DEVICE_GET_CLASS(dev); IDEBus *bus = DO_UPCAST(IDEBus, qbus, qdev->parent_bus); - if (!dev->conf.blk) { - error_report("No drive specified"); - goto err; - } if (dev->unit == -1) { dev->unit = bus->master ? 1 : 0; } @@ -158,6 +164,16 @@ static int ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind) IDEState *s = bus->ifs + dev->unit; Error *err = NULL; + if (!dev->conf.blk) { + if (kind != IDE_CD) { + error_report("No drive specified"); + return -1; + } else { + /* Anonymous BlockBackend for an empty drive */ + dev->conf.blk = blk_new(); + } + } + if (dev->conf.discard_granularity == -1) { dev->conf.discard_granularity = 512; } else if (dev->conf.discard_granularity && @@ -257,7 +273,11 @@ static int ide_cd_initfn(IDEDevice *dev) static int ide_drive_initfn(IDEDevice *dev) { - DriveInfo *dinfo = blk_legacy_dinfo(dev->conf.blk); + DriveInfo *dinfo = NULL; + + if (dev->conf.blk) { + dinfo = blk_legacy_dinfo(dev->conf.blk); + } return ide_dev_initfn(dev, dinfo && dinfo->media_cd ? 
IDE_CD : IDE_HD); } @@ -345,6 +365,7 @@ static void ide_device_class_init(ObjectClass *klass, void *data) k->init = ide_qdev_init; set_bit(DEVICE_CATEGORY_STORAGE, k->categories); k->bus_type = TYPE_IDE_BUS; + k->unrealize = idebus_unrealize; k->props = ide_props; } diff --git a/hw/input/adb.c b/hw/input/adb.c index f0ad0d4471..43d3205472 100644 --- a/hw/input/adb.c +++ b/hw/input/adb.c @@ -25,6 +25,9 @@ #include "hw/hw.h" #include "hw/input/adb.h" #include "ui/console.h" +#include "include/hw/input/adb-keys.h" +#include "ui/input.h" +#include "sysemu/sysemu.h" /* debug ADB */ //#define DEBUG_ADB @@ -59,6 +62,9 @@ do { printf("ADB: " fmt , ## __VA_ARGS__); } while (0) /* error codes */ #define ADB_RET_NOTPRESENT (-2) +/* The adb keyboard doesn't have every key imaginable */ +#define NO_KEY 0xff + static void adb_device_reset(ADBDevice *d) { qdev_reset_all(DEVICE(d)); @@ -187,23 +193,125 @@ typedef struct ADBKeyboardClass { DeviceRealize parent_realize; } ADBKeyboardClass; -static const uint8_t pc_to_adb_keycode[256] = { - 0, 53, 18, 19, 20, 21, 23, 22, 26, 28, 25, 29, 27, 24, 51, 48, - 12, 13, 14, 15, 17, 16, 32, 34, 31, 35, 33, 30, 36, 54, 0, 1, - 2, 3, 5, 4, 38, 40, 37, 41, 39, 50, 56, 42, 6, 7, 8, 9, - 11, 45, 46, 43, 47, 44,123, 67, 58, 49, 57,122,120, 99,118, 96, - 97, 98,100,101,109, 71,107, 89, 91, 92, 78, 86, 87, 88, 69, 83, - 84, 85, 82, 65, 0, 0, 10,103,111, 0, 0,110, 81, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 94, 0, 93, 0, 0, 0, 0, 0, 0,104,102, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76,125, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,105, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 75, 0, 0,124, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0,115, 62,116, 0, 59, 0, 60, 0,119, - 61,121,114,117, 0, 0, 0, 0, 0, 0, 0, 55,126, 0,127, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 95, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +int qcode_to_adb_keycode[] = { + /* Make sure future additions are automatically set to NO_KEY */ + [0 ... 
0xff] = NO_KEY, + + [Q_KEY_CODE_SHIFT] = ADB_KEY_LEFT_SHIFT, + [Q_KEY_CODE_SHIFT_R] = ADB_KEY_RIGHT_SHIFT, + [Q_KEY_CODE_ALT] = ADB_KEY_LEFT_OPTION, + [Q_KEY_CODE_ALT_R] = ADB_KEY_RIGHT_OPTION, + [Q_KEY_CODE_ALTGR] = ADB_KEY_RIGHT_OPTION, + [Q_KEY_CODE_CTRL] = ADB_KEY_LEFT_CONTROL, + [Q_KEY_CODE_CTRL_R] = ADB_KEY_RIGHT_CONTROL, + [Q_KEY_CODE_META_L] = ADB_KEY_COMMAND, + [Q_KEY_CODE_META_R] = ADB_KEY_COMMAND, + [Q_KEY_CODE_SPC] = ADB_KEY_SPACEBAR, + + [Q_KEY_CODE_ESC] = ADB_KEY_ESC, + [Q_KEY_CODE_1] = ADB_KEY_1, + [Q_KEY_CODE_2] = ADB_KEY_2, + [Q_KEY_CODE_3] = ADB_KEY_3, + [Q_KEY_CODE_4] = ADB_KEY_4, + [Q_KEY_CODE_5] = ADB_KEY_5, + [Q_KEY_CODE_6] = ADB_KEY_6, + [Q_KEY_CODE_7] = ADB_KEY_7, + [Q_KEY_CODE_8] = ADB_KEY_8, + [Q_KEY_CODE_9] = ADB_KEY_9, + [Q_KEY_CODE_0] = ADB_KEY_0, + [Q_KEY_CODE_MINUS] = ADB_KEY_MINUS, + [Q_KEY_CODE_EQUAL] = ADB_KEY_EQUAL, + [Q_KEY_CODE_BACKSPACE] = ADB_KEY_DELETE, + [Q_KEY_CODE_TAB] = ADB_KEY_TAB, + [Q_KEY_CODE_Q] = ADB_KEY_Q, + [Q_KEY_CODE_W] = ADB_KEY_W, + [Q_KEY_CODE_E] = ADB_KEY_E, + [Q_KEY_CODE_R] = ADB_KEY_R, + [Q_KEY_CODE_T] = ADB_KEY_T, + [Q_KEY_CODE_Y] = ADB_KEY_Y, + [Q_KEY_CODE_U] = ADB_KEY_U, + [Q_KEY_CODE_I] = ADB_KEY_I, + [Q_KEY_CODE_O] = ADB_KEY_O, + [Q_KEY_CODE_P] = ADB_KEY_P, + [Q_KEY_CODE_BRACKET_LEFT] = ADB_KEY_LEFT_BRACKET, + [Q_KEY_CODE_BRACKET_RIGHT] = ADB_KEY_RIGHT_BRACKET, + [Q_KEY_CODE_RET] = ADB_KEY_RETURN, + [Q_KEY_CODE_A] = ADB_KEY_A, + [Q_KEY_CODE_S] = ADB_KEY_S, + [Q_KEY_CODE_D] = ADB_KEY_D, + [Q_KEY_CODE_F] = ADB_KEY_F, + [Q_KEY_CODE_G] = ADB_KEY_G, + [Q_KEY_CODE_H] = ADB_KEY_H, + [Q_KEY_CODE_J] = ADB_KEY_J, + [Q_KEY_CODE_K] = ADB_KEY_K, + [Q_KEY_CODE_L] = ADB_KEY_L, + [Q_KEY_CODE_SEMICOLON] = ADB_KEY_SEMICOLON, + [Q_KEY_CODE_APOSTROPHE] = ADB_KEY_APOSTROPHE, + [Q_KEY_CODE_GRAVE_ACCENT] = ADB_KEY_GRAVE_ACCENT, + [Q_KEY_CODE_BACKSLASH] = ADB_KEY_BACKSLASH, + [Q_KEY_CODE_Z] = ADB_KEY_Z, + [Q_KEY_CODE_X] = ADB_KEY_X, + [Q_KEY_CODE_C] = ADB_KEY_C, + [Q_KEY_CODE_V] = ADB_KEY_V, + [Q_KEY_CODE_B] = ADB_KEY_B, + [Q_KEY_CODE_N] = ADB_KEY_N, + [Q_KEY_CODE_M] = ADB_KEY_M, + [Q_KEY_CODE_COMMA] = ADB_KEY_COMMA, + [Q_KEY_CODE_DOT] = ADB_KEY_PERIOD, + [Q_KEY_CODE_SLASH] = ADB_KEY_FORWARD_SLASH, + [Q_KEY_CODE_ASTERISK] = ADB_KEY_KP_MULTIPLY, + [Q_KEY_CODE_CAPS_LOCK] = ADB_KEY_CAPS_LOCK, + + [Q_KEY_CODE_F1] = ADB_KEY_F1, + [Q_KEY_CODE_F2] = ADB_KEY_F2, + [Q_KEY_CODE_F3] = ADB_KEY_F3, + [Q_KEY_CODE_F4] = ADB_KEY_F4, + [Q_KEY_CODE_F5] = ADB_KEY_F5, + [Q_KEY_CODE_F6] = ADB_KEY_F6, + [Q_KEY_CODE_F7] = ADB_KEY_F7, + [Q_KEY_CODE_F8] = ADB_KEY_F8, + [Q_KEY_CODE_F9] = ADB_KEY_F9, + [Q_KEY_CODE_F10] = ADB_KEY_F10, + [Q_KEY_CODE_F11] = ADB_KEY_F11, + [Q_KEY_CODE_F12] = ADB_KEY_F12, + [Q_KEY_CODE_PRINT] = ADB_KEY_F13, + [Q_KEY_CODE_SYSRQ] = ADB_KEY_F13, + [Q_KEY_CODE_SCROLL_LOCK] = ADB_KEY_F14, + [Q_KEY_CODE_PAUSE] = ADB_KEY_F15, + + [Q_KEY_CODE_NUM_LOCK] = ADB_KEY_KP_CLEAR, + [Q_KEY_CODE_KP_EQUALS] = ADB_KEY_KP_EQUAL, + [Q_KEY_CODE_KP_DIVIDE] = ADB_KEY_KP_DIVIDE, + [Q_KEY_CODE_KP_MULTIPLY] = ADB_KEY_KP_MULTIPLY, + [Q_KEY_CODE_KP_SUBTRACT] = ADB_KEY_KP_SUBTRACT, + [Q_KEY_CODE_KP_ADD] = ADB_KEY_KP_PLUS, + [Q_KEY_CODE_KP_ENTER] = ADB_KEY_KP_ENTER, + [Q_KEY_CODE_KP_DECIMAL] = ADB_KEY_KP_PERIOD, + [Q_KEY_CODE_KP_0] = ADB_KEY_KP_0, + [Q_KEY_CODE_KP_1] = ADB_KEY_KP_1, + [Q_KEY_CODE_KP_2] = ADB_KEY_KP_2, + [Q_KEY_CODE_KP_3] = ADB_KEY_KP_3, + [Q_KEY_CODE_KP_4] = ADB_KEY_KP_4, + [Q_KEY_CODE_KP_5] = ADB_KEY_KP_5, + [Q_KEY_CODE_KP_6] = ADB_KEY_KP_6, + [Q_KEY_CODE_KP_7] = ADB_KEY_KP_7, + [Q_KEY_CODE_KP_8] = ADB_KEY_KP_8, + [Q_KEY_CODE_KP_9] = ADB_KEY_KP_9, + + 
[Q_KEY_CODE_UP] = ADB_KEY_UP, + [Q_KEY_CODE_DOWN] = ADB_KEY_DOWN, + [Q_KEY_CODE_LEFT] = ADB_KEY_LEFT, + [Q_KEY_CODE_RIGHT] = ADB_KEY_RIGHT, + + [Q_KEY_CODE_HELP] = ADB_KEY_HELP, + [Q_KEY_CODE_INSERT] = ADB_KEY_HELP, + [Q_KEY_CODE_DELETE] = ADB_KEY_FORWARD_DELETE, + [Q_KEY_CODE_HOME] = ADB_KEY_HOME, + [Q_KEY_CODE_END] = ADB_KEY_END, + [Q_KEY_CODE_PGUP] = ADB_KEY_PAGE_UP, + [Q_KEY_CODE_PGDN] = ADB_KEY_PAGE_DOWN, + + [Q_KEY_CODE_POWER] = ADB_KEY_POWER }; static void adb_kbd_put_keycode(void *opaque, int keycode) @@ -220,35 +328,40 @@ static void adb_kbd_put_keycode(void *opaque, int keycode) static int adb_kbd_poll(ADBDevice *d, uint8_t *obuf) { - static int ext_keycode; KBDState *s = ADB_KEYBOARD(d); - int adb_keycode, keycode; + int keycode; int olen; olen = 0; - for(;;) { - if (s->count == 0) - break; - keycode = s->data[s->rptr]; - if (++s->rptr == sizeof(s->data)) - s->rptr = 0; - s->count--; - - if (keycode == 0xe0) { - ext_keycode = 1; - } else { - if (ext_keycode) - adb_keycode = pc_to_adb_keycode[keycode | 0x80]; - else - adb_keycode = pc_to_adb_keycode[keycode & 0x7f]; - obuf[0] = adb_keycode | (keycode & 0x80); - /* NOTE: could put a second keycode if needed */ - obuf[1] = 0xff; - olen = 2; - ext_keycode = 0; - break; - } + if (s->count == 0) { + return 0; } + keycode = s->data[s->rptr]; + s->rptr++; + if (s->rptr == sizeof(s->data)) { + s->rptr = 0; + } + s->count--; + /* + * The power key is the only two byte value key, so it is a special case. + * Since 0x7f is not a used keycode for ADB we overload it to indicate the + * power button when we're storing keycodes in our internal buffer, and + * expand it out to two bytes when we send to the guest. + */ + if (keycode == 0x7f) { + obuf[0] = 0x7f; + obuf[1] = 0x7f; + olen = 2; + } else { + obuf[0] = keycode; + /* NOTE: the power key key-up is the two byte sequence 0xff 0xff; + * otherwise we could in theory send a second keycode in the second + * byte, but choose not to bother. 
+ */ + obuf[1] = 0xff; + olen = 2; + } + return olen; } @@ -283,9 +396,15 @@ static int adb_kbd_request(ADBDevice *d, uint8_t *obuf, d->devaddr = buf[1] & 0xf; break; default: - /* XXX: check this */ d->devaddr = buf[1] & 0xf; - d->handler = buf[2]; + /* we support handlers: + * 1: Apple Standard Keyboard + * 2: Apple Extended Keyboard (LShift = RShift) + * 3: Apple Extended Keyboard (LShift != RShift) + */ + if (buf[2] == 1 || buf[2] == 2 || buf[2] == 3) { + d->handler = buf[2]; + } break; } } @@ -313,6 +432,30 @@ static int adb_kbd_request(ADBDevice *d, uint8_t *obuf, return olen; } +/* This is where keyboard events enter this file */ +static void adb_keyboard_event(DeviceState *dev, QemuConsole *src, + InputEvent *evt) +{ + KBDState *s = (KBDState *)dev; + int qcode, keycode; + + qcode = qemu_input_key_value_to_qcode(evt->u.key.data->key); + if (qcode >= ARRAY_SIZE(qcode_to_adb_keycode)) { + return; + } + /* FIXME: take handler into account when translating qcode */ + keycode = qcode_to_adb_keycode[qcode]; + if (keycode == NO_KEY) { /* We don't want to send this to the guest */ + ADB_DPRINTF("Ignoring NO_KEY\n"); + return; + } + if (evt->u.key.data->down == false) { /* if key release event */ + keycode = keycode | 0x80; /* create keyboard break code */ + } + + adb_kbd_put_keycode(s, keycode); +} + static const VMStateDescription vmstate_adb_kbd = { .name = "adb_kbd", .version_id = 2, @@ -340,14 +483,17 @@ static void adb_kbd_reset(DeviceState *dev) s->count = 0; } +static QemuInputHandler adb_keyboard_handler = { + .name = "QEMU ADB Keyboard", + .mask = INPUT_EVENT_MASK_KEY, + .event = adb_keyboard_event, +}; + static void adb_kbd_realizefn(DeviceState *dev, Error **errp) { - ADBDevice *d = ADB_DEVICE(dev); ADBKeyboardClass *akc = ADB_KEYBOARD_GET_CLASS(dev); - akc->parent_realize(dev, errp); - - qemu_add_kbd_event_handler(adb_kbd_put_keycode, d); + qemu_input_handler_register(dev, &adb_keyboard_handler); } static void adb_kbd_initfn(Object *obj) @@ -492,8 +638,21 @@ static int adb_mouse_request(ADBDevice *d, uint8_t *obuf, d->devaddr = buf[1] & 0xf; break; default: - /* XXX: check this */ d->devaddr = buf[1] & 0xf; + /* we support handlers: + * 0x01: Classic Apple Mouse Protocol / 100 cpi operations + * 0x02: Classic Apple Mouse Protocol / 200 cpi operations + * we don't support handlers (at least): + * 0x03: Mouse systems A3 trackball + * 0x04: Extended Apple Mouse Protocol + * 0x2f: Microspeed mouse + * 0x42: Macally + * 0x5f: Microspeed mouse + * 0x66: Microspeed mouse + */ + if (buf[2] == 1 || buf[2] == 2) { + d->handler = buf[2]; + } break; } } diff --git a/hw/input/hid.c b/hw/input/hid.c index 5e2850e655..fa9cc4c616 100644 --- a/hw/input/hid.c +++ b/hw/input/hid.c @@ -46,7 +46,7 @@ static const uint8_t hid_usage_keys[0x100] = { 0xe2, 0x2c, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x53, 0x47, 0x5f, 0x60, 0x61, 0x56, 0x5c, 0x5d, 0x5e, 0x57, 0x59, - 0x5a, 0x5b, 0x62, 0x63, 0x00, 0x00, 0x64, 0x44, + 0x5a, 0x5b, 0x62, 0x63, 0x46, 0x00, 0x64, 0x44, 0x45, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0xe8, 0xe9, 0x71, 0x72, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x00, 0x00, 0x00, 0x00, @@ -61,7 +61,7 @@ static const uint8_t hid_usage_keys[0x100] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x00, 0x46, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x4a, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x48, 0x4a, 0x52, 0x4b, 0x00, 0x50, 0x00, 0x4f, 0x00, 0x4d, 0x51, 0x4e, 0x49, 
0x4c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe3, 0xe7, 0x65, 0x00, 0x00, diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index dc57e2c762..d414288839 100644 --- a/hw/input/pckbd.c +++ b/hw/input/pckbd.c @@ -499,9 +499,9 @@ void i8042_isa_mouse_fake_event(void *opaque) ps2_mouse_fake_event(s->mouse); } -void i8042_setup_a20_line(ISADevice *dev, qemu_irq *a20_out) +void i8042_setup_a20_line(ISADevice *dev, qemu_irq a20_out) { - qdev_connect_gpio_out_named(DEVICE(dev), I8042_A20_LINE, 0, *a20_out); + qdev_connect_gpio_out_named(DEVICE(dev), I8042_A20_LINE, 0, a20_out); } static const VMStateDescription vmstate_kbd_isa = { diff --git a/hw/input/ps2.c b/hw/input/ps2.c index a8aa36f5c0..0d14de08a6 100644 --- a/hw/input/ps2.c +++ b/hw/input/ps2.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "hw/hw.h" #include "hw/input/ps2.h" #include "ui/console.h" @@ -94,12 +95,10 @@ typedef struct { typedef struct { PS2State common; int scan_enabled; - /* QEMU uses translated PC scancodes internally. To avoid multiple - conversions we do the translation (if any) in the PS/2 emulation - not the keyboard controller. */ int translate; int scancode_set; /* 1=XT, 2=AT, 3=PS/2 */ int ledstate; + bool need_high_bit; } PS2KbdState; typedef struct { @@ -116,26 +115,430 @@ typedef struct { uint8_t mouse_buttons; } PS2MouseState; -/* Table to convert from PC scancodes to raw scancodes. */ -static const unsigned char ps2_raw_keycode[128] = { - 0, 118, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85, 102, 13, - 21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 20, 28, 27, - 35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 93, 26, 34, 33, 42, - 50, 49, 58, 65, 73, 74, 89, 124, 17, 41, 88, 5, 6, 4, 12, 3, - 11, 2, 10, 1, 9, 119, 126, 108, 117, 125, 123, 107, 115, 116, 121, 105, -114, 122, 112, 113, 127, 96, 97, 120, 7, 15, 23, 31, 39, 47, 55, 63, - 71, 79, 86, 94, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87, 111, - 19, 25, 57, 81, 83, 92, 95, 98, 99, 100, 101, 103, 104, 106, 109, 110 +/* Table to convert from QEMU codes to scancodes. */ +static const uint16_t qcode_to_keycode_set1[Q_KEY_CODE__MAX] = { + [0 ... 
Q_KEY_CODE__MAX - 1] = 0, + + [Q_KEY_CODE_A] = 0x1e, + [Q_KEY_CODE_B] = 0x30, + [Q_KEY_CODE_C] = 0x2e, + [Q_KEY_CODE_D] = 0x20, + [Q_KEY_CODE_E] = 0x12, + [Q_KEY_CODE_F] = 0x21, + [Q_KEY_CODE_G] = 0x22, + [Q_KEY_CODE_H] = 0x23, + [Q_KEY_CODE_I] = 0x17, + [Q_KEY_CODE_J] = 0x24, + [Q_KEY_CODE_K] = 0x25, + [Q_KEY_CODE_L] = 0x26, + [Q_KEY_CODE_M] = 0x32, + [Q_KEY_CODE_N] = 0x31, + [Q_KEY_CODE_O] = 0x18, + [Q_KEY_CODE_P] = 0x19, + [Q_KEY_CODE_Q] = 0x10, + [Q_KEY_CODE_R] = 0x13, + [Q_KEY_CODE_S] = 0x1f, + [Q_KEY_CODE_T] = 0x14, + [Q_KEY_CODE_U] = 0x16, + [Q_KEY_CODE_V] = 0x2f, + [Q_KEY_CODE_W] = 0x11, + [Q_KEY_CODE_X] = 0x2d, + [Q_KEY_CODE_Y] = 0x15, + [Q_KEY_CODE_Z] = 0x2c, + [Q_KEY_CODE_0] = 0x0b, + [Q_KEY_CODE_1] = 0x02, + [Q_KEY_CODE_2] = 0x03, + [Q_KEY_CODE_3] = 0x04, + [Q_KEY_CODE_4] = 0x05, + [Q_KEY_CODE_5] = 0x06, + [Q_KEY_CODE_6] = 0x07, + [Q_KEY_CODE_7] = 0x08, + [Q_KEY_CODE_8] = 0x09, + [Q_KEY_CODE_9] = 0x0a, + [Q_KEY_CODE_GRAVE_ACCENT] = 0x29, + [Q_KEY_CODE_MINUS] = 0x0c, + [Q_KEY_CODE_EQUAL] = 0x0d, + [Q_KEY_CODE_BACKSLASH] = 0x2b, + [Q_KEY_CODE_BACKSPACE] = 0x0e, + [Q_KEY_CODE_SPC] = 0x39, + [Q_KEY_CODE_TAB] = 0x0f, + [Q_KEY_CODE_CAPS_LOCK] = 0x3a, + [Q_KEY_CODE_SHIFT] = 0x2a, + [Q_KEY_CODE_CTRL] = 0x1d, + [Q_KEY_CODE_META_L] = 0xe05b, + [Q_KEY_CODE_ALT] = 0x38, + [Q_KEY_CODE_SHIFT_R] = 0x36, + [Q_KEY_CODE_CTRL_R] = 0xe01d, + [Q_KEY_CODE_META_R] = 0xe05c, + [Q_KEY_CODE_ALT_R] = 0xe038, + [Q_KEY_CODE_MENU] = 0xe05d, + [Q_KEY_CODE_RET] = 0x1c, + [Q_KEY_CODE_ESC] = 0x01, + [Q_KEY_CODE_F1] = 0x3b, + [Q_KEY_CODE_F2] = 0x3c, + [Q_KEY_CODE_F3] = 0x3d, + [Q_KEY_CODE_F4] = 0x3e, + [Q_KEY_CODE_F5] = 0x3f, + [Q_KEY_CODE_F6] = 0x40, + [Q_KEY_CODE_F7] = 0x41, + [Q_KEY_CODE_F8] = 0x42, + [Q_KEY_CODE_F9] = 0x43, + [Q_KEY_CODE_F10] = 0x44, + [Q_KEY_CODE_F11] = 0x57, + [Q_KEY_CODE_F12] = 0x58, + /* special handling for Q_KEY_CODE_PRINT */ + [Q_KEY_CODE_SCROLL_LOCK] = 0x46, + /* special handling for Q_KEY_CODE_PAUSE */ + [Q_KEY_CODE_BRACKET_LEFT] = 0x1a, + [Q_KEY_CODE_INSERT] = 0xe052, + [Q_KEY_CODE_HOME] = 0xe047, + [Q_KEY_CODE_PGUP] = 0xe049, + [Q_KEY_CODE_DELETE] = 0xe053, + [Q_KEY_CODE_END] = 0xe04f, + [Q_KEY_CODE_PGDN] = 0xe051, + [Q_KEY_CODE_UP] = 0xe048, + [Q_KEY_CODE_LEFT] = 0xe04b, + [Q_KEY_CODE_DOWN] = 0xe050, + [Q_KEY_CODE_RIGHT] = 0xe04d, + [Q_KEY_CODE_NUM_LOCK] = 0x45, + [Q_KEY_CODE_KP_DIVIDE] = 0xe035, + [Q_KEY_CODE_KP_MULTIPLY] = 0x37, + [Q_KEY_CODE_KP_SUBTRACT] = 0x4a, + [Q_KEY_CODE_KP_ADD] = 0x4e, + [Q_KEY_CODE_KP_ENTER] = 0xe01c, + [Q_KEY_CODE_KP_DECIMAL] = 0x53, + [Q_KEY_CODE_KP_0] = 0x52, + [Q_KEY_CODE_KP_1] = 0x4f, + [Q_KEY_CODE_KP_2] = 0x50, + [Q_KEY_CODE_KP_3] = 0x51, + [Q_KEY_CODE_KP_4] = 0x4b, + [Q_KEY_CODE_KP_5] = 0x4c, + [Q_KEY_CODE_KP_6] = 0x4d, + [Q_KEY_CODE_KP_7] = 0x47, + [Q_KEY_CODE_KP_8] = 0x48, + [Q_KEY_CODE_KP_9] = 0x49, + [Q_KEY_CODE_BRACKET_RIGHT] = 0x1b, + [Q_KEY_CODE_SEMICOLON] = 0x27, + [Q_KEY_CODE_APOSTROPHE] = 0x28, + [Q_KEY_CODE_COMMA] = 0x33, + [Q_KEY_CODE_DOT] = 0x34, + [Q_KEY_CODE_SLASH] = 0x35, + +#if 0 + [Q_KEY_CODE_POWER] = 0x0e5e, + [Q_KEY_CODE_SLEEP] = 0x0e5f, + [Q_KEY_CODE_WAKE] = 0x0e63, + + [Q_KEY_CODE_AUDIONEXT] = 0xe019, + [Q_KEY_CODE_AUDIOPREV] = 0xe010, + [Q_KEY_CODE_AUDIOSTOP] = 0xe024, + [Q_KEY_CODE_AUDIOPLAY] = 0xe022, + [Q_KEY_CODE_AUDIOMUTE] = 0xe020, + [Q_KEY_CODE_VOLUMEUP] = 0xe030, + [Q_KEY_CODE_VOLUMEDOWN] = 0xe02e, + [Q_KEY_CODE_MEDIASELECT] = 0xe06d, + [Q_KEY_CODE_MAIL] = 0xe06c, + [Q_KEY_CODE_CALCULATOR] = 0xe021, + [Q_KEY_CODE_COMPUTER] = 0xe06b, + [Q_KEY_CODE_AC_SEARCH] = 0xe065, + [Q_KEY_CODE_AC_HOME] = 0xe032, + 
[Q_KEY_CODE_AC_BACK] = 0xe06a, + [Q_KEY_CODE_AC_FORWARD] = 0xe069, + [Q_KEY_CODE_AC_STOP] = 0xe068, + [Q_KEY_CODE_AC_REFRESH] = 0xe067, + [Q_KEY_CODE_AC_BOOKMARKS] = 0xe066, +#endif + + [Q_KEY_CODE_ASTERISK] = 0x37, + [Q_KEY_CODE_LESS] = 0x56, + [Q_KEY_CODE_RO] = 0x73, + [Q_KEY_CODE_KP_COMMA] = 0x7e, }; -static const unsigned char ps2_raw_keycode_set3[128] = { - 0, 8, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85, 102, 13, - 21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 17, 28, 27, - 35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 92, 26, 34, 33, 42, - 50, 49, 58, 65, 73, 74, 89, 126, 25, 41, 20, 7, 15, 23, 31, 39, - 47, 2, 63, 71, 79, 118, 95, 108, 117, 125, 132, 107, 115, 116, 124, 105, -114, 122, 112, 113, 127, 96, 97, 86, 94, 15, 23, 31, 39, 47, 55, 63, - 71, 79, 86, 94, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87, 111, - 19, 25, 57, 81, 83, 92, 95, 98, 99, 100, 101, 103, 104, 106, 109, 110 + +static const uint16_t qcode_to_keycode_set2[Q_KEY_CODE__MAX] = { + [0 ... Q_KEY_CODE__MAX - 1] = 0, + + [Q_KEY_CODE_A] = 0x1c, + [Q_KEY_CODE_B] = 0x32, + [Q_KEY_CODE_C] = 0x21, + [Q_KEY_CODE_D] = 0x23, + [Q_KEY_CODE_E] = 0x24, + [Q_KEY_CODE_F] = 0x2b, + [Q_KEY_CODE_G] = 0x34, + [Q_KEY_CODE_H] = 0x33, + [Q_KEY_CODE_I] = 0x43, + [Q_KEY_CODE_J] = 0x3b, + [Q_KEY_CODE_K] = 0x42, + [Q_KEY_CODE_L] = 0x4b, + [Q_KEY_CODE_M] = 0x3a, + [Q_KEY_CODE_N] = 0x31, + [Q_KEY_CODE_O] = 0x44, + [Q_KEY_CODE_P] = 0x4d, + [Q_KEY_CODE_Q] = 0x15, + [Q_KEY_CODE_R] = 0x2d, + [Q_KEY_CODE_S] = 0x1b, + [Q_KEY_CODE_T] = 0x2c, + [Q_KEY_CODE_U] = 0x3c, + [Q_KEY_CODE_V] = 0x2a, + [Q_KEY_CODE_W] = 0x1d, + [Q_KEY_CODE_X] = 0x22, + [Q_KEY_CODE_Y] = 0x35, + [Q_KEY_CODE_Z] = 0x1a, + [Q_KEY_CODE_0] = 0x45, + [Q_KEY_CODE_1] = 0x16, + [Q_KEY_CODE_2] = 0x1e, + [Q_KEY_CODE_3] = 0x26, + [Q_KEY_CODE_4] = 0x25, + [Q_KEY_CODE_5] = 0x2e, + [Q_KEY_CODE_6] = 0x36, + [Q_KEY_CODE_7] = 0x3d, + [Q_KEY_CODE_8] = 0x3e, + [Q_KEY_CODE_9] = 0x46, + [Q_KEY_CODE_GRAVE_ACCENT] = 0x0e, + [Q_KEY_CODE_MINUS] = 0x4e, + [Q_KEY_CODE_EQUAL] = 0x55, + [Q_KEY_CODE_BACKSLASH] = 0x5d, + [Q_KEY_CODE_BACKSPACE] = 0x66, + [Q_KEY_CODE_SPC] = 0x29, + [Q_KEY_CODE_TAB] = 0x0d, + [Q_KEY_CODE_CAPS_LOCK] = 0x58, + [Q_KEY_CODE_SHIFT] = 0x12, + [Q_KEY_CODE_CTRL] = 0x14, + [Q_KEY_CODE_META_L] = 0xe01f, + [Q_KEY_CODE_ALT] = 0x11, + [Q_KEY_CODE_SHIFT_R] = 0x59, + [Q_KEY_CODE_CTRL_R] = 0xe014, + [Q_KEY_CODE_META_R] = 0xe027, + [Q_KEY_CODE_ALT_R] = 0xe011, + [Q_KEY_CODE_MENU] = 0xe02f, + [Q_KEY_CODE_RET] = 0x5a, + [Q_KEY_CODE_ESC] = 0x76, + [Q_KEY_CODE_F1] = 0x05, + [Q_KEY_CODE_F2] = 0x06, + [Q_KEY_CODE_F3] = 0x04, + [Q_KEY_CODE_F4] = 0x0c, + [Q_KEY_CODE_F5] = 0x03, + [Q_KEY_CODE_F6] = 0x0b, + [Q_KEY_CODE_F7] = 0x83, + [Q_KEY_CODE_F8] = 0x0a, + [Q_KEY_CODE_F9] = 0x01, + [Q_KEY_CODE_F10] = 0x09, + [Q_KEY_CODE_F11] = 0x78, + [Q_KEY_CODE_F12] = 0x07, + /* special handling for Q_KEY_CODE_PRINT */ + [Q_KEY_CODE_SCROLL_LOCK] = 0x7e, + /* special handling for Q_KEY_CODE_PAUSE */ + [Q_KEY_CODE_BRACKET_LEFT] = 0x54, + [Q_KEY_CODE_INSERT] = 0xe070, + [Q_KEY_CODE_HOME] = 0xe06c, + [Q_KEY_CODE_PGUP] = 0xe07d, + [Q_KEY_CODE_DELETE] = 0xe071, + [Q_KEY_CODE_END] = 0xe069, + [Q_KEY_CODE_PGDN] = 0xe07a, + [Q_KEY_CODE_UP] = 0xe075, + [Q_KEY_CODE_LEFT] = 0xe06b, + [Q_KEY_CODE_DOWN] = 0xe072, + [Q_KEY_CODE_RIGHT] = 0xe074, + [Q_KEY_CODE_NUM_LOCK] = 0x77, + [Q_KEY_CODE_KP_DIVIDE] = 0xe04a, + [Q_KEY_CODE_KP_MULTIPLY] = 0x7c, + [Q_KEY_CODE_KP_SUBTRACT] = 0x7b, + [Q_KEY_CODE_KP_ADD] = 0x79, + [Q_KEY_CODE_KP_ENTER] = 0xe05a, + [Q_KEY_CODE_KP_DECIMAL] = 0x71, + [Q_KEY_CODE_KP_0] = 0x70, + [Q_KEY_CODE_KP_1] = 
0x69, + [Q_KEY_CODE_KP_2] = 0x72, + [Q_KEY_CODE_KP_3] = 0x7a, + [Q_KEY_CODE_KP_4] = 0x6b, + [Q_KEY_CODE_KP_5] = 0x73, + [Q_KEY_CODE_KP_6] = 0x74, + [Q_KEY_CODE_KP_7] = 0x6c, + [Q_KEY_CODE_KP_8] = 0x75, + [Q_KEY_CODE_KP_9] = 0x7d, + [Q_KEY_CODE_BRACKET_RIGHT] = 0x5b, + [Q_KEY_CODE_SEMICOLON] = 0x4c, + [Q_KEY_CODE_APOSTROPHE] = 0x52, + [Q_KEY_CODE_COMMA] = 0x41, + [Q_KEY_CODE_DOT] = 0x49, + [Q_KEY_CODE_SLASH] = 0x4a, + +#if 0 + [Q_KEY_CODE_POWER] = 0x0e37, + [Q_KEY_CODE_SLEEP] = 0x0e3f, + [Q_KEY_CODE_WAKE] = 0x0e5e, + + [Q_KEY_CODE_AUDIONEXT] = 0xe04d, + [Q_KEY_CODE_AUDIOPREV] = 0xe015, + [Q_KEY_CODE_AUDIOSTOP] = 0xe03b, + [Q_KEY_CODE_AUDIOPLAY] = 0xe034, + [Q_KEY_CODE_AUDIOMUTE] = 0xe023, + [Q_KEY_CODE_VOLUMEUP] = 0xe032, + [Q_KEY_CODE_VOLUMEDOWN] = 0xe021, + [Q_KEY_CODE_MEDIASELECT] = 0xe050, + [Q_KEY_CODE_MAIL] = 0xe048, + [Q_KEY_CODE_CALCULATOR] = 0xe02b, + [Q_KEY_CODE_COMPUTER] = 0xe040, + [Q_KEY_CODE_AC_SEARCH] = 0xe010, + [Q_KEY_CODE_AC_HOME] = 0xe03a, + [Q_KEY_CODE_AC_BACK] = 0xe038, + [Q_KEY_CODE_AC_FORWARD] = 0xe030, + [Q_KEY_CODE_AC_STOP] = 0xe028, + [Q_KEY_CODE_AC_REFRESH] = 0xe020, + [Q_KEY_CODE_AC_BOOKMARKS] = 0xe018, +#endif + + [Q_KEY_CODE_ALTGR] = 0x08, + [Q_KEY_CODE_ALTGR_R] = 0xe008, + [Q_KEY_CODE_ASTERISK] = 0x7c, + [Q_KEY_CODE_LESS] = 0x61, + [Q_KEY_CODE_SYSRQ] = 0x7f, + [Q_KEY_CODE_RO] = 0x51, + [Q_KEY_CODE_KP_COMMA] = 0x6d, +}; + +static const uint16_t qcode_to_keycode_set3[Q_KEY_CODE__MAX] = { + [0 ... Q_KEY_CODE__MAX - 1] = 0, + + [Q_KEY_CODE_A] = 0x1c, + [Q_KEY_CODE_B] = 0x32, + [Q_KEY_CODE_C] = 0x21, + [Q_KEY_CODE_D] = 0x23, + [Q_KEY_CODE_E] = 0x24, + [Q_KEY_CODE_F] = 0x2b, + [Q_KEY_CODE_G] = 0x34, + [Q_KEY_CODE_H] = 0x33, + [Q_KEY_CODE_I] = 0x43, + [Q_KEY_CODE_J] = 0x3b, + [Q_KEY_CODE_K] = 0x42, + [Q_KEY_CODE_L] = 0x4b, + [Q_KEY_CODE_M] = 0x3a, + [Q_KEY_CODE_N] = 0x31, + [Q_KEY_CODE_O] = 0x44, + [Q_KEY_CODE_P] = 0x4d, + [Q_KEY_CODE_Q] = 0x15, + [Q_KEY_CODE_R] = 0x2d, + [Q_KEY_CODE_S] = 0x1b, + [Q_KEY_CODE_T] = 0x2c, + [Q_KEY_CODE_U] = 0x3c, + [Q_KEY_CODE_V] = 0x2a, + [Q_KEY_CODE_W] = 0x1d, + [Q_KEY_CODE_X] = 0x22, + [Q_KEY_CODE_Y] = 0x35, + [Q_KEY_CODE_Z] = 0x1a, + [Q_KEY_CODE_0] = 0x45, + [Q_KEY_CODE_1] = 0x16, + [Q_KEY_CODE_2] = 0x1e, + [Q_KEY_CODE_3] = 0x26, + [Q_KEY_CODE_4] = 0x25, + [Q_KEY_CODE_5] = 0x2e, + [Q_KEY_CODE_6] = 0x36, + [Q_KEY_CODE_7] = 0x3d, + [Q_KEY_CODE_8] = 0x3e, + [Q_KEY_CODE_9] = 0x46, + [Q_KEY_CODE_GRAVE_ACCENT] = 0x0e, + [Q_KEY_CODE_MINUS] = 0x4e, + [Q_KEY_CODE_EQUAL] = 0x55, + [Q_KEY_CODE_BACKSLASH] = 0x5c, + [Q_KEY_CODE_BACKSPACE] = 0x66, + [Q_KEY_CODE_SPC] = 0x29, + [Q_KEY_CODE_TAB] = 0x0d, + [Q_KEY_CODE_CAPS_LOCK] = 0x14, + [Q_KEY_CODE_SHIFT] = 0x12, + [Q_KEY_CODE_CTRL] = 0x11, + [Q_KEY_CODE_META_L] = 0x8b, + [Q_KEY_CODE_ALT] = 0x19, + [Q_KEY_CODE_SHIFT_R] = 0x59, + [Q_KEY_CODE_CTRL_R] = 0x58, + [Q_KEY_CODE_META_R] = 0x8c, + [Q_KEY_CODE_ALT_R] = 0x39, + [Q_KEY_CODE_MENU] = 0x8d, + [Q_KEY_CODE_RET] = 0x5a, + [Q_KEY_CODE_ESC] = 0x08, + [Q_KEY_CODE_F1] = 0x07, + [Q_KEY_CODE_F2] = 0x0f, + [Q_KEY_CODE_F3] = 0x17, + [Q_KEY_CODE_F4] = 0x1f, + [Q_KEY_CODE_F5] = 0x27, + [Q_KEY_CODE_F6] = 0x2f, + [Q_KEY_CODE_F7] = 0x37, + [Q_KEY_CODE_F8] = 0x3f, + [Q_KEY_CODE_F9] = 0x47, + [Q_KEY_CODE_F10] = 0x4f, + [Q_KEY_CODE_F11] = 0x56, + [Q_KEY_CODE_F12] = 0x5e, + [Q_KEY_CODE_PRINT] = 0x57, + [Q_KEY_CODE_SCROLL_LOCK] = 0x5f, + [Q_KEY_CODE_PAUSE] = 0x62, + [Q_KEY_CODE_BRACKET_LEFT] = 0x54, + [Q_KEY_CODE_INSERT] = 0x67, + [Q_KEY_CODE_HOME] = 0x6e, + [Q_KEY_CODE_PGUP] = 0x6f, + [Q_KEY_CODE_DELETE] = 0x64, + [Q_KEY_CODE_END] = 0x65, + [Q_KEY_CODE_PGDN] = 0x6d, 
+ [Q_KEY_CODE_UP] = 0x63, + [Q_KEY_CODE_LEFT] = 0x61, + [Q_KEY_CODE_DOWN] = 0x60, + [Q_KEY_CODE_RIGHT] = 0x6a, + [Q_KEY_CODE_NUM_LOCK] = 0x76, + [Q_KEY_CODE_KP_DIVIDE] = 0x4a, + [Q_KEY_CODE_KP_MULTIPLY] = 0x7e, + [Q_KEY_CODE_KP_SUBTRACT] = 0x4e, + [Q_KEY_CODE_KP_ADD] = 0x7c, + [Q_KEY_CODE_KP_ENTER] = 0x79, + [Q_KEY_CODE_KP_DECIMAL] = 0x71, + [Q_KEY_CODE_KP_0] = 0x70, + [Q_KEY_CODE_KP_1] = 0x69, + [Q_KEY_CODE_KP_2] = 0x72, + [Q_KEY_CODE_KP_3] = 0x7a, + [Q_KEY_CODE_KP_4] = 0x6b, + [Q_KEY_CODE_KP_5] = 0x73, + [Q_KEY_CODE_KP_6] = 0x74, + [Q_KEY_CODE_KP_7] = 0x6c, + [Q_KEY_CODE_KP_8] = 0x75, + [Q_KEY_CODE_KP_9] = 0x7d, + [Q_KEY_CODE_BRACKET_RIGHT] = 0x5b, + [Q_KEY_CODE_SEMICOLON] = 0x4c, + [Q_KEY_CODE_APOSTROPHE] = 0x52, + [Q_KEY_CODE_COMMA] = 0x41, + [Q_KEY_CODE_DOT] = 0x49, + [Q_KEY_CODE_SLASH] = 0x4a, +}; + +static uint8_t translate_table[256] = { + 0xff, 0x43, 0x41, 0x3f, 0x3d, 0x3b, 0x3c, 0x58, + 0x64, 0x44, 0x42, 0x40, 0x3e, 0x0f, 0x29, 0x59, + 0x65, 0x38, 0x2a, 0x70, 0x1d, 0x10, 0x02, 0x5a, + 0x66, 0x71, 0x2c, 0x1f, 0x1e, 0x11, 0x03, 0x5b, + 0x67, 0x2e, 0x2d, 0x20, 0x12, 0x05, 0x04, 0x5c, + 0x68, 0x39, 0x2f, 0x21, 0x14, 0x13, 0x06, 0x5d, + 0x69, 0x31, 0x30, 0x23, 0x22, 0x15, 0x07, 0x5e, + 0x6a, 0x72, 0x32, 0x24, 0x16, 0x08, 0x09, 0x5f, + 0x6b, 0x33, 0x25, 0x17, 0x18, 0x0b, 0x0a, 0x60, + 0x6c, 0x34, 0x35, 0x26, 0x27, 0x19, 0x0c, 0x61, + 0x6d, 0x73, 0x28, 0x74, 0x1a, 0x0d, 0x62, 0x6e, + 0x3a, 0x36, 0x1c, 0x1b, 0x75, 0x2b, 0x63, 0x76, + 0x55, 0x56, 0x77, 0x78, 0x79, 0x7a, 0x0e, 0x7b, + 0x7c, 0x4f, 0x7d, 0x4b, 0x47, 0x7e, 0x7f, 0x6f, + 0x52, 0x53, 0x50, 0x4c, 0x4d, 0x48, 0x01, 0x45, + 0x57, 0x4e, 0x51, 0x4a, 0x37, 0x49, 0x46, 0x54, + 0x80, 0x81, 0x82, 0x41, 0x54, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, }; void ps2_queue(void *opaque, int b) @@ -152,44 +555,130 @@ void ps2_queue(void *opaque, int b) s->update_irq(s->update_arg, 1); } -/* - keycode is expressed as follow: - bit 7 - 0 key pressed, 1 = key released - bits 6-0 - translated scancode set 2 - */ +/* keycode is the untranslated scancode in the current scancode set. 
*/ static void ps2_put_keycode(void *opaque, int keycode) { PS2KbdState *s = opaque; trace_ps2_put_keycode(opaque, keycode); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); - /* XXX: add support for scancode set 1 */ - if (!s->translate && keycode < 0xe0 && s->scancode_set > 1) { - if (keycode & 0x80) { - ps2_queue(&s->common, 0xf0); - } - if (s->scancode_set == 2) { - keycode = ps2_raw_keycode[keycode & 0x7f]; - } else if (s->scancode_set == 3) { - keycode = ps2_raw_keycode_set3[keycode & 0x7f]; + + if (s->translate) { + if (keycode == 0xf0) { + s->need_high_bit = true; + } else if (s->need_high_bit) { + ps2_queue(&s->common, translate_table[keycode] | 0x80); + s->need_high_bit = false; + } else { + ps2_queue(&s->common, translate_table[keycode]); } - } - ps2_queue(&s->common, keycode); + } else { + ps2_queue(&s->common, keycode); + } } static void ps2_keyboard_event(DeviceState *dev, QemuConsole *src, InputEvent *evt) { PS2KbdState *s = (PS2KbdState *)dev; - int scancodes[3], i, count; InputKeyEvent *key = evt->u.key.data; + int qcode; + uint16_t keycode; qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); - count = qemu_input_key_value_to_scancode(key->key, - key->down, - scancodes); - for (i = 0; i < count; i++) { - ps2_put_keycode(s, scancodes[i]); + assert(evt->type == INPUT_EVENT_KIND_KEY); + qcode = qemu_input_key_value_to_qcode(key->key); + + if (s->scancode_set == 1) { + if (qcode == Q_KEY_CODE_PAUSE) { + if (key->down) { + ps2_put_keycode(s, 0xe1); + ps2_put_keycode(s, 0x1d); + ps2_put_keycode(s, 0x45); + ps2_put_keycode(s, 0x91); + ps2_put_keycode(s, 0x9d); + ps2_put_keycode(s, 0xc5); + } + } else if (qcode == Q_KEY_CODE_PRINT) { + if (key->down) { + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0x2a); + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0x37); + } else { + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0xb7); + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0xaa); + } + } else { + keycode = qcode_to_keycode_set1[qcode]; + if (keycode) { + if (keycode & 0xff00) { + ps2_put_keycode(s, keycode >> 8); + } + if (!key->down) { + keycode |= 0x80; + } + ps2_put_keycode(s, keycode & 0xff); + } else { + qemu_log_mask(LOG_UNIMP, + "ps2: ignoring key with qcode %d\n", qcode); + } + } + } else if (s->scancode_set == 2) { + if (qcode == Q_KEY_CODE_PAUSE) { + if (key->down) { + ps2_put_keycode(s, 0xe1); + ps2_put_keycode(s, 0x14); + ps2_put_keycode(s, 0x77); + ps2_put_keycode(s, 0xe1); + ps2_put_keycode(s, 0xf0); + ps2_put_keycode(s, 0x14); + ps2_put_keycode(s, 0xf0); + ps2_put_keycode(s, 0x77); + } + } else if (qcode == Q_KEY_CODE_PRINT) { + if (key->down) { + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0x12); + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0x7c); + } else { + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0xf0); + ps2_put_keycode(s, 0x7c); + ps2_put_keycode(s, 0xe0); + ps2_put_keycode(s, 0xf0); + ps2_put_keycode(s, 0x12); + } + } else { + keycode = qcode_to_keycode_set2[qcode]; + if (keycode) { + if (keycode & 0xff00) { + ps2_put_keycode(s, keycode >> 8); + } + if (!key->down) { + ps2_put_keycode(s, 0xf0); + } + ps2_put_keycode(s, keycode & 0xff); + } else { + qemu_log_mask(LOG_UNIMP, + "ps2: ignoring key with qcode %d\n", qcode); + } + } + } else if (s->scancode_set == 3) { + keycode = qcode_to_keycode_set3[qcode]; + if (keycode) { + /* FIXME: break code should be configured on a key by key basis */ + if (!key->down) { + ps2_put_keycode(s, 0xf0); + } + ps2_put_keycode(s, keycode); + } else { + qemu_log_mask(LOG_UNIMP, + "ps2: ignoring key with 
qcode %d\n", qcode); + } } } @@ -290,22 +779,19 @@ void ps2_write_keyboard(void *opaque, int val) ps2_queue(&s->common, KBD_REPLY_POR); break; default: - ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_queue(&s->common, KBD_REPLY_RESEND); break; } break; case KBD_CMD_SCANCODE: if (val == 0) { - if (s->scancode_set == 1) - ps2_put_keycode(s, 0x43); - else if (s->scancode_set == 2) - ps2_put_keycode(s, 0x41); - else if (s->scancode_set == 3) - ps2_put_keycode(s, 0x3f); - } else { - if (val >= 1 && val <= 3) - s->scancode_set = val; ps2_queue(&s->common, KBD_REPLY_ACK); + ps2_put_keycode(s, s->scancode_set); + } else if (val >= 1 && val <= 3) { + s->scancode_set = val; + ps2_queue(&s->common, KBD_REPLY_ACK); + } else { + ps2_queue(&s->common, KBD_REPLY_RESEND); } s->common.write_cmd = -1; break; @@ -690,6 +1176,23 @@ static const VMStateDescription vmstate_ps2_keyboard_ledstate = { } }; +static bool ps2_keyboard_need_high_bit_needed(void *opaque) +{ + PS2KbdState *s = opaque; + return s->need_high_bit != 0; /* 0 is the usual state */ +} + +static const VMStateDescription vmstate_ps2_keyboard_need_high_bit = { + .name = "ps2kbd/need_high_bit", + .version_id = 1, + .minimum_version_id = 1, + .needed = ps2_keyboard_need_high_bit_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(need_high_bit, PS2KbdState), + VMSTATE_END_OF_LIST() + } +}; + static int ps2_kbd_post_load(void* opaque, int version_id) { PS2KbdState *s = (PS2KbdState*)opaque; @@ -726,6 +1229,7 @@ static const VMStateDescription vmstate_ps2_keyboard = { }, .subsections = (const VMStateDescription*[]) { &vmstate_ps2_keyboard_ledstate, + &vmstate_ps2_keyboard_need_high_bit, NULL } }; diff --git a/hw/input/tsc2005.c b/hw/input/tsc2005.c index 9b359aaec0..eb5320af40 100644 --- a/hw/input/tsc2005.c +++ b/hw/input/tsc2005.c @@ -31,30 +31,31 @@ typedef struct { QEMUTimer *timer; uint16_t model; - int x, y; - int pressure; + int32_t x, y; + bool pressure; - int state, reg, irq, command; + uint8_t reg, state; + bool irq, command; uint16_t data, dav; - int busy; - int enabled; - int host_mode; - int function; - int nextfunction; - int precision; - int nextprecision; - int filter; - int pin_func; - int timing[2]; - int noise; - int reset; - int pdst; - int pnd0; + bool busy; + bool enabled; + bool host_mode; + int8_t function; + int8_t nextfunction; + bool precision; + bool nextprecision; + uint16_t filter; + uint8_t pin_func; + uint16_t timing[2]; + uint8_t noise; + bool reset; + bool pdst; + bool pnd0; uint16_t temp_thr[2]; uint16_t aux_thr[2]; - int tr[8]; + int32_t tr[8]; } TSC2005State; enum { @@ -149,7 +150,7 @@ static uint16_t tsc2005_read(TSC2005State *s, int reg) ret = s->dav | (s->reset << 7) | (s->pdst << 2) | 0x0; s->dav &= ~(mode_regs[TSC_MODE_X_TEST] | mode_regs[TSC_MODE_Y_TEST] | mode_regs[TSC_MODE_TS_TEST]); - s->reset = 1; + s->reset = true; return ret; case 0x8: /* AUX high treshold */ @@ -196,14 +197,14 @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data) break; case 0xc: /* CFR0 */ - s->host_mode = data >> 15; + s->host_mode = (data >> 15) != 0; if (s->enabled != !(data & 0x4000)) { s->enabled = !(data & 0x4000); fprintf(stderr, "%s: touchscreen sense %sabled\n", __FUNCTION__, s->enabled ? 
"en" : "dis"); if (s->busy && !s->enabled) timer_del(s->timer); - s->busy &= s->enabled; + s->busy = s->busy && s->enabled; } s->nextprecision = (data >> 13) & 1; s->timing[0] = data & 0x1fff; @@ -229,7 +230,7 @@ static void tsc2005_write(TSC2005State *s, int reg, uint16_t data) static void tsc2005_pin_update(TSC2005State *s) { int64_t expires; - int pin_state; + bool pin_state; switch (s->pin_func) { case 0: @@ -253,7 +254,7 @@ static void tsc2005_pin_update(TSC2005State *s) case TSC_MODE_XYZ_SCAN: case TSC_MODE_XY_SCAN: if (!s->host_mode && s->dav) - s->enabled = 0; + s->enabled = false; if (!s->pressure) return; /* Fall through */ @@ -273,7 +274,7 @@ static void tsc2005_pin_update(TSC2005State *s) case TSC_MODE_Y_TEST: case TSC_MODE_TS_TEST: if (s->dav) - s->enabled = 0; + s->enabled = false; break; case TSC_MODE_RESERVED: @@ -287,7 +288,7 @@ static void tsc2005_pin_update(TSC2005State *s) if (!s->enabled || s->busy) return; - s->busy = 1; + s->busy = true; s->precision = s->nextprecision; s->function = s->nextfunction; s->pdst = !s->pnd0; /* Synchronised on internal clock */ @@ -300,17 +301,17 @@ static void tsc2005_reset(TSC2005State *s) { s->state = 0; s->pin_func = 0; - s->enabled = 0; - s->busy = 0; - s->nextprecision = 0; + s->enabled = false; + s->busy = false; + s->nextprecision = false; s->nextfunction = 0; s->timing[0] = 0; s->timing[1] = 0; - s->irq = 0; + s->irq = false; s->dav = 0; - s->reset = 0; - s->pdst = 1; - s->pnd0 = 0; + s->reset = false; + s->pdst = true; + s->pnd0 = false; s->function = -1; s->temp_thr[0] = 0x000; s->temp_thr[1] = 0xfff; @@ -340,7 +341,7 @@ static uint8_t tsc2005_txrx_word(void *opaque, uint8_t value) __FUNCTION__, s->enabled ? "en" : "dis"); if (s->busy && !s->enabled) timer_del(s->timer); - s->busy &= s->enabled; + s->busy = s->busy && s->enabled; } tsc2005_pin_update(s); } @@ -407,7 +408,7 @@ static void tsc2005_timer_tick(void *opaque) if (!s->busy) return; - s->busy = 0; + s->busy = false; s->dav |= mode_regs[s->function]; s->function = -1; tsc2005_pin_update(s); @@ -434,86 +435,9 @@ static void tsc2005_touchscreen_event(void *opaque, tsc2005_pin_update(s); } -static void tsc2005_save(QEMUFile *f, void *opaque) +static int tsc2005_post_load(void *opaque, int version_id) { TSC2005State *s = (TSC2005State *) opaque; - int i; - - qemu_put_be16(f, s->x); - qemu_put_be16(f, s->y); - qemu_put_byte(f, s->pressure); - - qemu_put_byte(f, s->state); - qemu_put_byte(f, s->reg); - qemu_put_byte(f, s->command); - - qemu_put_byte(f, s->irq); - qemu_put_be16s(f, &s->dav); - qemu_put_be16s(f, &s->data); - - timer_put(f, s->timer); - qemu_put_byte(f, s->enabled); - qemu_put_byte(f, s->host_mode); - qemu_put_byte(f, s->function); - qemu_put_byte(f, s->nextfunction); - qemu_put_byte(f, s->precision); - qemu_put_byte(f, s->nextprecision); - qemu_put_be16(f, s->filter); - qemu_put_byte(f, s->pin_func); - qemu_put_be16(f, s->timing[0]); - qemu_put_be16(f, s->timing[1]); - qemu_put_be16s(f, &s->temp_thr[0]); - qemu_put_be16s(f, &s->temp_thr[1]); - qemu_put_be16s(f, &s->aux_thr[0]); - qemu_put_be16s(f, &s->aux_thr[1]); - qemu_put_be32(f, s->noise); - qemu_put_byte(f, s->reset); - qemu_put_byte(f, s->pdst); - qemu_put_byte(f, s->pnd0); - - for (i = 0; i < 8; i ++) - qemu_put_be32(f, s->tr[i]); -} - -static int tsc2005_load(QEMUFile *f, void *opaque, int version_id) -{ - TSC2005State *s = (TSC2005State *) opaque; - int i; - - s->x = qemu_get_be16(f); - s->y = qemu_get_be16(f); - s->pressure = qemu_get_byte(f); - - s->state = qemu_get_byte(f); - s->reg = 
qemu_get_byte(f); - s->command = qemu_get_byte(f); - - s->irq = qemu_get_byte(f); - qemu_get_be16s(f, &s->dav); - qemu_get_be16s(f, &s->data); - - timer_get(f, s->timer); - s->enabled = qemu_get_byte(f); - s->host_mode = qemu_get_byte(f); - s->function = qemu_get_byte(f); - s->nextfunction = qemu_get_byte(f); - s->precision = qemu_get_byte(f); - s->nextprecision = qemu_get_byte(f); - s->filter = qemu_get_be16(f); - s->pin_func = qemu_get_byte(f); - s->timing[0] = qemu_get_be16(f); - s->timing[1] = qemu_get_be16(f); - qemu_get_be16s(f, &s->temp_thr[0]); - qemu_get_be16s(f, &s->temp_thr[1]); - qemu_get_be16s(f, &s->aux_thr[0]); - qemu_get_be16s(f, &s->aux_thr[1]); - s->noise = qemu_get_be32(f); - s->reset = qemu_get_byte(f); - s->pdst = qemu_get_byte(f); - s->pnd0 = qemu_get_byte(f); - - for (i = 0; i < 8; i ++) - s->tr[i] = qemu_get_be32(f); s->busy = timer_pending(s->timer); tsc2005_pin_update(s); @@ -521,6 +445,42 @@ static int tsc2005_load(QEMUFile *f, void *opaque, int version_id) return 0; } +static const VMStateDescription vmstate_tsc2005 = { + .name = "tsc2005", + .version_id = 2, + .minimum_version_id = 2, + .post_load = tsc2005_post_load, + .fields = (VMStateField []) { + VMSTATE_BOOL(pressure, TSC2005State), + VMSTATE_BOOL(irq, TSC2005State), + VMSTATE_BOOL(command, TSC2005State), + VMSTATE_BOOL(enabled, TSC2005State), + VMSTATE_BOOL(host_mode, TSC2005State), + VMSTATE_BOOL(reset, TSC2005State), + VMSTATE_BOOL(pdst, TSC2005State), + VMSTATE_BOOL(pnd0, TSC2005State), + VMSTATE_BOOL(precision, TSC2005State), + VMSTATE_BOOL(nextprecision, TSC2005State), + VMSTATE_UINT8(reg, TSC2005State), + VMSTATE_UINT8(state, TSC2005State), + VMSTATE_UINT16(data, TSC2005State), + VMSTATE_UINT16(dav, TSC2005State), + VMSTATE_UINT16(filter, TSC2005State), + VMSTATE_INT8(nextfunction, TSC2005State), + VMSTATE_INT8(function, TSC2005State), + VMSTATE_INT32(x, TSC2005State), + VMSTATE_INT32(y, TSC2005State), + VMSTATE_TIMER_PTR(timer, TSC2005State), + VMSTATE_UINT8(pin_func, TSC2005State), + VMSTATE_UINT16_ARRAY(timing, TSC2005State, 2), + VMSTATE_UINT8(noise, TSC2005State), + VMSTATE_UINT16_ARRAY(temp_thr, TSC2005State, 2), + VMSTATE_UINT16_ARRAY(aux_thr, TSC2005State, 2), + VMSTATE_INT32_ARRAY(tr, TSC2005State, 8), + VMSTATE_END_OF_LIST() + } +}; + void *tsc2005_init(qemu_irq pintdav) { TSC2005State *s; @@ -529,8 +489,8 @@ void *tsc2005_init(qemu_irq pintdav) g_malloc0(sizeof(TSC2005State)); s->x = 400; s->y = 240; - s->pressure = 0; - s->precision = s->nextprecision = 0; + s->pressure = false; + s->precision = s->nextprecision = false; s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, tsc2005_timer_tick, s); s->pint = pintdav; s->model = 0x2005; @@ -550,7 +510,7 @@ void *tsc2005_init(qemu_irq pintdav) "QEMU TSC2005-driven Touchscreen"); qemu_register_reset((void *) tsc2005_reset, s); - register_savevm(NULL, "tsc2005", -1, 0, tsc2005_save, tsc2005_load, s); + vmstate_register(NULL, 0, &vmstate_tsc2005, s); return s; } diff --git a/hw/input/tsc210x.c b/hw/input/tsc210x.c index 93ca374fcd..b068343771 100644 --- a/hw/input/tsc210x.c +++ b/hw/input/tsc210x.c @@ -47,24 +47,25 @@ typedef struct { uint8_t out_fifo[16384]; uint16_t model; - int x, y; - int pressure; - - int state, page, offset, irq; - uint16_t command, dav; - - int busy; - int enabled; - int host_mode; - int function; - int nextfunction; - int precision; - int nextprecision; - int filter; - int pin_func; - int ref; - int timing; - int noise; + int32_t x, y; + bool pressure; + + uint8_t page, offset; + uint16_t dav; + + bool state; + bool irq; + bool 
command; + bool busy; + bool enabled; + bool host_mode; + uint8_t function, nextfunction; + uint8_t precision, nextprecision; + uint8_t filter; + uint8_t pin_func; + uint8_t ref; + uint8_t timing; + uint8_t noise; uint16_t audio_ctrl1; uint16_t audio_ctrl2; @@ -72,7 +73,7 @@ typedef struct { uint16_t pll[3]; uint16_t volume; int64_t volume_change; - int softstep; + bool softstep; uint16_t dac_power; int64_t powerdown; uint16_t filter_data[0x14]; @@ -93,6 +94,7 @@ typedef struct { int mode; int intr; } kb; + int64_t now; /* Time at migration */ } TSC210xState; static const int resolution[4] = { 12, 8, 10, 12 }; @@ -154,14 +156,14 @@ static const uint16_t mode_regs[16] = { static void tsc210x_reset(TSC210xState *s) { - s->state = 0; + s->state = false; s->pin_func = 2; - s->enabled = 0; - s->busy = 0; + s->enabled = false; + s->busy = false; s->nextfunction = 0; s->ref = 0; s->timing = 0; - s->irq = 0; + s->irq = false; s->dav = 0; s->audio_ctrl1 = 0x0000; @@ -172,7 +174,7 @@ static void tsc210x_reset(TSC210xState *s) s->pll[2] = 0x1fff; s->volume = 0xffff; s->dac_power = 0x8540; - s->softstep = 1; + s->softstep = true; s->volume_change = 0; s->powerdown = 0; s->filter_data[0x00] = 0x6be3; @@ -566,7 +568,7 @@ static void tsc2102_control_register_write( s->enabled = !(value & 0x4000); if (s->busy && !s->enabled) timer_del(s->timer); - s->busy &= s->enabled; + s->busy = s->busy && s->enabled; s->nextfunction = (value >> 10) & 0xf; s->nextprecision = (value >> 8) & 3; s->filter = value & 0xff; @@ -773,7 +775,7 @@ static void tsc2102_audio_register_write( static void tsc210x_pin_update(TSC210xState *s) { int64_t expires; - int pin_state; + bool pin_state; switch (s->pin_func) { case 0: @@ -788,7 +790,7 @@ static void tsc210x_pin_update(TSC210xState *s) } if (!s->enabled) - pin_state = 0; + pin_state = false; if (pin_state != s->irq) { s->irq = pin_state; @@ -814,7 +816,7 @@ static void tsc210x_pin_update(TSC210xState *s) case TSC_MODE_TEMP1: case TSC_MODE_TEMP2: if (s->dav) - s->enabled = 0; + s->enabled = false; break; case TSC_MODE_AUX_SCAN: @@ -832,7 +834,7 @@ static void tsc210x_pin_update(TSC210xState *s) if (!s->enabled || s->busy || s->dav) return; - s->busy = 1; + s->busy = true; s->precision = s->nextprecision; s->function = s->nextfunction; expires = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + @@ -867,7 +869,7 @@ static uint16_t tsc210x_read(TSC210xState *s) /* Allow sequential reads. */ s->offset ++; - s->state = 0; + s->state = false; return ret; } @@ -878,10 +880,10 @@ static void tsc210x_write(TSC210xState *s, uint16_t value) * command and data every second time. 
*/ if (!s->state) { - s->command = value >> 15; + s->command = (value >> 15) != 0; s->page = (value >> 11) & 0x0f; s->offset = (value >> 5) & 0x3f; - s->state = 1; + s->state = true; } else { if (s->command) fprintf(stderr, "tsc210x_write: SPI overrun!\n"); @@ -901,7 +903,7 @@ static void tsc210x_write(TSC210xState *s, uint16_t value) } tsc210x_pin_update(s); - s->state = 0; + s->state = false; } } @@ -933,7 +935,7 @@ static void tsc210x_timer_tick(void *opaque) if (!s->busy) return; - s->busy = 0; + s->busy = false; s->dav |= mode_regs[s->function]; tsc210x_pin_update(s); qemu_irq_lower(s->davint); @@ -974,108 +976,34 @@ static void tsc210x_i2s_set_rate(TSC210xState *s, int in, int out) s->i2s_rx_rate = in; } -static void tsc210x_save(QEMUFile *f, void *opaque) +static void tsc210x_pre_save(void *opaque) { TSC210xState *s = (TSC210xState *) opaque; - int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - int i; - - qemu_put_be16(f, s->x); - qemu_put_be16(f, s->y); - qemu_put_byte(f, s->pressure); - - qemu_put_byte(f, s->state); - qemu_put_byte(f, s->page); - qemu_put_byte(f, s->offset); - qemu_put_byte(f, s->command); - - qemu_put_byte(f, s->irq); - qemu_put_be16s(f, &s->dav); - - timer_put(f, s->timer); - qemu_put_byte(f, s->enabled); - qemu_put_byte(f, s->host_mode); - qemu_put_byte(f, s->function); - qemu_put_byte(f, s->nextfunction); - qemu_put_byte(f, s->precision); - qemu_put_byte(f, s->nextprecision); - qemu_put_byte(f, s->filter); - qemu_put_byte(f, s->pin_func); - qemu_put_byte(f, s->ref); - qemu_put_byte(f, s->timing); - qemu_put_be32(f, s->noise); - - qemu_put_be16s(f, &s->audio_ctrl1); - qemu_put_be16s(f, &s->audio_ctrl2); - qemu_put_be16s(f, &s->audio_ctrl3); - qemu_put_be16s(f, &s->pll[0]); - qemu_put_be16s(f, &s->pll[1]); - qemu_put_be16s(f, &s->volume); - qemu_put_sbe64(f, (s->volume_change - now)); - qemu_put_sbe64(f, (s->powerdown - now)); - qemu_put_byte(f, s->softstep); - qemu_put_be16s(f, &s->dac_power); - - for (i = 0; i < 0x14; i ++) - qemu_put_be16s(f, &s->filter_data[i]); + s->now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); } -static int tsc210x_load(QEMUFile *f, void *opaque, int version_id) +static int tsc210x_post_load(void *opaque, int version_id) { TSC210xState *s = (TSC210xState *) opaque; int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - int i; - - s->x = qemu_get_be16(f); - s->y = qemu_get_be16(f); - s->pressure = qemu_get_byte(f); - - s->state = qemu_get_byte(f); - s->page = qemu_get_byte(f); - s->offset = qemu_get_byte(f); - s->command = qemu_get_byte(f); - s->irq = qemu_get_byte(f); - qemu_get_be16s(f, &s->dav); - - timer_get(f, s->timer); - s->enabled = qemu_get_byte(f); - s->host_mode = qemu_get_byte(f); - s->function = qemu_get_byte(f); - if (s->function < 0 || s->function >= ARRAY_SIZE(mode_regs)) { + if (s->function >= ARRAY_SIZE(mode_regs)) { return -EINVAL; } - s->nextfunction = qemu_get_byte(f); - if (s->nextfunction < 0 || s->nextfunction >= ARRAY_SIZE(mode_regs)) { + if (s->nextfunction >= ARRAY_SIZE(mode_regs)) { return -EINVAL; } - s->precision = qemu_get_byte(f); - if (s->precision < 0 || s->precision >= ARRAY_SIZE(resolution)) { + if (s->precision >= ARRAY_SIZE(resolution)) { return -EINVAL; } - s->nextprecision = qemu_get_byte(f); - if (s->nextprecision < 0 || s->nextprecision >= ARRAY_SIZE(resolution)) { + if (s->nextprecision >= ARRAY_SIZE(resolution)) { return -EINVAL; } - s->filter = qemu_get_byte(f); - s->pin_func = qemu_get_byte(f); - s->ref = qemu_get_byte(f); - s->timing = qemu_get_byte(f); - s->noise = qemu_get_be32(f); - - 
qemu_get_be16s(f, &s->audio_ctrl1); - qemu_get_be16s(f, &s->audio_ctrl2); - qemu_get_be16s(f, &s->audio_ctrl3); - qemu_get_be16s(f, &s->pll[0]); - qemu_get_be16s(f, &s->pll[1]); - qemu_get_be16s(f, &s->volume); - s->volume_change = qemu_get_sbe64(f) + now; - s->powerdown = qemu_get_sbe64(f) + now; - s->softstep = qemu_get_byte(f); - qemu_get_be16s(f, &s->dac_power); - - for (i = 0; i < 0x14; i ++) - qemu_get_be16s(f, &s->filter_data[i]); + + s->volume_change -= s->now; + s->volume_change += now; + s->powerdown -= s->now; + s->powerdown += now; s->busy = timer_pending(s->timer); qemu_set_irq(s->pint, !s->irq); @@ -1084,6 +1012,60 @@ static int tsc210x_load(QEMUFile *f, void *opaque, int version_id) return 0; } +static VMStateField vmstatefields_tsc210x[] = { + VMSTATE_BOOL(enabled, TSC210xState), + VMSTATE_BOOL(host_mode, TSC210xState), + VMSTATE_BOOL(irq, TSC210xState), + VMSTATE_BOOL(command, TSC210xState), + VMSTATE_BOOL(pressure, TSC210xState), + VMSTATE_BOOL(softstep, TSC210xState), + VMSTATE_BOOL(state, TSC210xState), + VMSTATE_UINT16(dav, TSC210xState), + VMSTATE_INT32(x, TSC210xState), + VMSTATE_INT32(y, TSC210xState), + VMSTATE_UINT8(offset, TSC210xState), + VMSTATE_UINT8(page, TSC210xState), + VMSTATE_UINT8(filter, TSC210xState), + VMSTATE_UINT8(pin_func, TSC210xState), + VMSTATE_UINT8(ref, TSC210xState), + VMSTATE_UINT8(timing, TSC210xState), + VMSTATE_UINT8(noise, TSC210xState), + VMSTATE_UINT8(function, TSC210xState), + VMSTATE_UINT8(nextfunction, TSC210xState), + VMSTATE_UINT8(precision, TSC210xState), + VMSTATE_UINT8(nextprecision, TSC210xState), + VMSTATE_UINT16(audio_ctrl1, TSC210xState), + VMSTATE_UINT16(audio_ctrl2, TSC210xState), + VMSTATE_UINT16(audio_ctrl3, TSC210xState), + VMSTATE_UINT16_ARRAY(pll, TSC210xState, 3), + VMSTATE_UINT16(volume, TSC210xState), + VMSTATE_UINT16(dac_power, TSC210xState), + VMSTATE_INT64(volume_change, TSC210xState), + VMSTATE_INT64(powerdown, TSC210xState), + VMSTATE_INT64(now, TSC210xState), + VMSTATE_UINT16_ARRAY(filter_data, TSC210xState, 0x14), + VMSTATE_TIMER_PTR(timer, TSC210xState), + VMSTATE_END_OF_LIST() +}; + +static const VMStateDescription vmstate_tsc2102 = { + .name = "tsc2102", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = tsc210x_pre_save, + .post_load = tsc210x_post_load, + .fields = vmstatefields_tsc210x, +}; + +static const VMStateDescription vmstate_tsc2301 = { + .name = "tsc2301", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = tsc210x_pre_save, + .post_load = tsc210x_post_load, + .fields = vmstatefields_tsc210x, +}; + uWireSlave *tsc2102_init(qemu_irq pint) { TSC210xState *s; @@ -1125,8 +1107,7 @@ uWireSlave *tsc2102_init(qemu_irq pint) AUD_register_card(s->name, &s->card); qemu_register_reset((void *) tsc210x_reset, s); - register_savevm(NULL, s->name, -1, 0, - tsc210x_save, tsc210x_load, s); + vmstate_register(NULL, 0, &vmstate_tsc2102, s); return &s->chip; } @@ -1174,7 +1155,7 @@ uWireSlave *tsc2301_init(qemu_irq penirq, qemu_irq kbirq, qemu_irq dav) AUD_register_card(s->name, &s->card); qemu_register_reset((void *) tsc210x_reset, s); - register_savevm(NULL, s->name, -1, 0, tsc210x_save, tsc210x_load, s); + vmstate_register(NULL, 0, &vmstate_tsc2301, s); return &s->chip; } diff --git a/hw/input/virtio-input.c b/hw/input/virtio-input.c index ccdf7308a5..b678ee9f20 100644 --- a/hw/input/virtio-input.c +++ b/hw/input/virtio-input.c @@ -217,19 +217,12 @@ static void virtio_input_reset(VirtIODevice *vdev) } } -static int virtio_input_load(QEMUFile *f, void *opaque, size_t size) +static int 
virtio_input_post_load(void *opaque, int version_id) { VirtIOInput *vinput = opaque; VirtIOInputClass *vic = VIRTIO_INPUT_GET_CLASS(vinput); VirtIODevice *vdev = VIRTIO_DEVICE(vinput); - int ret; - ret = virtio_load(vdev, f, VIRTIO_INPUT_VM_VERSION); - if (ret) { - return ret; - } - - /* post_load() */ vinput->active = vdev->status & VIRTIO_CONFIG_S_DRIVER_OK; if (vic->change_active) { vic->change_active(vinput); @@ -296,8 +289,16 @@ static void virtio_input_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } -VMSTATE_VIRTIO_DEVICE(input, VIRTIO_INPUT_VM_VERSION, virtio_input_load, - virtio_vmstate_save); +static const VMStateDescription vmstate_virtio_input = { + .name = "virtio-input", + .minimum_version_id = VIRTIO_INPUT_VM_VERSION, + .version_id = VIRTIO_INPUT_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, + .post_load = virtio_input_post_load, +}; static Property virtio_input_properties[] = { DEFINE_PROP_STRING("serial", VirtIOInput, serial), diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs index 22776ea9ea..71ded9ba61 100644 --- a/hw/intc/Makefile.objs +++ b/hw/intc/Makefile.objs @@ -16,11 +16,14 @@ common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_common.o common-obj-$(CONFIG_ARM_GIC) += arm_gicv3.o common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_dist.o common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_redist.o +common-obj-$(CONFIG_ARM_GIC) += arm_gicv3_its_common.o common-obj-$(CONFIG_OPENPIC) += openpic.o +common-obj-y += intc.o obj-$(CONFIG_APIC) += apic.o apic_common.o obj-$(CONFIG_ARM_GIC_KVM) += arm_gic_kvm.o obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_kvm.o +obj-$(call land,$(CONFIG_ARM_GIC_KVM),$(TARGET_AARCH64)) += arm_gicv3_its_kvm.o ifeq ($(CONFIG_GNU_ARM_ECLIPSE),n) obj-$(CONFIG_STELLARIS) += armv7m_nvic.o endif diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 45887d99c0..fe15fb6024 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -39,6 +39,10 @@ static APICCommonState *local_apics[MAX_APICS + 1]; +#define TYPE_APIC "apic" +#define APIC(obj) \ + OBJECT_CHECK(APICCommonState, (obj), TYPE_APIC) + static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode); static void apic_update_irq(APICCommonState *s); static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, @@ -163,7 +167,7 @@ static void apic_local_deliver(APICCommonState *s, int vector) void apic_deliver_pic_intr(DeviceState *dev, int level) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); if (level) { apic_local_deliver(s, APIC_LVT_LINT0); @@ -373,7 +377,7 @@ static void apic_update_irq(APICCommonState *s) void apic_poll_irq(DeviceState *dev) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); apic_sync_vapic(s, SYNC_FROM_VAPIC); apic_update_irq(s); @@ -479,7 +483,7 @@ static void apic_startup(APICCommonState *s, int vector_num) void apic_sipi(DeviceState *dev) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); cpu_reset_interrupt(CPU(s->cpu), CPU_INTERRUPT_SIPI); @@ -493,7 +497,7 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); uint32_t deliver_bitmask[MAX_APIC_WORDS]; int dest_shorthand = (s->icr[0] >> 18) & 3; APICCommonState *apic_iter; @@ -550,7 +554,7 @@ static bool apic_check_pic(APICCommonState *s) int apic_get_interrupt(DeviceState *dev) { - 
APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); int intno; /* if the APIC is installed or enabled, we let the 8259 handle the @@ -584,7 +588,7 @@ int apic_get_interrupt(DeviceState *dev) int apic_accept_pic_intr(DeviceState *dev) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); uint32_t lvt0; if (!s) @@ -663,7 +667,7 @@ static uint32_t apic_mem_readl(void *opaque, hwaddr addr) if (!dev) { return 0; } - s = APIC_COMMON(dev); + s = APIC(dev); index = (addr >> 4) & 0xff; switch(index) { @@ -736,8 +740,10 @@ static uint32_t apic_mem_readl(void *opaque, hwaddr addr) return val; } -static void apic_send_msi(hwaddr addr, uint32_t data) +static void apic_send_msi(MSIMessage *msi) { + uint64_t addr = msi->address; + uint32_t data = msi->data; uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; @@ -758,7 +764,8 @@ static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val) * APIC is connected directly to the CPU. * Mapping them on the global bus happens to work because * MSI registers are reserved in APIC MMIO and vice versa. */ - apic_send_msi(addr, val); + MSIMessage msi = { .address = addr, .data = val }; + apic_send_msi(&msi); return; } @@ -766,7 +773,7 @@ static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val) if (!dev) { return; } - s = APIC_COMMON(dev); + s = APIC(dev); trace_apic_mem_writel(addr, val); @@ -870,7 +877,7 @@ static const MemoryRegionOps apic_io_ops = { static void apic_realize(DeviceState *dev, Error **errp) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); if (s->id >= MAX_APICS) { error_setg(errp, "%s initialization failed. 
APIC ID %d is invalid", @@ -889,7 +896,7 @@ static void apic_realize(DeviceState *dev, Error **errp) static void apic_unrealize(DeviceState *dev, Error **errp) { - APICCommonState *s = APIC_COMMON(dev); + APICCommonState *s = APIC(dev); timer_del(s->timer); timer_free(s->timer); @@ -909,10 +916,11 @@ static void apic_class_init(ObjectClass *klass, void *data) k->external_nmi = apic_external_nmi; k->pre_save = apic_pre_save; k->post_load = apic_post_load; + k->send_msi = apic_send_msi; } static const TypeInfo apic_info = { - .name = "apic", + .name = TYPE_APIC, .instance_size = sizeof(APICCommonState), .parent = TYPE_APIC_COMMON, .class_init = apic_class_init, diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 14ac43c186..d78c885509 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -18,9 +18,11 @@ * License along with this library; if not, see */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" +#include "qapi/visitor.h" #include "hw/i386/apic.h" #include "hw/i386/apic_internal.h" #include "trace.h" @@ -38,6 +40,11 @@ void cpu_set_apic_base(DeviceState *dev, uint64_t val) if (dev) { APICCommonState *s = APIC_COMMON(dev); APICCommonClass *info = APIC_COMMON_GET_CLASS(s); + /* switching to x2APIC, reset possibly modified xAPIC ID */ + if (!(s->apicbase & MSR_IA32_APICBASE_EXTD) && + (val & MSR_IA32_APICBASE_EXTD)) { + s->id = s->initial_apic_id; + } info->set_base(s, val); } } @@ -241,6 +248,7 @@ static void apic_reset_common(DeviceState *dev) bsp = s->apicbase & MSR_IA32_APICBASE_BSP; s->apicbase = APIC_DEFAULT_ADDRESS | bsp | MSR_IA32_APICBASE_ENABLE; + s->id = s->initial_apic_id; s->vapic_paddr = 0; info->vapic_base_update(s); @@ -427,7 +435,6 @@ static const VMStateDescription vmstate_apic_common = { }; static Property apic_properties_common[] = { - DEFINE_PROP_UINT8("id", APICCommonState, id, -1), DEFINE_PROP_UINT8("version", APICCommonState, version, 0x14), DEFINE_PROP_BIT("vapic", APICCommonState, vapic_control, VAPIC_ENABLE_BIT, true), @@ -436,6 +443,49 @@ static Property apic_properties_common[] = { DEFINE_PROP_END_OF_LIST(), }; +static void apic_common_get_id(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + APICCommonState *s = APIC_COMMON(obj); + int64_t value; + + value = s->apicbase & MSR_IA32_APICBASE_EXTD ? 
s->initial_apic_id : s->id; + visit_type_int(v, name, &value, errp); +} + +static void apic_common_set_id(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + APICCommonState *s = APIC_COMMON(obj); + DeviceState *dev = DEVICE(obj); + Error *local_err = NULL; + int64_t value; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_int(v, name, &value, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + s->initial_apic_id = value; + s->id = (uint8_t)value; +} + +static void apic_common_initfn(Object *obj) +{ + APICCommonState *s = APIC_COMMON(obj); + + s->id = s->initial_apic_id = -1; + object_property_add(obj, "id", "int", + apic_common_get_id, + apic_common_set_id, NULL, NULL, NULL); +} + static void apic_common_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -455,6 +505,7 @@ static const TypeInfo apic_common_type = { .name = TYPE_APIC_COMMON, .parent = TYPE_DEVICE, .instance_size = sizeof(APICCommonState), + .instance_init = apic_common_initfn, .class_size = sizeof(APICCommonClass), .class_init = apic_common_class_init, .abstract = true, diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index 8e3a9d2992..5595dec6de 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -175,6 +175,17 @@ static void gic_set_irq_11mpcore(GICState *s, int irq, int level, } } +static void gic_set_irq_nvic(GICState *s, int irq, int level, + int cm, int target) +{ + if (level) { + GIC_SET_LEVEL(irq, cm); + GIC_SET_PENDING(irq, target); + } else { + GIC_CLEAR_LEVEL(irq, cm); + } +} + static void gic_set_irq_generic(GICState *s, int irq, int level, int cm, int target) { @@ -220,8 +231,10 @@ static void gic_set_irq(void *opaque, int irq, int level) return; } - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { gic_set_irq_11mpcore(s, irq, level, cm, target); + } else if (s->revision == REV_NVIC) { + gic_set_irq_nvic(s, irq, level, cm, target); } else { gic_set_irq_generic(s, irq, level, cm, target); } @@ -587,7 +600,7 @@ void gic_complete_irq(GICState *s, int cpu, int irq, MemTxAttrs attrs) return; /* No active IRQ. */ } - if (s->revision == REV_11MPCORE || s->revision == REV_NVIC) { + if (s->revision == REV_11MPCORE) { /* Mark level triggered interrupts as pending if they are still raised. */ if (!GIC_TEST_EDGE_TRIGGER(irq) && GIC_TEST_ENABLED(irq, cm) @@ -595,6 +608,11 @@ void gic_complete_irq(GICState *s, int cpu, int irq, MemTxAttrs attrs) DPRINTF("Set %d pending mask %x\n", irq, cm); GIC_SET_PENDING(irq, cm); } + } else if (s->revision == REV_NVIC) { + if (GIC_TEST_LEVEL(irq, cm)) { + DPRINTF("Set nvic %d pending mask %x\n", irq, cm); + GIC_SET_PENDING(irq, cm); + } } group = gic_has_groups(s) && GIC_TEST_GROUP(irq, cm); diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 5593cdb3e4..11729ee902 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -30,20 +30,6 @@ #include "gic_internal.h" #include "vgic_common.h" -//#define DEBUG_GIC_KVM - -#ifdef DEBUG_GIC_KVM -static const int debug_gic_kvm = 1; -#else -static const int debug_gic_kvm = 0; -#endif - -#define DPRINTF(fmt, ...) 
do { \ - if (debug_gic_kvm) { \ - printf("arm_gic: " fmt , ## __VA_ARGS__); \ - } \ - } while (0) - #define TYPE_KVM_ARM_GIC "kvm-arm-gic" #define KVM_ARM_GIC(obj) \ OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC) @@ -577,6 +563,18 @@ static void kvm_arm_gic_realize(DeviceState *dev, Error **errp) "not support vGICv2 migration"); migrate_add_blocker(s->migration_blocker); } + + if (kvm_has_gsi_routing()) { + /* set up irq routing */ + kvm_init_irq_routing(kvm_state); + for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) { + kvm_irqchip_add_irq_route(kvm_state, i, 0, i); + } + + kvm_gsi_routing_allowed = true; + + kvm_irqchip_commit_routes(kvm_state); + } } static void kvm_arm_gic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index 4633172bec..bca30c49da 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -454,7 +454,8 @@ static void icc_eoir_write(CPUARMState *env, const ARMCPRegInfo *ri, int irq = value & 0xffffff; int grp; - trace_gicv3_icc_eoir_write(gicv3_redist_affid(cs), value); + trace_gicv3_icc_eoir_write(ri->crm == 8 ? 0 : 1, + gicv3_redist_affid(cs), value); if (ri->crm == 8) { /* EOIR0 */ @@ -542,7 +543,7 @@ static uint64_t icc_bpr_read(CPUARMState *env, const ARMCPRegInfo *ri) bpr = MIN(bpr, 7); } - trace_gicv3_icc_bpr_read(gicv3_redist_affid(cs), bpr); + trace_gicv3_icc_bpr_read(ri->crm == 8 ? 0 : 1, gicv3_redist_affid(cs), bpr); return bpr; } @@ -553,7 +554,8 @@ static void icc_bpr_write(CPUARMState *env, const ARMCPRegInfo *ri, GICv3CPUState *cs = icc_cs_from_env(env); int grp = (ri->crm == 8) ? GICV3_G0 : GICV3_G1; - trace_gicv3_icc_pmr_write(gicv3_redist_affid(cs), value); + trace_gicv3_icc_bpr_write(ri->crm == 8 ? 0 : 1, + gicv3_redist_affid(cs), value); if (grp == GICV3_G1 && gicv3_use_ns_bank(env)) { grp = GICV3_G1NS; @@ -591,7 +593,7 @@ static uint64_t icc_ap_read(CPUARMState *env, const ARMCPRegInfo *ri) value = cs->icc_apr[grp][regno]; - trace_gicv3_icc_ap_read(regno, gicv3_redist_affid(cs), value); + trace_gicv3_icc_ap_read(ri->crm & 1, regno, gicv3_redist_affid(cs), value); return value; } @@ -603,7 +605,7 @@ static void icc_ap_write(CPUARMState *env, const ARMCPRegInfo *ri, int regno = ri->opc2 & 3; int grp = ri->crm & 1 ? GICV3_G0 : GICV3_G1; - trace_gicv3_icc_ap_write(regno, gicv3_redist_affid(cs), value); + trace_gicv3_icc_ap_write(ri->crm & 1, regno, gicv3_redist_affid(cs), value); if (grp == GICV3_G1 && gicv3_use_ns_bank(env)) { grp = GICV3_G1NS; @@ -820,7 +822,8 @@ static uint64_t icc_igrpen_read(CPUARMState *env, const ARMCPRegInfo *ri) } value = cs->icc_igrpen[grp]; - trace_gicv3_icc_igrpen_read(gicv3_redist_affid(cs), value); + trace_gicv3_icc_igrpen_read(ri->opc2 & 1 ? 1 : 0, + gicv3_redist_affid(cs), value); return value; } @@ -830,7 +833,8 @@ static void icc_igrpen_write(CPUARMState *env, const ARMCPRegInfo *ri, GICv3CPUState *cs = icc_cs_from_env(env); int grp = ri->opc2 & 1 ? GICV3_G1 : GICV3_G0; - trace_gicv3_icc_igrpen_write(gicv3_redist_affid(cs), value); + trace_gicv3_icc_igrpen_write(ri->opc2 & 1 ? 
1 : 0, + gicv3_redist_affid(cs), value); if (grp == GICV3_G1 && gicv3_use_ns_bank(env)) { grp = GICV3_G1NS; @@ -843,9 +847,12 @@ static void icc_igrpen_write(CPUARMState *env, const ARMCPRegInfo *ri, static uint64_t icc_igrpen1_el3_read(CPUARMState *env, const ARMCPRegInfo *ri) { GICv3CPUState *cs = icc_cs_from_env(env); + uint64_t value; /* IGRPEN1_EL3 bits 0 and 1 are r/w aliases into IGRPEN1_EL1 NS and S */ - return cs->icc_igrpen[GICV3_G1NS] | (cs->icc_igrpen[GICV3_G1] << 1); + value = cs->icc_igrpen[GICV3_G1NS] | (cs->icc_igrpen[GICV3_G1] << 1); + trace_gicv3_icc_igrpen1_el3_read(gicv3_redist_affid(cs), value); + return value; } static void icc_igrpen1_el3_write(CPUARMState *env, const ARMCPRegInfo *ri, diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c new file mode 100644 index 0000000000..51d4b71f0b --- /dev/null +++ b/hw/intc/arm_gicv3_its_common.c @@ -0,0 +1,150 @@ +/* + * ITS base class for a GICv3-based system + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Written by Pavel Fedin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/pci/msi.h" +#include "hw/intc/arm_gicv3_its_common.h" +#include "qemu/log.h" + +static void gicv3_its_pre_save(void *opaque) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + GICv3ITSCommonClass *c = ARM_GICV3_ITS_COMMON_GET_CLASS(s); + + if (c->pre_save) { + c->pre_save(s); + } +} + +static int gicv3_its_post_load(void *opaque, int version_id) +{ + GICv3ITSState *s = (GICv3ITSState *)opaque; + GICv3ITSCommonClass *c = ARM_GICV3_ITS_COMMON_GET_CLASS(s); + + if (c->post_load) { + c->post_load(s); + } + return 0; +} + +static const VMStateDescription vmstate_its = { + .name = "arm_gicv3_its", + .pre_save = gicv3_its_pre_save, + .post_load = gicv3_its_post_load, + .unmigratable = true, +}; + +static MemTxResult gicv3_its_trans_read(void *opaque, hwaddr offset, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + qemu_log_mask(LOG_GUEST_ERROR, "ITS read at offset 0x%"PRIx64"\n", offset); + return MEMTX_ERROR; +} + +static MemTxResult gicv3_its_trans_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + if (offset == 0x0040 && ((size == 2) || (size == 4))) { + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(opaque); + GICv3ITSCommonClass *c = ARM_GICV3_ITS_COMMON_GET_CLASS(s); + int ret = c->send_msi(s, le64_to_cpu(value), attrs.requester_id); + + if (ret <= 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "ITS: Error sending MSI: %s\n", strerror(-ret)); + return MEMTX_DECODE_ERROR; + } + + return MEMTX_OK; + } else { + qemu_log_mask(LOG_GUEST_ERROR, + "ITS write at bad offset 0x%"PRIx64"\n", offset); + return MEMTX_DECODE_ERROR; + } +} + +static const MemoryRegionOps gicv3_its_trans_ops = { + .read_with_attrs = gicv3_its_trans_read, + .write_with_attrs = gicv3_its_trans_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +void gicv3_its_init_mmio(GICv3ITSState *s, const 
MemoryRegionOps *ops) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(s); + + memory_region_init_io(&s->iomem_its_cntrl, OBJECT(s), ops, s, + "control", ITS_CONTROL_SIZE); + memory_region_init_io(&s->iomem_its_translation, OBJECT(s), + &gicv3_its_trans_ops, s, + "translation", ITS_TRANS_SIZE); + + /* Our two regions are always adjacent, therefore we now combine them + * into a single one in order to make our users' life easier. + */ + memory_region_init(&s->iomem_main, OBJECT(s), "gicv3_its", ITS_SIZE); + memory_region_add_subregion(&s->iomem_main, 0, &s->iomem_its_cntrl); + memory_region_add_subregion(&s->iomem_main, ITS_CONTROL_SIZE, + &s->iomem_its_translation); + sysbus_init_mmio(sbd, &s->iomem_main); + +#if !defined(CONFIG_GNU_ARM_ECLIPSE) + msi_nonbroken = true; +#endif +} + +static void gicv3_its_common_reset(DeviceState *dev) +{ + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + + s->ctlr = 0; + s->cbaser = 0; + s->cwriter = 0; + s->creadr = 0; + memset(&s->baser, 0, sizeof(s->baser)); + + gicv3_its_post_load(s, 0); +} + +static void gicv3_its_common_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = gicv3_its_common_reset; + dc->vmsd = &vmstate_its; +} + +static const TypeInfo gicv3_its_common_info = { + .name = TYPE_ARM_GICV3_ITS_COMMON, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(GICv3ITSState), + .class_size = sizeof(GICv3ITSCommonClass), + .class_init = gicv3_its_common_class_init, + .abstract = true, +}; + +static void gicv3_its_common_register_types(void) +{ + type_register_static(&gicv3_its_common_info); +} + +type_init(gicv3_its_common_register_types) diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c new file mode 100644 index 0000000000..fc246e0cb5 --- /dev/null +++ b/hw/intc/arm_gicv3_its_kvm.c @@ -0,0 +1,121 @@ +/* + * KVM-based ITS implementation for a GICv3-based system + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Written by Pavel Fedin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/intc/arm_gicv3_its_common.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "kvm_arm.h" +#include "migration/migration.h" + +#define TYPE_KVM_ARM_ITS "arm-its-kvm" +#define KVM_ARM_ITS(obj) OBJECT_CHECK(GICv3ITSState, (obj), TYPE_KVM_ARM_ITS) + +static int kvm_its_send_msi(GICv3ITSState *s, uint32_t value, uint16_t devid) +{ + struct kvm_msi msi; + + if (unlikely(!s->translater_gpa_known)) { + MemoryRegion *mr = &s->iomem_its_translation; + MemoryRegionSection mrs; + + mrs = memory_region_find(mr, 0, 1); + memory_region_unref(mrs.mr); + s->gits_translater_gpa = mrs.offset_within_address_space + 0x40; + s->translater_gpa_known = true; + } + + msi.address_lo = extract64(s->gits_translater_gpa, 0, 32); + msi.address_hi = extract64(s->gits_translater_gpa, 32, 32); + msi.data = le32_to_cpu(value); + msi.flags = KVM_MSI_VALID_DEVID; + msi.devid = devid; + memset(msi.pad, 0, sizeof(msi.pad)); + + return kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi); +} + +static void kvm_arm_its_realize(DeviceState *dev, Error **errp) +{ + GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev); + + s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_ITS, false); + if (s->dev_fd < 0) { + error_setg_errno(errp, -s->dev_fd, "error creating in-kernel ITS"); + return; + } + + /* explicit init of the ITS */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + + /* register the base address */ + kvm_arm_register_device(&s->iomem_its_cntrl, -1, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, s->dev_fd); + + gicv3_its_init_mmio(s, NULL); + + /* + * Block migration of a KVM GICv3 ITS device: the API for saving and + * restoring the state in the kernel is not yet available + */ + error_setg(&s->migration_blocker, "vITS migration is not implemented"); + migrate_add_blocker(s->migration_blocker); + + kvm_msi_use_devid = true; + kvm_gsi_direct_mapping = false; + kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); +} + +static void kvm_arm_its_init(Object *obj) +{ + GICv3ITSState *s = KVM_ARM_ITS(obj); + + object_property_add_link(obj, "parent-gicv3", + "kvm-arm-gicv3", (Object **)&s->gicv3, + object_property_allow_set_link, + OBJ_PROP_LINK_UNREF_ON_RELEASE, + &error_abort); +} + +static void kvm_arm_its_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + GICv3ITSCommonClass *icc = ARM_GICV3_ITS_COMMON_CLASS(klass); + + dc->realize = kvm_arm_its_realize; + icc->send_msi = kvm_its_send_msi; +} + +static const TypeInfo kvm_arm_its_info = { + .name = TYPE_KVM_ARM_ITS, + .parent = TYPE_ARM_GICV3_ITS_COMMON, + .instance_size = sizeof(GICv3ITSState), + .instance_init = kvm_arm_its_init, + .class_init = kvm_arm_its_class_init, +}; + +static void kvm_arm_its_register_types(void) +{ + type_register_static(&kvm_arm_its_info); +} + +type_init(kvm_arm_its_register_types) diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c index 711fde38f3..199a439ccf 100644 --- a/hw/intc/arm_gicv3_kvm.c +++ b/hw/intc/arm_gicv3_kvm.c @@ -85,6 +85,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) GICv3State *s = KVM_ARM_GICV3(dev); KVMARMGICv3Class *kgc = KVM_ARM_GICV3_GET_CLASS(s); Error *local_err = NULL; + int i; DPRINTF("kvm_arm_gicv3_realize\n"); @@ -127,6 +128,18 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) */ error_setg(&s->migration_blocker, "vGICv3 migration is not implemented"); 
migrate_add_blocker(s->migration_blocker); + + if (kvm_has_gsi_routing()) { + /* set up irq routing */ + kvm_init_irq_routing(kvm_state); + for (i = 0; i < s->num_irq - GIC_INTERNAL; ++i) { + kvm_irqchip_add_irq_route(kvm_state, i, 0, i); + } + + kvm_gsi_routing_allowed = true; + + kvm_irqchip_commit_routes(kvm_state); + } } static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/i8259.c b/hw/intc/i8259.c index c2607a5868..fe9ecd6bd4 100644 --- a/hw/intc/i8259.c +++ b/hw/intc/i8259.c @@ -29,6 +29,7 @@ #include "qemu/timer.h" #include "qemu/log.h" #include "hw/isa/i8259_internal.h" +#include "hw/intc/intc.h" /* debug PIC */ //#define DEBUG_PIC @@ -251,6 +252,35 @@ static void pic_reset(DeviceState *dev) pic_init_reset(s); } +static bool pic_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs) +{ + PICCommonState *s = PIC_COMMON(obj); + + if (s->master) { +#ifdef DEBUG_IRQ_COUNT + *irq_counts = irq_count; + *nb_irqs = ARRAY_SIZE(irq_count); +#else + return false; +#endif + } else { + *irq_counts = NULL; + *nb_irqs = 0; + } + return true; +} + +static void pic_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + PICCommonState *s = PIC_COMMON(obj); + monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " + "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", + s->master ? 0 : 1, s->irr, s->imr, s->isr, s->priority_add, + s->irq_base, s->read_reg_select, s->elcr, + s->special_fully_nested_mode); +} + static void pic_ioport_write(void *opaque, hwaddr addr64, uint64_t val64, unsigned size) { @@ -431,42 +461,6 @@ static void pic_realize(DeviceState *dev, Error **errp) pc->parent_realize(dev, errp); } -void hmp_info_pic(Monitor *mon, const QDict *qdict) -{ - int i; - PICCommonState *s; - - if (!isa_pic) { - return; - } - for (i = 0; i < 2; i++) { - s = i == 0 ? 
PIC_COMMON(isa_pic) : slave_pic; - monitor_printf(mon, "pic%d: irr=%02x imr=%02x isr=%02x hprio=%d " - "irq_base=%02x rr_sel=%d elcr=%02x fnm=%d\n", - i, s->irr, s->imr, s->isr, s->priority_add, - s->irq_base, s->read_reg_select, s->elcr, - s->special_fully_nested_mode); - } -} - -void hmp_info_irq(Monitor *mon, const QDict *qdict) -{ -#ifndef DEBUG_IRQ_COUNT - monitor_printf(mon, "irq statistic code not compiled.\n"); -#else - int i; - int64_t count; - - monitor_printf(mon, "IRQ statistics:\n"); - for (i = 0; i < 16; i++) { - count = irq_count[i]; - if (count > 0) { - monitor_printf(mon, "%2d: %" PRId64 "\n", i, count); - } - } -#endif -} - qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq) { qemu_irq *irq_set; @@ -503,10 +497,13 @@ static void i8259_class_init(ObjectClass *klass, void *data) { PICClass *k = PIC_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); k->parent_realize = dc->realize; dc->realize = pic_realize; dc->reset = pic_reset; + ic->get_statistics = pic_get_statistics; + ic->print_info = pic_print_info; } static const TypeInfo i8259_info = { @@ -515,6 +512,10 @@ static const TypeInfo i8259_info = { .parent = TYPE_PIC_COMMON, .class_init = i8259_class_init, .class_size = sizeof(PICClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void pic_register_types(void) diff --git a/hw/intc/i8259_common.c b/hw/intc/i8259_common.c index 3a850b0c66..d9a5e8b217 100644 --- a/hw/intc/i8259_common.c +++ b/hw/intc/i8259_common.c @@ -70,10 +70,11 @@ static int pic_dispatch_post_load(void *opaque, int version_id) static void pic_common_realize(DeviceState *dev, Error **errp) { PICCommonState *s = PIC_COMMON(dev); + ISADevice *isa = ISA_DEVICE(dev); - isa_register_ioport(NULL, &s->base_io, s->iobase); + isa_register_ioport(isa, &s->base_io, s->iobase); if (s->elcr_addr != -1) { - isa_register_ioport(NULL, &s->elcr_io, s->elcr_addr); + isa_register_ioport(isa, &s->elcr_io, s->elcr_addr); } qdev_set_legacy_instance_id(dev, s->iobase, 1); diff --git a/hw/intc/intc.c b/hw/intc/intc.c new file mode 100644 index 0000000000..2e1e29e753 --- /dev/null +++ b/hw/intc/intc.c @@ -0,0 +1,41 @@ +/* + * QEMU Generic Interrupt Controller + * + * Copyright (c) 2016 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/intc/intc.h" +#include "qemu/module.h" + +static const TypeInfo intctrl_info = { + .name = TYPE_INTERRUPT_STATS_PROVIDER, + .parent = TYPE_INTERFACE, + .class_size = sizeof(InterruptStatsProviderClass), +}; + +static void intc_register_types(void) +{ + type_register_static(&intctrl_info); +} + +type_init(intc_register_types) + diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 31791b0986..fd9208fde0 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -416,7 +416,7 @@ static void ioapic_realize(DeviceState *dev, Error **errp) } static Property ioapic_properties[] = { - DEFINE_PROP_UINT8("version", IOAPICCommonState, version, 0x11), + DEFINE_PROP_UINT8("version", IOAPICCommonState, version, 0x20), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/intc/lm32_pic.c b/hw/intc/lm32_pic.c index 3dad01c5ba..09e15115fb 100644 --- a/hw/intc/lm32_pic.c +++ b/hw/intc/lm32_pic.c @@ -25,6 +25,7 @@ #include "hw/sysbus.h" #include "trace.h" #include "hw/lm32/lm32_pic.h" +#include "hw/intc/intc.h" #define TYPE_LM32_PIC "lm32-pic" #define LM32_PIC(obj) OBJECT_CHECK(LM32PicState, (obj), TYPE_LM32_PIC) @@ -38,39 +39,10 @@ struct LM32PicState { uint32_t irq_state; /* statistics */ - uint32_t stats_irq_count[32]; + uint64_t stats_irq_count[32]; }; typedef struct LM32PicState LM32PicState; -static LM32PicState *pic; -void lm32_hmp_info_pic(Monitor *mon, const QDict *qdict) -{ - if (pic == NULL) { - return; - } - - monitor_printf(mon, "lm32-pic: im=%08x ip=%08x irq_state=%08x\n", - pic->im, pic->ip, pic->irq_state); -} - -void lm32_hmp_info_irq(Monitor *mon, const QDict *qdict) -{ - int i; - uint32_t count; - - if (pic == NULL) { - return; - } - - monitor_printf(mon, "IRQ statistics:\n"); - for (i = 0; i < 32; i++) { - count = pic->stats_irq_count[i]; - if (count > 0) { - monitor_printf(mon, "%2d: %u\n", i, count); - } - } -} - static void update_irq(LM32PicState *s) { s->ip |= s->irq_state; @@ -152,6 +124,22 @@ static void pic_reset(DeviceState *d) } } +static bool lm32_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, unsigned int *nb_irqs) +{ + LM32PicState *s = LM32_PIC(obj); + *irq_counts = s->stats_irq_count; + *nb_irqs = ARRAY_SIZE(s->stats_irq_count); + return true; +} + +static void lm32_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + LM32PicState *s = LM32_PIC(obj); + monitor_printf(mon, "lm32-pic: im=%08x ip=%08x irq_state=%08x\n", + s->im, s->ip, s->irq_state); +} + static void lm32_pic_init(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -160,19 +148,17 @@ static void lm32_pic_init(Object *obj) qdev_init_gpio_in(dev, irq_handler, 32); sysbus_init_irq(sbd, &s->parent_irq); - - pic = s; } static const VMStateDescription vmstate_lm32_pic = { .name = "lm32-pic", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { VMSTATE_UINT32(im, LM32PicState), VMSTATE_UINT32(ip, LM32PicState), VMSTATE_UINT32(irq_state, LM32PicState), - VMSTATE_UINT32_ARRAY(stats_irq_count, LM32PicState, 32), + VMSTATE_UINT64_ARRAY(stats_irq_count, LM32PicState, 32), VMSTATE_END_OF_LIST() } }; @@ -180,9 +166,12 @@ static const VMStateDescription vmstate_lm32_pic = { static void lm32_pic_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->reset = pic_reset; dc->vmsd = &vmstate_lm32_pic; + ic->get_statistics = lm32_get_statistics; + ic->print_info = lm32_print_info; } static const 
TypeInfo lm32_pic_info = { @@ -191,6 +180,10 @@ static const TypeInfo lm32_pic_info = { .instance_size = sizeof(LM32PicState), .instance_init = lm32_pic_init, .class_init = lm32_pic_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void lm32_pic_register_types(void) diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c index fef808011f..21ac2e2dcd 100644 --- a/hw/intc/s390_flic_kvm.c +++ b/hw/intc/s390_flic_kvm.c @@ -280,12 +280,13 @@ static void kvm_s390_release_adapter_routes(S390FLICState *fs, * kvm_flic_save - Save pending floating interrupts * @f: QEMUFile containing migration state * @opaque: pointer to flic device state + * @size: ignored * * Note: Pass buf and len to kernel. Start with one page and * increase until buffer is sufficient or maximum size is * reached */ -static void kvm_flic_save(QEMUFile *f, void *opaque) +static void kvm_flic_save(QEMUFile *f, void *opaque, size_t size) { KVMS390FLICState *flic = opaque; int len = FLIC_SAVE_INITIAL_SIZE; @@ -324,24 +325,19 @@ static void kvm_flic_save(QEMUFile *f, void *opaque) * kvm_flic_load - Load pending floating interrupts * @f: QEMUFile containing migration state * @opaque: pointer to flic device state - * @version_id: version id for migration + * @size: ignored * * Returns: value of flic_enqueue_irqs, -EINVAL on error * Note: Do nothing when no interrupts were stored * in QEMUFile */ -static int kvm_flic_load(QEMUFile *f, void *opaque, int version_id) +static int kvm_flic_load(QEMUFile *f, void *opaque, size_t size) { uint64_t len = 0; uint64_t count = 0; void *buf = NULL; int r = 0; - if (version_id != FLIC_SAVEVM_VERSION) { - r = -EINVAL; - goto out; - } - flic_enable_pfault((struct KVMS390FLICState *) opaque); count = qemu_get_be64(f); @@ -372,6 +368,24 @@ static int kvm_flic_load(QEMUFile *f, void *opaque, int version_id) return r; } +static const VMStateDescription kvm_s390_flic_vmstate = { + .name = "s390-flic", + .version_id = FLIC_SAVEVM_VERSION, + .minimum_version_id = FLIC_SAVEVM_VERSION, + .fields = (VMStateField[]) { + { + .name = "irqs", + .info = &(const VMStateInfo) { + .name = "irqs", + .get = kvm_flic_load, + .put = kvm_flic_save, + }, + .flags = VMS_SINGLE, + }, + VMSTATE_END_OF_LIST() + } +}; + static void kvm_s390_flic_realize(DeviceState *dev, Error **errp) { KVMS390FLICState *flic_state = KVM_S390_FLIC(dev); @@ -398,16 +412,6 @@ static void kvm_s390_flic_realize(DeviceState *dev, Error **errp) flic_state->clear_io_supported = !ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr); - /* Register savevm handler for floating interrupts */ - register_savevm(NULL, "s390-flic", 0, 1, kvm_flic_save, - kvm_flic_load, (void *) flic_state); -} - -static void kvm_s390_flic_unrealize(DeviceState *dev, Error **errp) -{ - KVMS390FLICState *flic_state = KVM_S390_FLIC(dev); - - unregister_savevm(DEVICE(flic_state), "s390-flic", flic_state); } static void kvm_s390_flic_reset(DeviceState *dev) @@ -438,7 +442,7 @@ static void kvm_s390_flic_class_init(ObjectClass *oc, void *data) S390FLICStateClass *fsc = S390_FLIC_COMMON_CLASS(oc); dc->realize = kvm_s390_flic_realize; - dc->unrealize = kvm_s390_flic_unrealize; + dc->vmsd = &kvm_s390_flic_vmstate; dc->reset = kvm_s390_flic_reset; fsc->register_io_adapter = kvm_s390_register_io_adapter; fsc->io_adapter_map = kvm_s390_io_adapter_map; diff --git a/hw/intc/slavio_intctl.c b/hw/intc/slavio_intctl.c index e82e893628..84e0bee4a9 100644 --- a/hw/intc/slavio_intctl.c +++ b/hw/intc/slavio_intctl.c @@ -26,6 +26,7 @@
#include "hw/sparc/sun4m.h" #include "monitor/monitor.h" #include "hw/sysbus.h" +#include "hw/intc/intc.h" #include "trace.h" //#define DEBUG_IRQ_COUNT @@ -210,38 +211,6 @@ static const MemoryRegionOps slavio_intctlm_mem_ops = { }, }; -void slavio_pic_info(Monitor *mon, DeviceState *dev) -{ - SLAVIO_INTCTLState *s = SLAVIO_INTCTL(dev); - int i; - - for (i = 0; i < MAX_CPUS; i++) { - monitor_printf(mon, "per-cpu %d: pending 0x%08x\n", i, - s->slaves[i].intreg_pending); - } - monitor_printf(mon, "master: pending 0x%08x, disabled 0x%08x\n", - s->intregm_pending, s->intregm_disabled); -} - -void slavio_irq_info(Monitor *mon, DeviceState *dev) -{ -#ifndef DEBUG_IRQ_COUNT - monitor_printf(mon, "irq statistic code not compiled.\n"); -#else - SLAVIO_INTCTLState *s = SLAVIO_INTCTL(dev); - int i; - int64_t count; - - s = SLAVIO_INTCTL(dev); - monitor_printf(mon, "IRQ statistics:\n"); - for (i = 0; i < 32; i++) { - count = s->irq_count[i]; - if (count > 0) - monitor_printf(mon, "%2d: %" PRId64 "\n", i, count); - } -#endif -} - static const uint32_t intbit_to_level[] = { 2, 3, 5, 7, 9, 11, 13, 2, 3, 5, 7, 9, 11, 13, 12, 12, 6, 13, 4, 10, 8, 9, 11, 0, 0, 0, 0, 15, 15, 15, 15, 0, @@ -418,6 +387,31 @@ static void slavio_intctl_reset(DeviceState *d) slavio_check_interrupts(s, 0); } +#ifdef DEBUG_IRQ_COUNT +static bool slavio_intctl_get_statistics(InterruptStatsProvider *obj, + uint64_t **irq_counts, + unsigned int *nb_irqs) +{ + SLAVIO_INTCTLState *s = SLAVIO_INTCTL(obj); + *irq_counts = s->irq_count; + *nb_irqs = ARRAY_SIZE(s->irq_count); + return true; +} +#endif + +static void slavio_intctl_print_info(InterruptStatsProvider *obj, Monitor *mon) +{ + SLAVIO_INTCTLState *s = SLAVIO_INTCTL(obj); + int i; + + for (i = 0; i < MAX_CPUS; i++) { + monitor_printf(mon, "per-cpu %d: pending 0x%08x\n", i, + s->slaves[i].intreg_pending); + } + monitor_printf(mon, "master: pending 0x%08x, disabled 0x%08x\n", + s->intregm_pending, s->intregm_disabled); +} + static void slavio_intctl_init(Object *obj) { DeviceState *dev = DEVICE(obj); @@ -449,9 +443,14 @@ static void slavio_intctl_init(Object *obj) static void slavio_intctl_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(klass); dc->reset = slavio_intctl_reset; dc->vmsd = &vmstate_intctl; +#ifdef DEBUG_IRQ_COUNT + ic->get_statistics = slavio_intctl_get_statistics; +#endif + ic->print_info = slavio_intctl_print_info; } static const TypeInfo slavio_intctl_info = { @@ -460,6 +459,10 @@ static const TypeInfo slavio_intctl_info = { .instance_size = sizeof(SLAVIO_INTCTLState), .instance_init = slavio_intctl_init, .class_init = slavio_intctl_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; static void slavio_intctl_register_types(void) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index f12192c082..340f617761 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -50,16 +50,17 @@ xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR %#"PRIx3 xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr) "icp_eoi: server %d given XIRR %#"PRIx32" new XIRR %#"PRIx32 xics_icp_irq(int server, int nr, uint8_t priority) "cpu %d trying to deliver irq %#"PRIx32" priority %#x" xics_icp_raise(uint32_t xirr, uint8_t pending_priority) "raising IRQ new XIRR=%#x new pending priority=%#x" -xics_set_irq_msi(int srcno, int nr) "set_irq_msi: srcno %d [irq %#x]" +xics_ics_simple_set_irq_msi(int srcno, int nr) 
"set_irq_msi: srcno %d [irq %#x]" xics_masked_pending(void) "set_irq_msi: masked pending" -xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" -xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" -xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" -xics_ics_eoi(int nr) "ics_eoi: irq %#x" -xics_alloc(int src, int irq) "source#%d, irq %d" -xics_alloc_block(int src, int first, int num, bool lsi, int align) "source#%d, first irq %d, %d irqs, lsi=%d, alignnum %d" +xics_ics_simple_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" +xics_ics_simple_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" +xics_ics_simple_reject(int nr, int srcno) "reject irq %#x [src %d]" +xics_ics_simple_eoi(int nr) "ics_eoi: irq %#x" +xics_alloc(int irq) "irq %d" +xics_alloc_block(int first, int num, bool lsi, int align) "first irq %d, %d irqs, lsi=%d, alignnum %d" xics_ics_free(int src, int irq, int num) "Source#%d, first irq %d, %d irqs" xics_ics_free_warn(int src, int irq) "Source#%d, irq %d is already free" +xics_icp_post_load(uint32_t server_no, uint32_t xirr, uint64_t addr, uint8_t pend) "server_no %d, xirr %#x, xirr_owner 0x%" PRIx64 ", pending %d" # hw/intc/s390_flic_kvm.c flic_create_device(int err) "flic: create device failed %d" @@ -84,12 +85,12 @@ gic_acknowledge_irq(int cpu, int irq) "cpu %d acknowledged irq %d" # hw/intc/arm_gicv3_cpuif.c gicv3_icc_pmr_read(uint32_t cpu, uint64_t val) "GICv3 ICC_PMR read cpu %x value 0x%" PRIx64 gicv3_icc_pmr_write(uint32_t cpu, uint64_t val) "GICv3 ICC_PMR write cpu %x value 0x%" PRIx64 -gicv3_icc_bpr_read(uint32_t cpu, uint64_t val) "GICv3 ICC_BPR read cpu %x value 0x%" PRIx64 -gicv3_icc_bpr_write(uint32_t cpu, uint64_t val) "GICv3 ICC_BPR write cpu %x value 0x%" PRIx64 -gicv3_icc_ap_read(int regno, uint32_t cpu, uint64_t val) "GICv3 ICC_AP%dR read cpu %x value 0x%" PRIx64 -gicv3_icc_ap_write(int regno, uint32_t cpu, uint64_t val) "GICv3 ICC_AP%dR write cpu %x value 0x%" PRIx64 -gicv3_icc_igrpen_read(uint32_t cpu, uint64_t val) "GICv3 ICC_IGRPEN read cpu %x value 0x%" PRIx64 -gicv3_icc_igrpen_write(uint32_t cpu, uint64_t val) "GICv3 ICC_IGRPEN write cpu %x value 0x%" PRIx64 +gicv3_icc_bpr_read(int grp, uint32_t cpu, uint64_t val) "GICv3 ICC_BPR%d read cpu %x value 0x%" PRIx64 +gicv3_icc_bpr_write(int grp, uint32_t cpu, uint64_t val) "GICv3 ICC_BPR%d write cpu %x value 0x%" PRIx64 +gicv3_icc_ap_read(int grp, int regno, uint32_t cpu, uint64_t val) "GICv3 ICC_AP%dR%d read cpu %x value 0x%" PRIx64 +gicv3_icc_ap_write(int grp, int regno, uint32_t cpu, uint64_t val) "GICv3 ICC_AP%dR%d write cpu %x value 0x%" PRIx64 +gicv3_icc_igrpen_read(int grp, uint32_t cpu, uint64_t val) "GICv3 ICC_IGRPEN%d read cpu %x value 0x%" PRIx64 +gicv3_icc_igrpen_write(int grp, uint32_t cpu, uint64_t val) "GICv3 ICC_IGRPEN%d write cpu %x value 0x%" PRIx64 gicv3_icc_igrpen1_el3_read(uint32_t cpu, uint64_t val) "GICv3 ICC_IGRPEN1_EL3 read cpu %x value 0x%" PRIx64 gicv3_icc_igrpen1_el3_write(uint32_t cpu, uint64_t val) "GICv3 ICC_IGRPEN1_EL3 write cpu %x value 0x%" PRIx64 gicv3_icc_ctlr_read(uint32_t cpu, uint64_t val) "GICv3 ICC_CTLR read cpu %x value 0x%" PRIx64 @@ -101,7 +102,7 @@ gicv3_cpuif_set_irqs(uint32_t cpuid, int fiqlevel, int irqlevel) "GICv3 CPU i/f gicv3_icc_generate_sgi(uint32_t cpuid, int irq, int irm, uint32_t aff, uint32_t targetlist) "GICv3 CPU i/f %x generating SGI %d IRM %d target affinity 0x%xxx 
targetlist 0x%x" gicv3_icc_iar0_read(uint32_t cpu, uint64_t val) "GICv3 ICC_IAR0 read cpu %x value 0x%" PRIx64 gicv3_icc_iar1_read(uint32_t cpu, uint64_t val) "GICv3 ICC_IAR1 read cpu %x value 0x%" PRIx64 -gicv3_icc_eoir_write(uint32_t cpu, uint64_t val) "GICv3 ICC_EOIR write cpu %x value 0x%" PRIx64 +gicv3_icc_eoir_write(int grp, uint32_t cpu, uint64_t val) "GICv3 ICC_EOIR%d write cpu %x value 0x%" PRIx64 gicv3_icc_hppir0_read(uint32_t cpu, uint64_t val) "GICv3 ICC_HPPIR0 read cpu %x value 0x%" PRIx64 gicv3_icc_hppir1_read(uint32_t cpu, uint64_t val) "GICv3 ICC_HPPIR1 read cpu %x value 0x%" PRIx64 gicv3_icc_dir_write(uint32_t cpu, uint64_t val) "GICv3 ICC_DIR write cpu %x value 0x%" PRIx64 diff --git a/hw/intc/xics.c b/hw/intc/xics.c index cd48f42046..095c16a300 100644 --- a/hw/intc/xics.c +++ b/hw/intc/xics.c @@ -35,6 +35,8 @@ #include "hw/ppc/xics.h" #include "qemu/error-report.h" #include "qapi/visitor.h" +#include "monitor/monitor.h" +#include "hw/intc/intc.h" int xics_get_cpu_index_by_dt_id(int cpu_dt_id) { @@ -90,19 +92,63 @@ void xics_cpu_setup(XICSState *xics, PowerPCCPU *cpu) } } +static void xics_common_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + XICSState *xics = XICS_COMMON(obj); + ICSState *ics; + uint32_t i; + + for (i = 0; i < xics->nr_servers; i++) { + ICPState *icp = &xics->ss[i]; + + if (!icp->output) { + continue; + } + monitor_printf(mon, "CPU %d XIRR=%08x (%p) PP=%02x MFRR=%02x\n", + i, icp->xirr, icp->xirr_owner, + icp->pending_priority, icp->mfrr); + } + + QLIST_FOREACH(ics, &xics->ics, list) { + monitor_printf(mon, "ICS %4x..%4x %p\n", + ics->offset, ics->offset + ics->nr_irqs - 1, ics); + + if (!ics->irqs) { + continue; + } + + for (i = 0; i < ics->nr_irqs; i++) { + ICSIRQState *irq = ics->irqs + i; + + if (!(irq->flags & XICS_FLAGS_IRQ_MASK)) { + continue; + } + monitor_printf(mon, " %4x %s %02x %02x\n", + ics->offset + i, + (irq->flags & XICS_FLAGS_IRQ_LSI) ? 
+ "LSI" : "MSI", + irq->priority, irq->status); + } + } +} + /* * XICS Common class - parent for emulated XICS and KVM-XICS */ static void xics_common_reset(DeviceState *d) { XICSState *xics = XICS_COMMON(d); + ICSState *ics; int i; for (i = 0; i < xics->nr_servers; i++) { device_reset(DEVICE(&xics->ss[i])); } - device_reset(DEVICE(xics->ics)); + QLIST_FOREACH(ics, &xics->ics, list) { + device_reset(DEVICE(ics)); + } } static void xics_prop_get_nr_irqs(Object *obj, Visitor *v, const char *name, @@ -134,10 +180,28 @@ static void xics_prop_set_nr_irqs(Object *obj, Visitor *v, const char *name, } assert(info->set_nr_irqs); - assert(xics->ics); info->set_nr_irqs(xics, value, errp); } +void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, + const char *typename, Error **errp) +{ + int i; + + xics->nr_servers = nr_servers; + + xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); + for (i = 0; i < xics->nr_servers; i++) { + char name[32]; + ICPState *icp = &xics->ss[i]; + + object_initialize(icp, sizeof(*icp), typename); + snprintf(name, sizeof(name), "icp[%d]", i); + object_property_add_child(OBJECT(xics), name, OBJECT(icp), errp); + icp->xics = xics; + } +} + static void xics_prop_get_nr_servers(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -153,7 +217,7 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v, Error **errp) { XICSState *xics = XICS_COMMON(obj); - XICSStateClass *info = XICS_COMMON_GET_CLASS(xics); + XICSStateClass *xsc = XICS_COMMON_GET_CLASS(xics); Error *error = NULL; int64_t value; @@ -168,12 +232,15 @@ static void xics_prop_set_nr_servers(Object *obj, Visitor *v, return; } - assert(info->set_nr_servers); - info->set_nr_servers(xics, value, errp); + assert(xsc->set_nr_servers); + xsc->set_nr_servers(xics, value, errp); } static void xics_common_initfn(Object *obj) { + XICSState *xics = XICS_COMMON(obj); + + QLIST_INIT(&xics->ics); object_property_add(obj, "nr_irqs", "int", xics_prop_get_nr_irqs, xics_prop_set_nr_irqs, NULL, NULL, NULL); @@ -185,8 +252,10 @@ static void xics_common_initfn(Object *obj) static void xics_common_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); + InterruptStatsProviderClass *ic = INTERRUPT_STATS_PROVIDER_CLASS(oc); dc->reset = xics_common_reset; + ic->print_info = xics_common_pic_print_info; } static const TypeInfo xics_common_info = { @@ -196,6 +265,10 @@ static const TypeInfo xics_common_info = { .class_size = sizeof(XICSStateClass), .instance_init = xics_common_initfn, .class_init = xics_common_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { } + }, }; /* @@ -208,42 +281,65 @@ static const TypeInfo xics_common_info = { #define XISR(ss) (((ss)->xirr) & XISR_MASK) #define CPPR(ss) (((ss)->xirr) >> 24) -static void ics_reject(ICSState *ics, int nr); -static void ics_resend(ICSState *ics); -static void ics_eoi(ICSState *ics, int nr); +static void ics_reject(ICSState *ics, uint32_t nr) +{ + ICSStateClass *k = ICS_BASE_GET_CLASS(ics); + + if (k->reject) { + k->reject(ics, nr); + } +} + +static void ics_resend(ICSState *ics) +{ + ICSStateClass *k = ICS_BASE_GET_CLASS(ics); -static void icp_check_ipi(XICSState *xics, int server) + if (k->resend) { + k->resend(ics); + } +} + +static void ics_eoi(ICSState *ics, int nr) { - ICPState *ss = xics->ss + server; + ICSStateClass *k = ICS_BASE_GET_CLASS(ics); + if (k->eoi) { + k->eoi(ics, nr); + } +} + +static void icp_check_ipi(ICPState *ss) +{ if (XISR(ss) && (ss->pending_priority <= 
ss->mfrr)) { return; } - trace_xics_icp_check_ipi(server, ss->mfrr); + trace_xics_icp_check_ipi(ss->cs->cpu_index, ss->mfrr); - if (XISR(ss)) { - ics_reject(xics->ics, XISR(ss)); + if (XISR(ss) && ss->xirr_owner) { + ics_reject(ss->xirr_owner, XISR(ss)); } ss->xirr = (ss->xirr & ~XISR_MASK) | XICS_IPI; ss->pending_priority = ss->mfrr; + ss->xirr_owner = NULL; qemu_irq_raise(ss->output); } -static void icp_resend(XICSState *xics, int server) +static void icp_resend(ICPState *ss) { - ICPState *ss = xics->ss + server; + ICSState *ics; if (ss->mfrr < CPPR(ss)) { - icp_check_ipi(xics, server); + icp_check_ipi(ss); + } + QLIST_FOREACH(ics, &ss->xics->ics, list) { + ics_resend(ics); } - ics_resend(xics->ics); } -void icp_set_cppr(XICSState *xics, int server, uint8_t cppr) +void icp_set_cppr(ICPState *ss, uint8_t cppr) { - ICPState *ss = xics->ss + server; uint8_t old_cppr; uint32_t old_xisr; @@ -256,22 +352,23 @@ void icp_set_cppr(XICSState *xics, int server, uint8_t cppr) ss->xirr &= ~XISR_MASK; /* Clear XISR */ ss->pending_priority = 0xff; qemu_irq_lower(ss->output); - ics_reject(xics->ics, old_xisr); + if (ss->xirr_owner) { + ics_reject(ss->xirr_owner, old_xisr); + ss->xirr_owner = NULL; + } } } else { if (!XISR(ss)) { - icp_resend(xics, server); + icp_resend(ss); } } } -void icp_set_mfrr(XICSState *xics, int server, uint8_t mfrr) +void icp_set_mfrr(ICPState *ss, uint8_t mfrr) { - ICPState *ss = xics->ss + server; - ss->mfrr = mfrr; if (mfrr < CPPR(ss)) { - icp_check_ipi(xics, server); + icp_check_ipi(ss); } } @@ -282,6 +379,7 @@ uint32_t icp_accept(ICPState *ss) qemu_irq_lower(ss->output); ss->xirr = ss->pending_priority << 24; ss->pending_priority = 0xff; + ss->xirr_owner = NULL; trace_xics_icp_accept(xirr, ss->xirr); @@ -296,33 +394,42 @@ uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr) return ss->xirr; } -void icp_eoi(XICSState *xics, int server, uint32_t xirr) +void icp_eoi(ICPState *ss, uint32_t xirr) { - ICPState *ss = xics->ss + server; + ICSState *ics; + uint32_t irq; /* Send EOI -> ICS */ ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); - trace_xics_icp_eoi(server, xirr, ss->xirr); - ics_eoi(xics->ics, xirr & XISR_MASK); + trace_xics_icp_eoi(ss->cs->cpu_index, xirr, ss->xirr); + irq = xirr & XISR_MASK; + QLIST_FOREACH(ics, &ss->xics->ics, list) { + if (ics_valid_irq(ics, irq)) { + ics_eoi(ics, irq); + } + } if (!XISR(ss)) { - icp_resend(xics, server); + icp_resend(ss); } } -static void icp_irq(XICSState *xics, int server, int nr, uint8_t priority) +static void icp_irq(ICSState *ics, int server, int nr, uint8_t priority) { + XICSState *xics = ics->xics; ICPState *ss = xics->ss + server; trace_xics_icp_irq(server, nr, priority); if ((priority >= CPPR(ss)) || (XISR(ss) && (ss->pending_priority <= priority))) { - ics_reject(xics->ics, nr); + ics_reject(ics, nr); } else { - if (XISR(ss)) { - ics_reject(xics->ics, XISR(ss)); + if (XISR(ss) && ss->xirr_owner) { + ics_reject(ss->xirr_owner, XISR(ss)); + ss->xirr_owner = NULL; } ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK); + ss->xirr_owner = ics; ss->pending_priority = priority; trace_xics_icp_raise(ss->xirr, ss->pending_priority); qemu_irq_raise(ss->output); @@ -397,7 +504,7 @@ static const TypeInfo icp_info = { /* * ICS: Source layer */ -static void resend_msi(ICSState *ics, int srcno) +static void ics_simple_resend_msi(ICSState *ics, int srcno) { ICSIRQState *irq = ics->irqs + srcno; @@ -405,13 +512,12 @@ static void resend_msi(ICSState *ics, int srcno) if (irq->status & XICS_STATUS_REJECTED) { irq->status &= 
~XICS_STATUS_REJECTED; if (irq->priority != 0xff) { - icp_irq(ics->xics, irq->server, srcno + ics->offset, - irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } } } -static void resend_lsi(ICSState *ics, int srcno) +static void ics_simple_resend_lsi(ICSState *ics, int srcno) { ICSIRQState *irq = ics->irqs + srcno; @@ -419,51 +525,51 @@ static void resend_lsi(ICSState *ics, int srcno) && (irq->status & XICS_STATUS_ASSERTED) && !(irq->status & XICS_STATUS_SENT)) { irq->status |= XICS_STATUS_SENT; - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } } -static void set_irq_msi(ICSState *ics, int srcno, int val) +static void ics_simple_set_irq_msi(ICSState *ics, int srcno, int val) { ICSIRQState *irq = ics->irqs + srcno; - trace_xics_set_irq_msi(srcno, srcno + ics->offset); + trace_xics_ics_simple_set_irq_msi(srcno, srcno + ics->offset); if (val) { if (irq->priority == 0xff) { irq->status |= XICS_STATUS_MASKED_PENDING; trace_xics_masked_pending(); } else { - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } } } -static void set_irq_lsi(ICSState *ics, int srcno, int val) +static void ics_simple_set_irq_lsi(ICSState *ics, int srcno, int val) { ICSIRQState *irq = ics->irqs + srcno; - trace_xics_set_irq_lsi(srcno, srcno + ics->offset); + trace_xics_ics_simple_set_irq_lsi(srcno, srcno + ics->offset); if (val) { irq->status |= XICS_STATUS_ASSERTED; } else { irq->status &= ~XICS_STATUS_ASSERTED; } - resend_lsi(ics, srcno); + ics_simple_resend_lsi(ics, srcno); } -static void ics_set_irq(void *opaque, int srcno, int val) +static void ics_simple_set_irq(void *opaque, int srcno, int val) { ICSState *ics = (ICSState *)opaque; if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { - set_irq_lsi(ics, srcno, val); + ics_simple_set_irq_lsi(ics, srcno, val); } else { - set_irq_msi(ics, srcno, val); + ics_simple_set_irq_msi(ics, srcno, val); } } -static void write_xive_msi(ICSState *ics, int srcno) +static void ics_simple_write_xive_msi(ICSState *ics, int srcno) { ICSIRQState *irq = ics->irqs + srcno; @@ -473,71 +579,74 @@ static void write_xive_msi(ICSState *ics, int srcno) } irq->status &= ~XICS_STATUS_MASKED_PENDING; - icp_irq(ics->xics, irq->server, srcno + ics->offset, irq->priority); + icp_irq(ics, irq->server, srcno + ics->offset, irq->priority); } -static void write_xive_lsi(ICSState *ics, int srcno) +static void ics_simple_write_xive_lsi(ICSState *ics, int srcno) { - resend_lsi(ics, srcno); + ics_simple_resend_lsi(ics, srcno); } -void ics_write_xive(ICSState *ics, int nr, int server, - uint8_t priority, uint8_t saved_priority) +void ics_simple_write_xive(ICSState *ics, int srcno, int server, + uint8_t priority, uint8_t saved_priority) { - int srcno = nr - ics->offset; ICSIRQState *irq = ics->irqs + srcno; irq->server = server; irq->priority = priority; irq->saved_priority = saved_priority; - trace_xics_ics_write_xive(nr, srcno, server, priority); + trace_xics_ics_simple_write_xive(ics->offset + srcno, srcno, server, + priority); if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { - write_xive_lsi(ics, srcno); + ics_simple_write_xive_lsi(ics, srcno); } else { - write_xive_msi(ics, srcno); + ics_simple_write_xive_msi(ics, srcno); } } -static void ics_reject(ICSState *ics, int nr) +static void ics_simple_reject(ICSState *ics, uint32_t nr) { ICSIRQState *irq = ics->irqs + nr - ics->offset; - 
trace_xics_ics_reject(nr, nr - ics->offset); - irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */ - irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */ + trace_xics_ics_simple_reject(nr, nr - ics->offset); + if (irq->flags & XICS_FLAGS_IRQ_MSI) { + irq->status |= XICS_STATUS_REJECTED; + } else if (irq->flags & XICS_FLAGS_IRQ_LSI) { + irq->status &= ~XICS_STATUS_SENT; + } } -static void ics_resend(ICSState *ics) +static void ics_simple_resend(ICSState *ics) { int i; for (i = 0; i < ics->nr_irqs; i++) { /* FIXME: filter by server#? */ if (ics->irqs[i].flags & XICS_FLAGS_IRQ_LSI) { - resend_lsi(ics, i); + ics_simple_resend_lsi(ics, i); } else { - resend_msi(ics, i); + ics_simple_resend_msi(ics, i); } } } -static void ics_eoi(ICSState *ics, int nr) +static void ics_simple_eoi(ICSState *ics, uint32_t nr) { int srcno = nr - ics->offset; ICSIRQState *irq = ics->irqs + srcno; - trace_xics_ics_eoi(nr); + trace_xics_ics_simple_eoi(nr); if (ics->irqs[srcno].flags & XICS_FLAGS_IRQ_LSI) { irq->status &= ~XICS_STATUS_SENT; } } -static void ics_reset(DeviceState *dev) +static void ics_simple_reset(DeviceState *dev) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); int i; uint8_t flags[ics->nr_irqs]; @@ -554,31 +663,31 @@ static void ics_reset(DeviceState *dev) } } -static int ics_post_load(ICSState *ics, int version_id) +static int ics_simple_post_load(ICSState *ics, int version_id) { int i; for (i = 0; i < ics->xics->nr_servers; i++) { - icp_resend(ics->xics, i); + icp_resend(&ics->xics->ss[i]); } return 0; } -static void ics_dispatch_pre_save(void *opaque) +static void ics_simple_dispatch_pre_save(void *opaque) { ICSState *ics = opaque; - ICSStateClass *info = ICS_GET_CLASS(ics); + ICSStateClass *info = ICS_BASE_GET_CLASS(ics); if (info->pre_save) { info->pre_save(ics); } } -static int ics_dispatch_post_load(void *opaque, int version_id) +static int ics_simple_dispatch_post_load(void *opaque, int version_id) { ICSState *ics = opaque; - ICSStateClass *info = ICS_GET_CLASS(ics); + ICSStateClass *info = ICS_BASE_GET_CLASS(ics); if (info->post_load) { return info->post_load(ics, version_id); @@ -587,7 +696,7 @@ static int ics_dispatch_post_load(void *opaque, int version_id) return 0; } -static const VMStateDescription vmstate_ics_irq = { +static const VMStateDescription vmstate_ics_simple_irq = { .name = "ics/irq", .version_id = 2, .minimum_version_id = 1, @@ -601,86 +710,93 @@ static const VMStateDescription vmstate_ics_irq = { }, }; -static const VMStateDescription vmstate_ics = { +static const VMStateDescription vmstate_ics_simple = { .name = "ics", .version_id = 1, .minimum_version_id = 1, - .pre_save = ics_dispatch_pre_save, - .post_load = ics_dispatch_post_load, + .pre_save = ics_simple_dispatch_pre_save, + .post_load = ics_simple_dispatch_post_load, .fields = (VMStateField[]) { /* Sanity check */ VMSTATE_UINT32_EQUAL(nr_irqs, ICSState), VMSTATE_STRUCT_VARRAY_POINTER_UINT32(irqs, ICSState, nr_irqs, - vmstate_ics_irq, ICSIRQState), + vmstate_ics_simple_irq, + ICSIRQState), VMSTATE_END_OF_LIST() }, }; -static void ics_initfn(Object *obj) +static void ics_simple_initfn(Object *obj) { - ICSState *ics = ICS(obj); + ICSState *ics = ICS_SIMPLE(obj); ics->offset = XICS_IRQ_BASE; } -static void ics_realize(DeviceState *dev, Error **errp) +static void ics_simple_realize(DeviceState *dev, Error **errp) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); if (!ics->nr_irqs) { error_setg(errp, "Number of interrupts needs to be greater 0"); 
return; } ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState)); - ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs); + ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs); } -static void ics_class_init(ObjectClass *klass, void *data) +static void ics_simple_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - ICSStateClass *isc = ICS_CLASS(klass); + ICSStateClass *isc = ICS_BASE_CLASS(klass); - dc->realize = ics_realize; - dc->vmsd = &vmstate_ics; - dc->reset = ics_reset; - isc->post_load = ics_post_load; + dc->realize = ics_simple_realize; + dc->vmsd = &vmstate_ics_simple; + dc->reset = ics_simple_reset; + isc->post_load = ics_simple_post_load; + isc->reject = ics_simple_reject; + isc->resend = ics_simple_resend; + isc->eoi = ics_simple_eoi; } -static const TypeInfo ics_info = { - .name = TYPE_ICS, +static const TypeInfo ics_simple_info = { + .name = TYPE_ICS_SIMPLE, + .parent = TYPE_ICS_BASE, + .instance_size = sizeof(ICSState), + .class_init = ics_simple_class_init, + .class_size = sizeof(ICSStateClass), + .instance_init = ics_simple_initfn, +}; + +static const TypeInfo ics_base_info = { + .name = TYPE_ICS_BASE, .parent = TYPE_DEVICE, + .abstract = true, .instance_size = sizeof(ICSState), - .class_init = ics_class_init, .class_size = sizeof(ICSStateClass), - .instance_init = ics_initfn, }; /* * Exported functions */ -int xics_find_source(XICSState *xics, int irq) +ICSState *xics_find_source(XICSState *xics, int irq) { - int sources = 1; - int src; + ICSState *ics; - /* FIXME: implement multiple sources */ - for (src = 0; src < sources; ++src) { - ICSState *ics = &xics->ics[src]; + QLIST_FOREACH(ics, &xics->ics, list) { if (ics_valid_irq(ics, irq)) { - return src; + return ics; } } - - return -1; + return NULL; } qemu_irq xics_get_qirq(XICSState *xics, int irq) { - int src = xics_find_source(xics, irq); + ICSState *ics = xics_find_source(xics, irq); - if (src >= 0) { - ICSState *ics = &xics->ics[src]; + if (ics) { return ics->qirqs[irq - ics->offset]; } @@ -698,7 +814,8 @@ void ics_set_irq_type(ICSState *ics, int srcno, bool lsi) static void xics_register_types(void) { type_register_static(&xics_common_info); - type_register_static(&ics_info); + type_register_static(&ics_simple_info); + type_register_static(&ics_base_info); type_register_static(&icp_info); } diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c index edbd62fd1b..17694eaa87 100644 --- a/hw/intc/xics_kvm.c +++ b/hw/intc/xics_kvm.c @@ -272,7 +272,7 @@ static void ics_kvm_set_irq(void *opaque, int srcno, int val) static void ics_kvm_reset(DeviceState *dev) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); int i; uint8_t flags[ics->nr_irqs]; @@ -293,7 +293,7 @@ static void ics_kvm_reset(DeviceState *dev) static void ics_kvm_realize(DeviceState *dev, Error **errp) { - ICSState *ics = ICS(dev); + ICSState *ics = ICS_SIMPLE(dev); if (!ics->nr_irqs) { error_setg(errp, "Number of interrupts needs to be greater 0"); @@ -306,7 +306,7 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp) static void ics_kvm_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - ICSStateClass *icsc = ICS_CLASS(klass); + ICSStateClass *icsc = ICS_BASE_CLASS(klass); dc->realize = ics_kvm_realize; dc->reset = ics_kvm_reset; @@ -315,8 +315,8 @@ static void ics_kvm_class_init(ObjectClass *klass, void *data) } static const TypeInfo ics_kvm_info = { - .name = TYPE_KVM_ICS, - .parent = TYPE_ICS, + .name = TYPE_ICS_KVM, + .parent = 
TYPE_ICS_SIMPLE, .instance_size = sizeof(ICSState), .class_init = ics_kvm_class_init, }; @@ -329,6 +329,7 @@ static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu) CPUState *cs; ICPState *ss; KVMXICSState *xicskvm = XICS_SPAPR_KVM(xics); + int ret; cs = CPU(cpu); ss = &xics->ss[cs->cpu_index]; @@ -347,42 +348,32 @@ static void xics_kvm_cpu_setup(XICSState *xics, PowerPCCPU *cpu) return; } - if (xicskvm->kernel_xics_fd != -1) { - int ret; - - ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, - xicskvm->kernel_xics_fd, - kvm_arch_vcpu_id(cs)); - if (ret < 0) { - error_report("Unable to connect CPU%ld to kernel XICS: %s", - kvm_arch_vcpu_id(cs), strerror(errno)); - exit(1); - } - ss->cap_irq_xics_enabled = true; + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, xicskvm->kernel_xics_fd, + kvm_arch_vcpu_id(cs)); + if (ret < 0) { + error_report("Unable to connect CPU%ld to kernel XICS: %s", + kvm_arch_vcpu_id(cs), strerror(errno)); + exit(1); } + ss->cap_irq_xics_enabled = true; } static void xics_kvm_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp) { - xics->nr_irqs = xics->ics->nr_irqs = nr_irqs; + ICSState *ics = QLIST_FIRST(&xics->ics); + + /* This needs to be deprecated ... */ + xics->nr_irqs = nr_irqs; + if (ics) { + ics->nr_irqs = nr_irqs; + } } static void xics_kvm_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp) { - int i; - - xics->nr_servers = nr_servers; - - xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); - for (i = 0; i < xics->nr_servers; i++) { - char buffer[32]; - object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_KVM_ICP); - snprintf(buffer, sizeof(buffer), "icp[%d]", i); - object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]), - errp); - } + xics_set_nr_servers(xics, nr_servers, TYPE_KVM_ICP, errp); } static void rtas_dummy(PowerPCCPU *cpu, sPAPRMachineState *spapr, @@ -398,6 +389,7 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) { KVMXICSState *xicskvm = XICS_SPAPR_KVM(dev); XICSState *xics = XICS_COMMON(dev); + ICSState *ics; int i, rc; Error *error = NULL; struct kvm_create_device xics_create_device = { @@ -449,10 +441,12 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) xicskvm->kernel_xics_fd = xics_create_device.fd; - object_property_set_bool(OBJECT(xics->ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - goto fail; + QLIST_FOREACH(ics, &xics->ics, list) { + object_property_set_bool(OBJECT(ics), true, "realized", &error); + if (error) { + error_propagate(errp, error); + goto fail; + } } assert(xics->nr_servers); @@ -481,10 +475,12 @@ static void xics_kvm_realize(DeviceState *dev, Error **errp) static void xics_kvm_initfn(Object *obj) { XICSState *xics = XICS_COMMON(obj); + ICSState *ics; - xics->ics = ICS(object_new(TYPE_KVM_ICS)); - object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL); - xics->ics->xics = xics; + ics = ICS_SIMPLE(object_new(TYPE_ICS_KVM)); + object_property_add_child(obj, "ics", OBJECT(ics), NULL); + ics->xics = xics; + QLIST_INSERT_HEAD(&xics->ics, ics, list); } static void xics_kvm_class_init(ObjectClass *oc, void *data) diff --git a/hw/intc/xics_spapr.c b/hw/intc/xics_spapr.c index 618826dacf..2e3f1c5e95 100644 --- a/hw/intc/xics_spapr.c +++ b/hw/intc/xics_spapr.c @@ -32,6 +32,7 @@ #include "qemu/timer.h" #include "hw/ppc/spapr.h" #include "hw/ppc/xics.h" +#include "hw/ppc/fdt.h" #include "qapi/visitor.h" #include "qapi/error.h" @@ -43,9 +44,10 @@ static target_ulong h_cppr(PowerPCCPU *cpu, 
sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; target_ulong cppr = args[0]; - icp_set_cppr(spapr->xics, cs->cpu_index, cppr); + icp_set_cppr(icp, cppr); return H_SUCCESS; } @@ -59,7 +61,7 @@ static target_ulong h_ipi(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_PARAMETER; } - icp_set_mfrr(spapr->xics, server, mfrr); + icp_set_mfrr(spapr->xics->ss + server, mfrr); return H_SUCCESS; } @@ -67,7 +69,8 @@ static target_ulong h_xirr(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - uint32_t xirr = icp_accept(spapr->xics->ss + cs->cpu_index); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + uint32_t xirr = icp_accept(icp); args[0] = xirr; return H_SUCCESS; @@ -77,8 +80,8 @@ static target_ulong h_xirr_x(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); - ICPState *ss = &spapr->xics->ss[cs->cpu_index]; - uint32_t xirr = icp_accept(ss); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; + uint32_t xirr = icp_accept(icp); args[0] = xirr; args[1] = cpu_get_host_ticks(); @@ -89,9 +92,10 @@ static target_ulong h_eoi(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; target_ulong xirr = args[0]; - icp_eoi(spapr->xics, cs->cpu_index, xirr); + icp_eoi(icp, xirr); return H_SUCCESS; } @@ -99,8 +103,9 @@ static target_ulong h_ipoll(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { CPUState *cs = CPU(cpu); + ICPState *icp = &spapr->xics->ss[cs->cpu_index]; uint32_t mfrr; - uint32_t xirr = icp_ipoll(spapr->xics->ss + cs->cpu_index, &mfrr); + uint32_t xirr = icp_ipoll(icp, &mfrr); args[0] = xirr; args[1] = mfrr; @@ -113,13 +118,17 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; - uint32_t nr, server, priority; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + uint32_t nr, srcno, server, priority; if ((nargs != 3) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); server = xics_get_cpu_index_by_dt_id(rtas_ld(args, 1)); @@ -131,7 +140,8 @@ static void rtas_set_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - ics_write_xive(ics, nr, server, priority, priority); + srcno = nr - ics->offset; + ics_simple_write_xive(ics, srcno, server, priority, priority); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -141,13 +151,17 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; - uint32_t nr; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + uint32_t nr, srcno; if ((nargs != 1) || (nret != 3)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); @@ -157,8 +171,9 @@ static void rtas_get_xive(PowerPCCPU *cpu, sPAPRMachineState *spapr, } rtas_st(rets, 0, RTAS_OUT_SUCCESS); - rtas_st(rets, 1, ics->irqs[nr - ics->offset].server); - rtas_st(rets, 2, ics->irqs[nr - ics->offset].priority); + srcno = nr - ics->offset; + rtas_st(rets, 1, ics->irqs[srcno].server); + rtas_st(rets, 2, ics->irqs[srcno].priority); } 
static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, @@ -166,13 +181,17 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; - uint32_t nr; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); @@ -181,8 +200,9 @@ static void rtas_int_off(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff, - ics->irqs[nr - ics->offset].priority); + srcno = nr - ics->offset; + ics_simple_write_xive(ics, srcno, ics->irqs[srcno].server, 0xff, + ics->irqs[srcno].priority); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -192,13 +212,17 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets) { - ICSState *ics = spapr->xics->ics; - uint32_t nr; + ICSState *ics = QLIST_FIRST(&spapr->xics->ics); + uint32_t nr, srcno; if ((nargs != 1) || (nret != 1)) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); return; } + if (!ics) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } nr = rtas_ld(args, 0); @@ -207,9 +231,10 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, return; } - ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, - ics->irqs[nr - ics->offset].saved_priority, - ics->irqs[nr - ics->offset].saved_priority); + srcno = nr - ics->offset; + ics_simple_write_xive(ics, srcno, ics->irqs[srcno].server, + ics->irqs[srcno].saved_priority, + ics->irqs[srcno].saved_priority); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } @@ -217,29 +242,25 @@ static void rtas_int_on(PowerPCCPU *cpu, sPAPRMachineState *spapr, static void xics_spapr_set_nr_irqs(XICSState *xics, uint32_t nr_irqs, Error **errp) { - xics->nr_irqs = xics->ics->nr_irqs = nr_irqs; + ICSState *ics = QLIST_FIRST(&xics->ics); + + /* This needs to be deprecated ... 
*/ + xics->nr_irqs = nr_irqs; + if (ics) { + ics->nr_irqs = nr_irqs; + } } static void xics_spapr_set_nr_servers(XICSState *xics, uint32_t nr_servers, Error **errp) { - int i; - - xics->nr_servers = nr_servers; - - xics->ss = g_malloc0(xics->nr_servers * sizeof(ICPState)); - for (i = 0; i < xics->nr_servers; i++) { - char buffer[32]; - object_initialize(&xics->ss[i], sizeof(xics->ss[i]), TYPE_ICP); - snprintf(buffer, sizeof(buffer), "icp[%d]", i); - object_property_add_child(OBJECT(xics), buffer, OBJECT(&xics->ss[i]), - errp); - } + xics_set_nr_servers(xics, nr_servers, TYPE_ICP, errp); } static void xics_spapr_realize(DeviceState *dev, Error **errp) { XICSState *xics = XICS_SPAPR(dev); + ICSState *ics; Error *error = NULL; int i; @@ -261,10 +282,12 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp) spapr_register_hypercall(H_EOI, h_eoi); spapr_register_hypercall(H_IPOLL, h_ipoll); - object_property_set_bool(OBJECT(xics->ics), true, "realized", &error); - if (error) { - error_propagate(errp, error); - return; + QLIST_FOREACH(ics, &xics->ics, list) { + object_property_set_bool(OBJECT(ics), true, "realized", &error); + if (error) { + error_propagate(errp, error); + return; + } } for (i = 0; i < xics->nr_servers; i++) { @@ -280,10 +303,12 @@ static void xics_spapr_realize(DeviceState *dev, Error **errp) static void xics_spapr_initfn(Object *obj) { XICSState *xics = XICS_SPAPR(obj); + ICSState *ics; - xics->ics = ICS(object_new(TYPE_ICS)); - object_property_add_child(obj, "ics", OBJECT(xics->ics), NULL); - xics->ics->xics = xics; + ics = ICS_SIMPLE(object_new(TYPE_ICS_SIMPLE)); + object_property_add_child(obj, "ics", OBJECT(ics), NULL); + ics->xics = xics; + QLIST_INSERT_HEAD(&xics->ics, ics, list); } static void xics_spapr_class_init(ObjectClass *oc, void *data) @@ -329,14 +354,15 @@ static int ics_find_free_block(ICSState *ics, int num, int alignnum) return -1; } -int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi, - Error **errp) +int xics_spapr_alloc(XICSState *xics, int irq_hint, bool lsi, Error **errp) { - ICSState *ics = &xics->ics[src]; + ICSState *ics = QLIST_FIRST(&xics->ics); int irq; + if (!ics) { + return -1; + } if (irq_hint) { - assert(src == xics_find_source(xics, irq_hint)); if (!ICS_IRQ_FREE(ics, irq_hint - ics->offset)) { error_setg(errp, "can't allocate IRQ %d: already in use", irq_hint); return -1; @@ -352,7 +378,7 @@ int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi, } ics_set_irq_type(ics, irq - ics->offset, lsi); - trace_xics_alloc(src, irq); + trace_xics_alloc(irq); return irq; } @@ -361,13 +387,16 @@ int xics_spapr_alloc(XICSState *xics, int src, int irq_hint, bool lsi, * Allocate block of consecutive IRQs, and return the number of the first IRQ in * the block. If align==true, aligns the first IRQ number to num. 
*/ -int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, - bool align, Error **errp) +int xics_spapr_alloc_block(XICSState *xics, int num, bool lsi, bool align, + Error **errp) { + ICSState *ics = QLIST_FIRST(&xics->ics); int i, first = -1; - ICSState *ics = &xics->ics[src]; - assert(src == 0); + if (!ics) { + return -1; + } + /* * MSIMessage::data is used for storing VIRQ so * it has to be aligned to num to support multiple @@ -394,7 +423,7 @@ int xics_spapr_alloc_block(XICSState *xics, int src, int num, bool lsi, } first += ics->offset; - trace_xics_alloc_block(src, first, num, lsi, align); + trace_xics_alloc_block(first, num, lsi, align); return first; } @@ -405,7 +434,7 @@ static void ics_free(ICSState *ics, int srcno, int num) for (i = srcno; i < srcno + num; ++i) { if (ICS_IRQ_FREE(ics, i)) { - trace_xics_ics_free_warn(ics - ics->xics->ics, i + ics->offset); + trace_xics_ics_free_warn(0, i + ics->offset); } memset(&ics->irqs[i], 0, sizeof(ICSIRQState)); } @@ -413,19 +442,35 @@ static void ics_free(ICSState *ics, int srcno, int num) void xics_spapr_free(XICSState *xics, int irq, int num) { - int src = xics_find_source(xics, irq); - - if (src >= 0) { - ICSState *ics = &xics->ics[src]; - - /* FIXME: implement multiple sources */ - assert(src == 0); + ICSState *ics = xics_find_source(xics, irq); - trace_xics_ics_free(ics - xics->ics, irq, num); + if (ics) { + trace_xics_ics_free(0, irq, num); ics_free(ics, irq - ics->offset, num); } } +void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle) +{ + uint32_t interrupt_server_ranges_prop[] = { + 0, cpu_to_be32(xics->nr_servers), + }; + int node; + + _FDT(node = fdt_add_subnode(fdt, 0, "interrupt-controller")); + + _FDT(fdt_setprop_string(fdt, node, "device_type", + "PowerPC-External-Interrupt-Presentation")); + _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,ppc-xicp")); + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); + _FDT(fdt_setprop(fdt, node, "ibm,interrupt-server-ranges", + interrupt_server_ranges_prop, + sizeof(interrupt_server_ranges_prop))); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + _FDT(fdt_setprop_cell(fdt, node, "linux,phandle", phandle)); + _FDT(fdt_setprop_cell(fdt, node, "phandle", phandle)); +} + static void xics_spapr_register_types(void) { type_register_static(&xics_spapr_info); diff --git a/hw/ipmi/Makefile.objs b/hw/ipmi/Makefile.objs index a90318d5ba..1b422bbee0 100644 --- a/hw/ipmi/Makefile.objs +++ b/hw/ipmi/Makefile.objs @@ -1,5 +1,5 @@ common-obj-$(CONFIG_IPMI) += ipmi.o common-obj-$(CONFIG_IPMI_LOCAL) += ipmi_bmc_sim.o -common-obj-$(CONFIG_IPMI_LOCAL) += ipmi_bmc_extern.o +common-obj-$(CONFIG_IPMI_EXTERN) += ipmi_bmc_extern.o common-obj-$(CONFIG_ISA_IPMI_KCS) += isa_ipmi_kcs.o common-obj-$(CONFIG_ISA_IPMI_BT) += isa_ipmi_bt.o diff --git a/hw/ipmi/ipmi.c b/hw/ipmi/ipmi.c index f09f217e78..5cf1caa88a 100644 --- a/hw/ipmi/ipmi.c +++ b/hw/ipmi/ipmi.c @@ -51,7 +51,7 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) if (checkonly) { return 0; } - qemu_system_powerdown_request(); + qemu_system_shutdown_request(); return 0; case IPMI_SEND_NMI: @@ -61,9 +61,15 @@ static int ipmi_do_hw_op(IPMIInterface *s, enum ipmi_op op, int checkonly) qmp_inject_nmi(NULL); return 0; + case IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP: + if (checkonly) { + return 0; + } + qemu_system_powerdown_request(); + return 0; + case IPMI_POWERCYCLE_CHASSIS: case IPMI_PULSE_DIAG_IRQ: - case IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP: case IPMI_POWERON_CHASSIS: default: return
IPMI_CC_COMMAND_NOT_SUPPORTED; diff --git a/hw/ipmi/ipmi_bmc_extern.c b/hw/ipmi/ipmi_bmc_extern.c index 157879e177..e8e3d250b6 100644 --- a/hw/ipmi/ipmi_bmc_extern.c +++ b/hw/ipmi/ipmi_bmc_extern.c @@ -54,7 +54,8 @@ #define VM_CAPABILITIES_IRQ 0x04 #define VM_CAPABILITIES_NMI 0x08 #define VM_CAPABILITIES_ATTN 0x10 -#define VM_CMD_FORCEOFF 0x09 +#define VM_CAPABILITIES_GRACEFUL_SHUTDOWN 0x20 +#define VM_CMD_GRACEFUL_SHUTDOWN 0x09 #define TYPE_IPMI_BMC_EXTERN "ipmi-bmc-extern" #define IPMI_BMC_EXTERN(obj) OBJECT_CHECK(IPMIBmcExtern, (obj), \ @@ -62,7 +63,7 @@ typedef struct IPMIBmcExtern { IPMIBmc parent; - CharDriverState *chr; + CharBackend chr; bool connected; @@ -100,12 +101,16 @@ ipmb_checksum(const unsigned char *data, int size, unsigned char start) static void continue_send(IPMIBmcExtern *ibe) { + int ret; if (ibe->outlen == 0) { goto check_reset; } send: - ibe->outpos += qemu_chr_fe_write(ibe->chr, ibe->outbuf + ibe->outpos, - ibe->outlen - ibe->outpos); + ret = qemu_chr_fe_write(&ibe->chr, ibe->outbuf + ibe->outpos, + ibe->outlen - ibe->outpos); + if (ret > 0) { + ibe->outpos += ret; + } if (ibe->outpos < ibe->outlen) { /* Not fully transmitted, try again in a 10ms */ timer_mod_ns(ibe->extern_timer, @@ -272,8 +277,8 @@ static void handle_hw_op(IPMIBmcExtern *ibe, unsigned char hw_op) k->do_hw_op(s, IPMI_SEND_NMI, 0); break; - case VM_CMD_FORCEOFF: - qemu_system_shutdown_request(); + case VM_CMD_GRACEFUL_SHUTDOWN: + k->do_hw_op(s, IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, 0); break; } } @@ -397,6 +402,10 @@ static void chr_event(void *opaque, int event) if (k->do_hw_op(ibe->parent.intf, IPMI_POWEROFF_CHASSIS, 1) == 0) { v |= VM_CAPABILITIES_POWER; } + if (k->do_hw_op(ibe->parent.intf, IPMI_SHUTDOWN_VIA_ACPI_OVERTEMP, 1) + == 0) { + v |= VM_CAPABILITIES_GRACEFUL_SHUTDOWN; + } if (k->do_hw_op(ibe->parent.intf, IPMI_RESET_CHASSIS, 1) == 0) { v |= VM_CAPABILITIES_RESET; } @@ -438,12 +447,13 @@ static void ipmi_bmc_extern_realize(DeviceState *dev, Error **errp) { IPMIBmcExtern *ibe = IPMI_BMC_EXTERN(dev); - if (!ibe->chr) { + if (!qemu_chr_fe_get_driver(&ibe->chr)) { error_setg(errp, "IPMI external bmc requires chardev attribute"); return; } - qemu_chr_add_handlers(ibe->chr, can_receive, receive, chr_event, ibe); + qemu_chr_fe_set_handlers(&ibe->chr, can_receive, receive, + chr_event, ibe, NULL, true); } static int ipmi_bmc_extern_post_migrate(void *opaque, int version_id) @@ -487,6 +497,14 @@ static void ipmi_bmc_extern_init(Object *obj) vmstate_register(NULL, 0, &vmstate_ipmi_bmc_extern, ibe); } +static void ipmi_bmc_extern_finalize(Object *obj) +{ + IPMIBmcExtern *ibe = IPMI_BMC_EXTERN(obj); + + timer_del(ibe->extern_timer); + timer_free(ibe->extern_timer); +} + static Property ipmi_bmc_extern_properties[] = { DEFINE_PROP_CHR("chardev", IPMIBmcExtern, chr), DEFINE_PROP_END_OF_LIST(), @@ -499,6 +517,7 @@ static void ipmi_bmc_extern_class_init(ObjectClass *oc, void *data) bk->handle_command = ipmi_bmc_extern_handle_command; bk->handle_reset = ipmi_bmc_extern_handle_reset; + dc->hotpluggable = false; dc->realize = ipmi_bmc_extern_realize; dc->props = ipmi_bmc_extern_properties; } @@ -508,6 +527,7 @@ static const TypeInfo ipmi_bmc_extern_type = { .parent = TYPE_IPMI_BMC, .instance_size = sizeof(IPMIBmcExtern), .instance_init = ipmi_bmc_extern_init, + .instance_finalize = ipmi_bmc_extern_finalize, .class_init = ipmi_bmc_extern_class_init, }; diff --git a/hw/ipmi/ipmi_bmc_sim.c b/hw/ipmi/ipmi_bmc_sim.c index dc9c14cd29..c7883d6f5e 100644 --- a/hw/ipmi/ipmi_bmc_sim.c +++ b/hw/ipmi/ipmi_bmc_sim.c @@ 
-217,7 +217,6 @@ struct IPMIBmcSim { /* Odd netfns are for responses, so we only need the even ones. */ const IPMINetfn *netfns[MAX_NETFNS / 2]; - QemuMutex lock; /* We allow one event in the buffer */ uint8_t evtbuf[16]; @@ -940,7 +939,6 @@ static void get_msg(IPMIBmcSim *ibs, { IPMIRcvBufEntry *msg; - qemu_mutex_lock(&ibs->lock); if (QTAILQ_EMPTY(&ibs->rcvbufs)) { rsp_buffer_set_error(rsp, 0x80); /* Queue empty */ goto out; @@ -960,7 +958,6 @@ static void get_msg(IPMIBmcSim *ibs, } out: - qemu_mutex_unlock(&ibs->lock); return; } @@ -1055,11 +1052,9 @@ static void send_msg(IPMIBmcSim *ibs, end_msg: msg->buf[msg->len] = ipmb_checksum(msg->buf, msg->len, 0); msg->len++; - qemu_mutex_lock(&ibs->lock); QTAILQ_INSERT_TAIL(&ibs->rcvbufs, msg, entry); ibs->msg_flags |= IPMI_BMC_MSG_FLAG_RCV_MSG_QUEUE; k->set_atn(s, 1, attn_irq_enabled(ibs)); - qemu_mutex_unlock(&ibs->lock); } static void do_watchdog_reset(IPMIBmcSim *ibs) @@ -1753,7 +1748,6 @@ static void ipmi_sim_realize(DeviceState *dev, Error **errp) unsigned int i; IPMIBmcSim *ibs = IPMI_BMC_SIMULATOR(b); - qemu_mutex_init(&ibs->lock); QTAILQ_INIT(&ibs->rcvbufs); ibs->bmc_global_enables = (1 << IPMI_BMC_EVENT_LOG_BIT); @@ -1773,7 +1767,7 @@ static void ipmi_sim_realize(DeviceState *dev, Error **errp) ibs->acpi_power_state[1] = 0; if (qemu_uuid_set) { - memcpy(&ibs->uuid, qemu_uuid, 16); + memcpy(&ibs->uuid, &qemu_uuid, 16); } else { memset(&ibs->uuid, 0, 16); } @@ -1791,6 +1785,7 @@ static void ipmi_sim_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); IPMIBmcClass *bk = IPMI_BMC_CLASS(oc); + dc->hotpluggable = false; dc->realize = ipmi_sim_realize; bk->handle_command = ipmi_sim_handle_command; } diff --git a/hw/ipmi/isa_ipmi_kcs.c b/hw/ipmi/isa_ipmi_kcs.c index 9a38f8a28a..80444977a0 100644 --- a/hw/ipmi/isa_ipmi_kcs.c +++ b/hw/ipmi/isa_ipmi_kcs.c @@ -433,10 +433,8 @@ const VMStateDescription vmstate_ISAIPMIKCSDevice = { VMSTATE_BOOL(kcs.use_irq, ISAIPMIKCSDevice), VMSTATE_BOOL(kcs.irqs_enabled, ISAIPMIKCSDevice), VMSTATE_UINT32(kcs.outpos, ISAIPMIKCSDevice), - VMSTATE_VBUFFER_UINT32(kcs.outmsg, ISAIPMIKCSDevice, 1, NULL, 0, - kcs.outlen), - VMSTATE_VBUFFER_UINT32(kcs.inmsg, ISAIPMIKCSDevice, 1, NULL, 0, - kcs.inlen), + VMSTATE_UINT8_ARRAY(kcs.outmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE), + VMSTATE_UINT8_ARRAY(kcs.inmsg, ISAIPMIKCSDevice, MAX_IPMI_MSG_SIZE), VMSTATE_BOOL(kcs.write_end, ISAIPMIKCSDevice), VMSTATE_UINT8(kcs.status_reg, ISAIPMIKCSDevice), VMSTATE_UINT8(kcs.data_out_reg, ISAIPMIKCSDevice), diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c index ce74db232a..9d07b118c0 100644 --- a/hw/isa/isa-bus.c +++ b/hw/isa/isa-bus.c @@ -131,24 +131,20 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start) isa_init_ioport(dev, start); } -void isa_register_portio_list(ISADevice *dev, uint16_t start, +void isa_register_portio_list(ISADevice *dev, + PortioList *piolist, uint16_t start, const MemoryRegionPortio *pio_start, void *opaque, const char *name) { - PortioList piolist; + assert(piolist && !piolist->owner); /* START is how we should treat DEV, regardless of the actual contents of the portio array. This is how the old code actually handled e.g. the FDC device. */ isa_init_ioport(dev, start); - /* FIXME: the device should store created PortioList in its state. Note - that DEV can be NULL here and that single device can register several - portio lists. Current implementation is leaking memory allocated - in portio_list_init. 
The leak is not critical because it happens only - at initialization time. */ - portio_list_init(&piolist, OBJECT(dev), pio_start, opaque, name); - portio_list_add(&piolist, isabus->address_space_io, start); + portio_list_init(piolist, OBJECT(dev), pio_start, opaque, name); + portio_list_add(piolist, isabus->address_space_io, start); } static void isa_device_init(Object *obj) diff --git a/hw/isa/pc87312.c b/hw/isa/pc87312.c index c3ebf3e7a0..b1c1a0acb1 100644 --- a/hw/isa/pc87312.c +++ b/hw/isa/pc87312.c @@ -283,7 +283,7 @@ static void pc87312_realize(DeviceState *dev, Error **errp) /* FIXME use a qdev chardev prop instead of parallel_hds[] */ chr = parallel_hds[0]; if (chr == NULL) { - chr = qemu_chr_new("par0", "null", NULL); + chr = qemu_chr_new("par0", "null"); } isa = isa_create(bus, "isa-parallel"); d = DEVICE(isa); @@ -303,7 +303,7 @@ static void pc87312_realize(DeviceState *dev, Error **errp) chr = serial_hds[i]; if (chr == NULL) { snprintf(name, sizeof(name), "ser%d", i); - chr = qemu_chr_new(name, "null", NULL); + chr = qemu_chr_new(name, "null"); } isa = isa_create(bus, "isa-serial"); d = DEVICE(isa); diff --git a/hw/lm32/lm32_hwsetup.h b/hw/lm32/lm32_hwsetup.h index b71e6eafba..23e18784df 100644 --- a/hw/lm32/lm32_hwsetup.h +++ b/hw/lm32/lm32_hwsetup.h @@ -75,7 +75,7 @@ static inline void hwsetup_create_rom(HWSetup *hw, hwaddr base) { rom_add_blob("hwsetup", hw->data, TARGET_PAGE_SIZE, - TARGET_PAGE_SIZE, base, NULL, NULL, NULL); + TARGET_PAGE_SIZE, base, NULL, NULL, NULL, NULL); } static inline void hwsetup_add_u8(HWSetup *hw, uint8_t u) diff --git a/hw/m68k/mcf5206.c b/hw/m68k/mcf5206.c index e14896e529..b81901fdfd 100644 --- a/hw/m68k/mcf5206.c +++ b/hw/m68k/mcf5206.c @@ -139,7 +139,7 @@ static m5206_timer_state *m5206_timer_init(qemu_irq irq) s = (m5206_timer_state *)g_malloc0(sizeof(m5206_timer_state)); bh = qemu_bh_new(m5206_timer_trigger, s); - s->timer = ptimer_init(bh); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); s->irq = irq; m5206_timer_reset(s); return s; diff --git a/hw/m68k/mcf5208.c b/hw/m68k/mcf5208.c index 24155574f2..3438314c35 100644 --- a/hw/m68k/mcf5208.c +++ b/hw/m68k/mcf5208.c @@ -21,7 +21,7 @@ #include "elf.h" #include "exec/address-spaces.h" -#define SYS_FREQ 66000000 +#define SYS_FREQ 166666666 #define PCSR_EN 0x0001 #define PCSR_RLD 0x0002 @@ -183,7 +183,7 @@ static void mcf5208_sys_init(MemoryRegion *address_space, qemu_irq *pic) for (i = 0; i < 2; i++) { s = (m5208_timer_state *)g_malloc0(sizeof(m5208_timer_state)); bh = qemu_bh_new(m5208_timer_trigger, s); - s->timer = ptimer_init(bh); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); memory_region_init_io(&s->iomem, NULL, &m5208_timer_ops, s, "m5208-timer", 0x00004000); memory_region_add_subregion(address_space, 0xfc080000 + 0x4000 * i, diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 7895805a23..db896b0bb6 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -148,13 +148,9 @@ static MemoryRegion *nvdimm_get_vmstate_memory_region(PCDIMMDevice *dimm) static void nvdimm_class_init(ObjectClass *oc, void *data) { - DeviceClass *dc = DEVICE_CLASS(oc); PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); NVDIMMClass *nvc = NVDIMM_CLASS(oc); - /* nvdimm hotplug has not been supported yet. 
*/ - dc->hotpluggable = false; - ddc->realize = nvdimm_realize; ddc->get_memory_region = nvdimm_get_memory_region; ddc->get_vmstate_memory_region = nvdimm_get_vmstate_memory_region; diff --git a/hw/mem/trace-events b/hw/mem/trace-events new file mode 100644 index 0000000000..323c3c10d5 --- /dev/null +++ b/hw/mem/trace-events @@ -0,0 +1,5 @@ +# See docs/trace-events.txt for syntax documentation. + +# hw/mem/pc-dimm.c +mhp_pc_dimm_assigned_slot(int slot) "%d" +mhp_pc_dimm_assigned_address(uint64_t addr) "0x%"PRIx64 diff --git a/hw/microblaze/boot.c b/hw/microblaze/boot.c index 9eebb1a521..1834d22a61 100644 --- a/hw/microblaze/boot.c +++ b/hw/microblaze/boot.c @@ -30,7 +30,6 @@ #include "qemu/option.h" #include "qemu/config-file.h" #include "qemu/error-report.h" -#include "qemu-common.h" #include "sysemu/device_tree.h" #include "sysemu/sysemu.h" #include "hw/loader.h" diff --git a/hw/mips/mips_fulong2e.c b/hw/mips/mips_fulong2e.c index 889cdc7ca7..9a4dae42d9 100644 --- a/hw/mips/mips_fulong2e.c +++ b/hw/mips/mips_fulong2e.c @@ -374,7 +374,7 @@ static void mips_fulong2e_init(MachineState *machine) rtc_init(isa_bus, 2000, NULL); - serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); + serial_hds_isa_init(isa_bus, 0, MAX_SERIAL_PORTS); parallel_hds_isa_init(isa_bus, 1); /* Sound card */ diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index e90857ee0b..cf48f420cc 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -47,7 +47,6 @@ #include "elf.h" #include "hw/timer/mc146818rtc.h" #include "hw/timer/i8254.h" -#include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "exec/address-spaces.h" #include "hw/sysbus.h" /* SysBusDevice */ @@ -85,9 +84,10 @@ typedef struct { uint32_t i2coe; uint32_t i2cout; uint32_t i2csel; - CharDriverState *display; + CharBackend display; char display_text[9]; SerialState *uart; + bool display_inited; } MaltaFPGAState; #define TYPE_MIPS_MALTA "mips-malta" @@ -124,8 +124,10 @@ static void malta_fpga_update_display(void *opaque) } leds_text[8] = '\0'; - qemu_chr_fe_printf(s->display, "\e[H\n\n|\e[32m%-8.8s\e[00m|\r\n", leds_text); - qemu_chr_fe_printf(s->display, "\n\n\n\n|\e[31m%-8.8s\e[00m|", s->display_text); + qemu_chr_fe_printf(&s->display, "\e[H\n\n|\e[32m%-8.8s\e[00m|\r\n", + leds_text); + qemu_chr_fe_printf(&s->display, "\n\n\n\n|\e[31m%-8.8s\e[00m|", + s->display_text); } /* @@ -530,23 +532,29 @@ static void malta_fpga_reset(void *opaque) snprintf(s->display_text, 9, " "); } -static void malta_fpga_led_init(CharDriverState *chr) +static void malta_fgpa_display_event(void *opaque, int event) { - qemu_chr_fe_printf(chr, "\e[HMalta LEDBAR\r\n"); - qemu_chr_fe_printf(chr, "+--------+\r\n"); - qemu_chr_fe_printf(chr, "+ +\r\n"); - qemu_chr_fe_printf(chr, "+--------+\r\n"); - qemu_chr_fe_printf(chr, "\n"); - qemu_chr_fe_printf(chr, "Malta ASCII\r\n"); - qemu_chr_fe_printf(chr, "+--------+\r\n"); - qemu_chr_fe_printf(chr, "+ +\r\n"); - qemu_chr_fe_printf(chr, "+--------+\r\n"); + MaltaFPGAState *s = opaque; + + if (event == CHR_EVENT_OPENED && !s->display_inited) { + qemu_chr_fe_printf(&s->display, "\e[HMalta LEDBAR\r\n"); + qemu_chr_fe_printf(&s->display, "+--------+\r\n"); + qemu_chr_fe_printf(&s->display, "+ +\r\n"); + qemu_chr_fe_printf(&s->display, "+--------+\r\n"); + qemu_chr_fe_printf(&s->display, "\n"); + qemu_chr_fe_printf(&s->display, "Malta ASCII\r\n"); + qemu_chr_fe_printf(&s->display, "+--------+\r\n"); + qemu_chr_fe_printf(&s->display, "+ +\r\n"); + qemu_chr_fe_printf(&s->display, "+--------+\r\n"); + s->display_inited = true; 
+ } } static MaltaFPGAState *malta_fpga_init(MemoryRegion *address_space, hwaddr base, qemu_irq uart_irq, CharDriverState *uart_chr) { MaltaFPGAState *s; + CharDriverState *chr; s = (MaltaFPGAState *)g_malloc0(sizeof(MaltaFPGAState)); @@ -560,7 +568,10 @@ static MaltaFPGAState *malta_fpga_init(MemoryRegion *address_space, memory_region_add_subregion(address_space, base, &s->iomem_lo); memory_region_add_subregion(address_space, base + 0xa00, &s->iomem_hi); - s->display = qemu_chr_new("fpga", "vc:320x200", malta_fpga_led_init); + chr = qemu_chr_new("fpga", "vc:320x200"); + qemu_chr_fe_init(&s->display, chr, NULL); + qemu_chr_fe_set_handlers(&s->display, NULL, NULL, + malta_fgpa_display_event, s, NULL, true); s->uart = serial_mm_init(address_space, base + 0x900, 3, uart_irq, 230400, uart_chr, DEVICE_NATIVE_ENDIAN); @@ -1025,7 +1036,7 @@ void mips_malta_init(MachineState *machine) if (!serial_hds[i]) { char label[32]; snprintf(label, sizeof(label), "serial%d", i); - serial_hds[i] = qemu_chr_new(label, "null", NULL); + serial_hds[i] = qemu_chr_new(label, "null"); } } @@ -1215,7 +1226,7 @@ void mips_malta_init(MachineState *machine) isa_create_simple(isa_bus, "i8042"); rtc_init(isa_bus, 2000, NULL); - serial_hds_isa_init(isa_bus, 2); + serial_hds_isa_init(isa_bus, 0, 2); parallel_hds_isa_init(isa_bus, 1); for(i = 0; i < MAX_FD; i++) { diff --git a/hw/mips/mips_r4k.c b/hw/mips/mips_r4k.c index 16a59c779c..27548c43b6 100644 --- a/hw/mips/mips_r4k.c +++ b/hw/mips/mips_r4k.c @@ -286,7 +286,7 @@ void mips_r4k_init(MachineState *machine) pit = pit_init(isa_bus, 0x40, 0, NULL); - serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); + serial_hds_isa_init(isa_bus, 0, MAX_SERIAL_PORTS); isa_vga_init(isa_bus); diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs index 4cfbd1024a..1a89615a62 100644 --- a/hw/misc/Makefile.objs +++ b/hw/misc/Makefile.objs @@ -52,4 +52,4 @@ obj-$(CONFIG_PVPANIC) += pvpanic.o obj-$(CONFIG_EDU) += edu.o obj-$(CONFIG_HYPERV_TESTDEV) += hyperv_testdev.o obj-$(CONFIG_AUX) += auxbus.o -obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o +obj-$(CONFIG_ASPEED_SOC) += aspeed_scu.o aspeed_sdmc.o diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c index c7e2c8263f..b1f3e6f6b8 100644 --- a/hw/misc/aspeed_scu.c +++ b/hw/misc/aspeed_scu.c @@ -120,6 +120,41 @@ static const uint32_t ast2400_a0_resets[ASPEED_SCU_NR_REGS] = { [BMC_DEV_ID] = 0x00002402U }; +/* SCU70 bit 23: 0 24Mhz. 
bit 11:9: 0b001 AXI:ABH ratio 2:1 */ +/* AST2500 revision A1 */ + +static const uint32_t ast2500_a1_resets[ASPEED_SCU_NR_REGS] = { + [SYS_RST_CTRL] = 0xFFCFFEDCU, + [CLK_SEL] = 0xF3F40000U, + [CLK_STOP_CTRL] = 0x19FC3E8BU, + [D2PLL_PARAM] = 0x00026108U, + [MPLL_PARAM] = 0x00030291U, + [HPLL_PARAM] = 0x93000400U, + [MISC_CTRL1] = 0x00000010U, + [PCI_CTRL1] = 0x20001A03U, + [PCI_CTRL2] = 0x20001A03U, + [PCI_CTRL3] = 0x04000030U, + [SYS_RST_STATUS] = 0x00000001U, + [SOC_SCRATCH1] = 0x000000C0U, /* SoC completed DRAM init */ + [MISC_CTRL2] = 0x00000023U, + [RNG_CTRL] = 0x0000000EU, + [PINMUX_CTRL2] = 0x0000F000U, + [PINMUX_CTRL3] = 0x03000000U, + [PINMUX_CTRL4] = 0x00000000U, + [PINMUX_CTRL5] = 0x0000A000U, + [WDT_RST_CTRL] = 0x023FFFF3U, + [PINMUX_CTRL8] = 0xFFFF0000U, + [PINMUX_CTRL9] = 0x000FFFFFU, + [FREE_CNTR4] = 0x000000FFU, + [FREE_CNTR4_EXT] = 0x000000FFU, + [CPU2_BASE_SEG1] = 0x80000000U, + [CPU2_BASE_SEG4] = 0x1E600000U, + [CPU2_BASE_SEG5] = 0xC0000000U, + [UART_HPLL_CLK] = 0x00001903U, + [PCIE_CTRL] = 0x0000007BU, + [BMC_DEV_ID] = 0x00002402U +}; + static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size) { AspeedSCUState *s = ASPEED_SCU(opaque); @@ -198,6 +233,10 @@ static void aspeed_scu_reset(DeviceState *dev) case AST2400_A0_SILICON_REV: reset = ast2400_a0_resets; break; + case AST2500_A0_SILICON_REV: + case AST2500_A1_SILICON_REV: + reset = ast2500_a1_resets; + break; default: g_assert_not_reached(); } @@ -208,7 +247,11 @@ static void aspeed_scu_reset(DeviceState *dev) s->regs[HW_STRAP2] = s->hw_strap2; } -static uint32_t aspeed_silicon_revs[] = { AST2400_A0_SILICON_REV, }; +static uint32_t aspeed_silicon_revs[] = { + AST2400_A0_SILICON_REV, + AST2500_A0_SILICON_REV, + AST2500_A1_SILICON_REV, +}; bool is_supported_silicon_rev(uint32_t silicon_rev) { diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c new file mode 100644 index 0000000000..8830dc084c --- /dev/null +++ b/hw/misc/aspeed_sdmc.c @@ -0,0 +1,280 @@ +/* + * ASPEED SDRAM Memory Controller + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "hw/misc/aspeed_sdmc.h" +#include "hw/misc/aspeed_scu.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "trace.h" + +/* Protection Key Register */ +#define R_PROT (0x00 / 4) +#define PROT_KEY_UNLOCK 0xFC600309 + +/* Configuration Register */ +#define R_CONF (0x04 / 4) + +/* + * Configuration register Ox4 (for Aspeed AST2400 SOC) + * + * These are for the record and future use. ASPEED_SDMC_DRAM_SIZE is + * what we care about right now as it is checked by U-Boot to + * determine the RAM size. 
+ */ + +#define ASPEED_SDMC_RESERVED 0xFFFFF800 /* 31:11 reserved */ +#define ASPEED_SDMC_AST2300_COMPAT (1 << 10) +#define ASPEED_SDMC_SCRAMBLE_PATTERN (1 << 9) +#define ASPEED_SDMC_DATA_SCRAMBLE (1 << 8) +#define ASPEED_SDMC_ECC_ENABLE (1 << 7) +#define ASPEED_SDMC_VGA_COMPAT (1 << 6) /* readonly */ +#define ASPEED_SDMC_DRAM_BANK (1 << 5) +#define ASPEED_SDMC_DRAM_BURST (1 << 4) +#define ASPEED_SDMC_VGA_APERTURE(x) ((x & 0x3) << 2) /* readonly */ +#define ASPEED_SDMC_VGA_8MB 0x0 +#define ASPEED_SDMC_VGA_16MB 0x1 +#define ASPEED_SDMC_VGA_32MB 0x2 +#define ASPEED_SDMC_VGA_64MB 0x3 +#define ASPEED_SDMC_DRAM_SIZE(x) (x & 0x3) +#define ASPEED_SDMC_DRAM_64MB 0x0 +#define ASPEED_SDMC_DRAM_128MB 0x1 +#define ASPEED_SDMC_DRAM_256MB 0x2 +#define ASPEED_SDMC_DRAM_512MB 0x3 + +#define ASPEED_SDMC_READONLY_MASK \ + (ASPEED_SDMC_RESERVED | ASPEED_SDMC_VGA_COMPAT | \ + ASPEED_SDMC_VGA_APERTURE(ASPEED_SDMC_VGA_64MB)) +/* + * Configuration register Ox4 (for Aspeed AST2500 SOC and higher) + * + * Incompatibilities are annotated in the list. ASPEED_SDMC_HW_VERSION + * should be set to 1 for the AST2500 SOC. + */ +#define ASPEED_SDMC_HW_VERSION(x) ((x & 0xf) << 28) /* readonly */ +#define ASPEED_SDMC_SW_VERSION ((x & 0xff) << 20) +#define ASPEED_SDMC_CACHE_INITIAL_DONE (1 << 19) /* readonly */ +#define ASPEED_SDMC_AST2500_RESERVED 0x7C000 /* 18:14 reserved */ +#define ASPEED_SDMC_CACHE_DDR4_CONF (1 << 13) +#define ASPEED_SDMC_CACHE_INITIAL (1 << 12) +#define ASPEED_SDMC_CACHE_RANGE_CTRL (1 << 11) +#define ASPEED_SDMC_CACHE_ENABLE (1 << 10) /* differs from AST2400 */ +#define ASPEED_SDMC_DRAM_TYPE (1 << 4) /* differs from AST2400 */ + +/* DRAM size definitions differs */ +#define ASPEED_SDMC_AST2500_128MB 0x0 +#define ASPEED_SDMC_AST2500_256MB 0x1 +#define ASPEED_SDMC_AST2500_512MB 0x2 +#define ASPEED_SDMC_AST2500_1024MB 0x3 + +#define ASPEED_SDMC_AST2500_READONLY_MASK \ + (ASPEED_SDMC_HW_VERSION(0xf) | ASPEED_SDMC_CACHE_INITIAL_DONE | \ + ASPEED_SDMC_AST2500_RESERVED | ASPEED_SDMC_VGA_COMPAT | \ + ASPEED_SDMC_VGA_APERTURE(ASPEED_SDMC_VGA_64MB)) + +static uint64_t aspeed_sdmc_read(void *opaque, hwaddr addr, unsigned size) +{ + AspeedSDMCState *s = ASPEED_SDMC(opaque); + + addr >>= 2; + + if (addr >= ARRAY_SIZE(s->regs)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + return 0; + } + + return s->regs[addr]; +} + +static void aspeed_sdmc_write(void *opaque, hwaddr addr, uint64_t data, + unsigned int size) +{ + AspeedSDMCState *s = ASPEED_SDMC(opaque); + + addr >>= 2; + + if (addr >= ARRAY_SIZE(s->regs)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + return; + } + + if (addr != R_PROT && s->regs[R_PROT] != PROT_KEY_UNLOCK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked!\n", __func__); + return; + } + + if (addr == R_CONF) { + /* Make sure readonly bits are kept */ + switch (s->silicon_rev) { + case AST2400_A0_SILICON_REV: + data &= ~ASPEED_SDMC_READONLY_MASK; + break; + case AST2500_A0_SILICON_REV: + data &= ~ASPEED_SDMC_AST2500_READONLY_MASK; + break; + default: + g_assert_not_reached(); + } + } + + s->regs[addr] = data; +} + +static const MemoryRegionOps aspeed_sdmc_ops = { + .read = aspeed_sdmc_read, + .write = aspeed_sdmc_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid.min_access_size = 4, + .valid.max_access_size = 4, +}; + +static int ast2400_rambits(AspeedSDMCState *s) +{ + switch (s->ram_size >> 20) { + case 64: + return ASPEED_SDMC_DRAM_64MB; + case 128: 
+ return ASPEED_SDMC_DRAM_128MB; + case 256: + return ASPEED_SDMC_DRAM_256MB; + case 512: + return ASPEED_SDMC_DRAM_512MB; + default: + break; + } + + /* use a common default */ + error_report("warning: Invalid RAM size 0x%" PRIx64 + ". Using default 256M", s->ram_size); + s->ram_size = 256 << 20; + return ASPEED_SDMC_DRAM_256MB; +} + +static int ast2500_rambits(AspeedSDMCState *s) +{ + switch (s->ram_size >> 20) { + case 128: + return ASPEED_SDMC_AST2500_128MB; + case 256: + return ASPEED_SDMC_AST2500_256MB; + case 512: + return ASPEED_SDMC_AST2500_512MB; + case 1024: + return ASPEED_SDMC_AST2500_1024MB; + default: + break; + } + + /* use a common default */ + error_report("warning: Invalid RAM size 0x%" PRIx64 + ". Using default 512M", s->ram_size); + s->ram_size = 512 << 20; + return ASPEED_SDMC_AST2500_512MB; +} + +static void aspeed_sdmc_reset(DeviceState *dev) +{ + AspeedSDMCState *s = ASPEED_SDMC(dev); + + memset(s->regs, 0, sizeof(s->regs)); + + /* Set ram size bit and defaults values */ + switch (s->silicon_rev) { + case AST2400_A0_SILICON_REV: + s->regs[R_CONF] |= + ASPEED_SDMC_VGA_COMPAT | + ASPEED_SDMC_DRAM_SIZE(s->ram_bits); + break; + + case AST2500_A0_SILICON_REV: + case AST2500_A1_SILICON_REV: + s->regs[R_CONF] |= + ASPEED_SDMC_HW_VERSION(1) | + ASPEED_SDMC_VGA_APERTURE(ASPEED_SDMC_VGA_64MB) | + ASPEED_SDMC_DRAM_SIZE(s->ram_bits); + break; + + default: + g_assert_not_reached(); + } +} + +static void aspeed_sdmc_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AspeedSDMCState *s = ASPEED_SDMC(dev); + + if (!is_supported_silicon_rev(s->silicon_rev)) { + error_setg(errp, "Unknown silicon revision: 0x%" PRIx32, + s->silicon_rev); + return; + } + + switch (s->silicon_rev) { + case AST2400_A0_SILICON_REV: + s->ram_bits = ast2400_rambits(s); + break; + case AST2500_A0_SILICON_REV: + case AST2500_A1_SILICON_REV: + s->ram_bits = ast2500_rambits(s); + break; + default: + g_assert_not_reached(); + } + + memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_sdmc_ops, s, + TYPE_ASPEED_SDMC, 0x1000); + sysbus_init_mmio(sbd, &s->iomem); +} + +static const VMStateDescription vmstate_aspeed_sdmc = { + .name = "aspeed.sdmc", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32_ARRAY(regs, AspeedSDMCState, ASPEED_SDMC_NR_REGS), + VMSTATE_END_OF_LIST() + } +}; + +static Property aspeed_sdmc_properties[] = { + DEFINE_PROP_UINT32("silicon-rev", AspeedSDMCState, silicon_rev, 0), + DEFINE_PROP_UINT64("ram-size", AspeedSDMCState, ram_size, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void aspeed_sdmc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = aspeed_sdmc_realize; + dc->reset = aspeed_sdmc_reset; + dc->desc = "ASPEED SDRAM Memory Controller"; + dc->vmsd = &vmstate_aspeed_sdmc; + dc->props = aspeed_sdmc_properties; +} + +static const TypeInfo aspeed_sdmc_info = { + .name = TYPE_ASPEED_SDMC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(AspeedSDMCState), + .class_init = aspeed_sdmc_class_init, +}; + +static void aspeed_sdmc_register_types(void) +{ + type_register_static(&aspeed_sdmc_info); +} + +type_init(aspeed_sdmc_register_types); diff --git a/hw/misc/edu.c b/hw/misc/edu.c index 888ba49a0e..401039c100 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -24,6 +24,7 @@ #include "qemu/osdep.h" #include "hw/pci/pci.h" +#include "hw/pci/msi.h" #include "qemu/timer.h" #include "qemu/main-loop.h" /* iothread mutex */ #include "qapi/visitor.h" @@ -69,11 
+70,20 @@ typedef struct { uint64_t dma_mask; } EduState; +static bool edu_msi_enabled(EduState *edu) +{ + return msi_enabled(&edu->pdev); +} + static void edu_raise_irq(EduState *edu, uint32_t val) { edu->irq_status |= val; if (edu->irq_status) { - pci_set_irq(&edu->pdev, 1); + if (edu_msi_enabled(edu)) { + msi_notify(&edu->pdev, 0); + } else { + pci_set_irq(&edu->pdev, 1); + } } } @@ -81,7 +91,7 @@ static void edu_lower_irq(EduState *edu, uint32_t val) { edu->irq_status &= ~val; - if (!edu->irq_status) { + if (!edu->irq_status && !edu_msi_enabled(edu)) { pci_set_irq(&edu->pdev, 0); } } @@ -342,6 +352,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp) pci_config_set_interrupt_pin(pci_conf, 1); + if (msi_init(pdev, 0, 1, true, false, errp)) { + return; + } + memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu, "edu-mmio", 1 << 20); pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio); diff --git a/hw/misc/imx25_ccm.c b/hw/misc/imx25_ccm.c index 5cd8c0a9a7..19e948a52d 100644 --- a/hw/misc/imx25_ccm.c +++ b/hw/misc/imx25_ccm.c @@ -27,7 +27,7 @@ } \ } while (0) -static char const *imx25_ccm_reg_name(uint32_t reg) +static const char *imx25_ccm_reg_name(uint32_t reg) { static char unknown[20]; diff --git a/hw/misc/imx31_ccm.c b/hw/misc/imx31_ccm.c index 1c03e52c40..b890c383be 100644 --- a/hw/misc/imx31_ccm.c +++ b/hw/misc/imx31_ccm.c @@ -29,7 +29,7 @@ } \ } while (0) -static char const *imx31_ccm_reg_name(uint32_t reg) +static const char *imx31_ccm_reg_name(uint32_t reg) { static char unknown[20]; diff --git a/hw/misc/imx6_ccm.c b/hw/misc/imx6_ccm.c index 17e15d4c92..1b421013a3 100644 --- a/hw/misc/imx6_ccm.c +++ b/hw/misc/imx6_ccm.c @@ -26,7 +26,7 @@ } \ } while (0) -static char const *imx6_ccm_reg_name(uint32_t reg) +static const char *imx6_ccm_reg_name(uint32_t reg) { static char unknown[20]; @@ -99,7 +99,7 @@ static char const *imx6_ccm_reg_name(uint32_t reg) } } -static char const *imx6_analog_reg_name(uint32_t reg) +static const char *imx6_analog_reg_name(uint32_t reg) { static char unknown[20]; diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c index 8bb6829575..55b817b8d7 100644 --- a/hw/misc/imx6_src.c +++ b/hw/misc/imx6_src.c @@ -27,7 +27,7 @@ } \ } while (0) -static char const *imx6_src_reg_name(uint32_t reg) +static const char *imx6_src_reg_name(uint32_t reg) { static char unknown[20]; diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index 40a2ebca20..abeaf3da08 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -88,7 +88,7 @@ typedef struct IVShmemState { /* exactly one of these two may be set */ HostMemoryBackend *hostmem; /* with interrupts */ - CharDriverState *server_chr; /* without interrupts */ + CharBackend server_chr; /* without interrupts */ /* registers */ uint32_t intrmask; @@ -627,8 +627,7 @@ static void ivshmem_read(void *opaque, const uint8_t *buf, int size) msg = le64_to_cpu(s->msg_buf); s->msg_buffered_bytes = 0; - fd = qemu_chr_fe_get_msgfd(s->server_chr); - IVSHMEM_DPRINTF("posn is %" PRId64 ", fd is %d\n", msg, fd); + fd = qemu_chr_fe_get_msgfd(&s->server_chr); process_msg(s, msg, fd, &err); if (err) { @@ -643,8 +642,8 @@ static int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) n = 0; do { - ret = qemu_chr_fe_read_all(s->server_chr, (uint8_t *)&msg + n, - sizeof(msg) - n); + ret = qemu_chr_fe_read_all(&s->server_chr, (uint8_t *)&msg + n, + sizeof(msg) - n); if (ret < 0 && ret != -EINTR) { error_setg_errno(errp, -ret, "read from server failed"); return INT64_MIN; @@ -652,7 +651,7 @@ static 
int64_t ivshmem_recv_msg(IVShmemState *s, int *pfd, Error **errp) n += ret; } while (n < sizeof(msg)); - *pfd = qemu_chr_fe_get_msgfd(s->server_chr); + *pfd = qemu_chr_fe_get_msgfd(&s->server_chr); return msg; } @@ -859,7 +858,7 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ivshmem_mmio); - if (!s->not_legacy_32bit) { + if (s->not_legacy_32bit) { attr |= PCI_BASE_ADDRESS_MEM_TYPE_64; } @@ -869,10 +868,11 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) s->ivshmem_bar2 = host_memory_backend_get_memory(s->hostmem, &error_abort); } else { - assert(s->server_chr); + CharDriverState *chr = qemu_chr_fe_get_driver(&s->server_chr); + assert(chr); IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n", - s->server_chr->filename); + chr->filename); /* we allocate enough space for 16 peers and grow as needed */ resize_peers(s, 16); @@ -894,8 +894,8 @@ static void ivshmem_common_realize(PCIDevice *dev, Error **errp) return; } - qemu_chr_add_handlers(s->server_chr, ivshmem_can_receive, - ivshmem_read, NULL, s); + qemu_chr_fe_set_handlers(&s->server_chr, ivshmem_can_receive, + ivshmem_read, NULL, s, NULL, true); if (ivshmem_setup_interrupts(s) < 0) { error_setg(errp, "failed to initialize interrupts"); @@ -1045,6 +1045,7 @@ static void ivshmem_plain_init(Object *obj) ivshmem_check_memdev_is_busy, OBJ_PROP_LINK_UNREF_ON_RELEASE, &error_abort); + s->not_legacy_32bit = 1; } static void ivshmem_plain_realize(PCIDevice *dev, Error **errp) @@ -1116,13 +1117,14 @@ static void ivshmem_doorbell_init(Object *obj) s->features |= (1 << IVSHMEM_MSI); s->legacy_size = SIZE_MAX; /* whatever the server sends */ + s->not_legacy_32bit = 1; } static void ivshmem_doorbell_realize(PCIDevice *dev, Error **errp) { IVShmemState *s = IVSHMEM_COMMON(dev); - if (!s->server_chr) { + if (!qemu_chr_fe_get_driver(&s->server_chr)) { error_setg(errp, "You must specify a 'chardev'"); return; } @@ -1251,7 +1253,7 @@ static void ivshmem_realize(PCIDevice *dev, Error **errp) " or ivshmem-doorbell instead"); } - if (!!s->server_chr + !!s->shmobj != 1) { + if (!!qemu_chr_fe_get_driver(&s->server_chr) + !!s->shmobj != 1) { error_setg(errp, "You must specify either 'shm' or 'chardev'"); return; } diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c index be03926b96..5d57f45dc6 100644 --- a/hw/misc/macio/macio.c +++ b/hw/misc/macio/macio.c @@ -89,22 +89,16 @@ static void macio_escc_legacy_setup(MacIOState *macio_state) MemoryRegion *bar = &macio_state->bar; int i; static const int maps[] = { - 0x00, 0x00, - 0x02, 0x20, - 0x04, 0x10, - 0x06, 0x30, - 0x08, 0x40, - 0x0A, 0x50, - 0x60, 0x60, - 0x70, 0x70, - 0x80, 0x70, - 0x90, 0x80, - 0xA0, 0x90, - 0xB0, 0xA0, - 0xC0, 0xB0, - 0xD0, 0xC0, - 0xE0, 0xD0, - 0xF0, 0xE0, + 0x00, 0x00, /* Command B */ + 0x02, 0x20, /* Command A */ + 0x04, 0x10, /* Data B */ + 0x06, 0x30, /* Data A */ + 0x08, 0x40, /* Enhancement B */ + 0x0A, 0x50, /* Enhancement A */ + 0x80, 0x80, /* Recovery count */ + 0x90, 0x90, /* Start A */ + 0xa0, 0xa0, /* Start B */ + 0xb0, 0xb0, /* Detect AB */ }; memory_region_init(escc_legacy, OBJECT(macio_state), "escc-legacy", 256); diff --git a/hw/misc/milkymist-pfpu.c b/hw/misc/milkymist-pfpu.c index 1da21a643e..3ca25894f1 100644 --- a/hw/misc/milkymist-pfpu.c +++ b/hw/misc/milkymist-pfpu.c @@ -137,7 +137,7 @@ struct MilkymistPFPUState { }; typedef struct MilkymistPFPUState MilkymistPFPUState; -static inline hwaddr +static inline uint32_t get_dma_address(uint32_t base, uint32_t x, 
uint32_t y) { return base + 8 * (128 * y + x); diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index db1b301e7f..7915732f74 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -26,6 +26,8 @@ #include /* For crc32 */ #include "hw/net/cadence_gem.h" +#include "qapi/error.h" +#include "qemu/log.h" #include "net/checksum.h" #ifdef CADENCE_GEM_ERR_DEBUG @@ -141,6 +143,55 @@ #define GEM_DESCONF6 (0x00000294/4) #define GEM_DESCONF7 (0x00000298/4) +#define GEM_INT_Q1_STATUS (0x00000400 / 4) +#define GEM_INT_Q1_MASK (0x00000640 / 4) + +#define GEM_TRANSMIT_Q1_PTR (0x00000440 / 4) +#define GEM_TRANSMIT_Q7_PTR (GEM_TRANSMIT_Q1_PTR + 6) + +#define GEM_RECEIVE_Q1_PTR (0x00000480 / 4) +#define GEM_RECEIVE_Q7_PTR (GEM_RECEIVE_Q1_PTR + 6) + +#define GEM_INT_Q1_ENABLE (0x00000600 / 4) +#define GEM_INT_Q7_ENABLE (GEM_INT_Q1_ENABLE + 6) + +#define GEM_INT_Q1_DISABLE (0x00000620 / 4) +#define GEM_INT_Q7_DISABLE (GEM_INT_Q1_DISABLE + 6) + +#define GEM_INT_Q1_MASK (0x00000640 / 4) +#define GEM_INT_Q7_MASK (GEM_INT_Q1_MASK + 6) + +#define GEM_SCREENING_TYPE1_REGISTER_0 (0x00000500 / 4) + +#define GEM_ST1R_UDP_PORT_MATCH_ENABLE (1 << 29) +#define GEM_ST1R_DSTC_ENABLE (1 << 28) +#define GEM_ST1R_UDP_PORT_MATCH_SHIFT (12) +#define GEM_ST1R_UDP_PORT_MATCH_WIDTH (27 - GEM_ST1R_UDP_PORT_MATCH_SHIFT + 1) +#define GEM_ST1R_DSTC_MATCH_SHIFT (4) +#define GEM_ST1R_DSTC_MATCH_WIDTH (11 - GEM_ST1R_DSTC_MATCH_SHIFT + 1) +#define GEM_ST1R_QUEUE_SHIFT (0) +#define GEM_ST1R_QUEUE_WIDTH (3 - GEM_ST1R_QUEUE_SHIFT + 1) + +#define GEM_SCREENING_TYPE2_REGISTER_0 (0x00000540 / 4) + +#define GEM_ST2R_COMPARE_A_ENABLE (1 << 18) +#define GEM_ST2R_COMPARE_A_SHIFT (13) +#define GEM_ST2R_COMPARE_WIDTH (17 - GEM_ST2R_COMPARE_A_SHIFT + 1) +#define GEM_ST2R_ETHERTYPE_ENABLE (1 << 12) +#define GEM_ST2R_ETHERTYPE_INDEX_SHIFT (9) +#define GEM_ST2R_ETHERTYPE_INDEX_WIDTH (11 - GEM_ST2R_ETHERTYPE_INDEX_SHIFT \ + + 1) +#define GEM_ST2R_QUEUE_SHIFT (0) +#define GEM_ST2R_QUEUE_WIDTH (3 - GEM_ST2R_QUEUE_SHIFT + 1) + +#define GEM_SCREENING_TYPE2_ETHERTYPE_REG_0 (0x000006e0 / 4) +#define GEM_TYPE2_COMPARE_0_WORD_0 (0x00000700 / 4) + +#define GEM_T2CW1_COMPARE_OFFSET_SHIFT (7) +#define GEM_T2CW1_COMPARE_OFFSET_WIDTH (8 - GEM_T2CW1_COMPARE_OFFSET_SHIFT + 1) +#define GEM_T2CW1_OFFSET_VALUE_SHIFT (0) +#define GEM_T2CW1_OFFSET_VALUE_WIDTH (6 - GEM_T2CW1_OFFSET_VALUE_SHIFT + 1) + /*****************************************/ #define GEM_NWCTRL_TXSTART 0x00000200 /* Transmit Enable */ #define GEM_NWCTRL_TXENA 0x00000008 /* Transmit Enable */ @@ -284,9 +335,9 @@ static inline unsigned tx_desc_get_length(unsigned *desc) return desc[1] & DESC_1_LENGTH; } -static inline void print_gem_tx_desc(unsigned *desc) +static inline void print_gem_tx_desc(unsigned *desc, uint8_t queue) { - DB_PRINT("TXDESC:\n"); + DB_PRINT("TXDESC (queue %" PRId8 "):\n", queue); DB_PRINT("bufaddr: 0x%08x\n", *desc); DB_PRINT("used_hw: %d\n", tx_desc_get_used(desc)); DB_PRINT("wrap: %d\n", tx_desc_get_wrap(desc)); @@ -416,6 +467,7 @@ static void phy_update_link(CadenceGEMState *s) static int gem_can_receive(NetClientState *nc) { CadenceGEMState *s; + int i; s = qemu_get_nic_opaque(nc); @@ -428,18 +480,20 @@ static int gem_can_receive(NetClientState *nc) return 0; } - if (rx_desc_get_ownership(s->rx_desc) == 1) { - if (s->can_rx_state != 2) { - s->can_rx_state = 2; - DB_PRINT("can't receive - busy buffer descriptor 0x%x\n", - s->rx_desc_addr); + for (i = 0; i < s->num_priority_queues; i++) { + if (rx_desc_get_ownership(s->rx_desc[i]) == 1) { + if (s->can_rx_state != 2) { + 
s->can_rx_state = 2; + DB_PRINT("can't receive - busy buffer descriptor (q%d) 0x%x\n", + i, s->rx_desc_addr[i]); + } + return 0; } - return 0; } if (s->can_rx_state != 0) { s->can_rx_state = 0; - DB_PRINT("can receive 0x%x\n", s->rx_desc_addr); + DB_PRINT("can receive\n"); } return 1; } @@ -450,9 +504,20 @@ static int gem_can_receive(NetClientState *nc) */ static void gem_update_int_status(CadenceGEMState *s) { - if (s->regs[GEM_ISR]) { - DB_PRINT("asserting int. (0x%08x)\n", s->regs[GEM_ISR]); - qemu_set_irq(s->irq, 1); + int i; + + if ((s->num_priority_queues == 1) && s->regs[GEM_ISR]) { + /* No priority queues, just trigger the interrupt */ + DB_PRINT("asserting int.\n", i); + qemu_set_irq(s->irq[0], 1); + return; + } + + for (i = 0; i < s->num_priority_queues; ++i) { + if (s->regs[GEM_INT_Q1_STATUS + i]) { + DB_PRINT("asserting int. (q=%d)\n", i); + qemu_set_irq(s->irq[i], 1); + } } } @@ -601,17 +666,137 @@ static int gem_mac_address_filter(CadenceGEMState *s, const uint8_t *packet) return GEM_RX_REJECT; } -static void gem_get_rx_desc(CadenceGEMState *s) +/* Figure out which queue the received data should be sent to */ +static int get_queue_from_screen(CadenceGEMState *s, uint8_t *rxbuf_ptr, + unsigned rxbufsize) { - DB_PRINT("read descriptor 0x%x\n", (unsigned)s->rx_desc_addr); + uint32_t reg; + bool matched, mismatched; + int i, j; + + for (i = 0; i < s->num_type1_screeners; i++) { + reg = s->regs[GEM_SCREENING_TYPE1_REGISTER_0 + i]; + matched = false; + mismatched = false; + + /* Screening is based on UDP Port */ + if (reg & GEM_ST1R_UDP_PORT_MATCH_ENABLE) { + uint16_t udp_port = rxbuf_ptr[14 + 22] << 8 | rxbuf_ptr[14 + 23]; + if (udp_port == extract32(reg, GEM_ST1R_UDP_PORT_MATCH_SHIFT, + GEM_ST1R_UDP_PORT_MATCH_WIDTH)) { + matched = true; + } else { + mismatched = true; + } + } + + /* Screening is based on DS/TC */ + if (reg & GEM_ST1R_DSTC_ENABLE) { + uint8_t dscp = rxbuf_ptr[14 + 1]; + if (dscp == extract32(reg, GEM_ST1R_DSTC_MATCH_SHIFT, + GEM_ST1R_DSTC_MATCH_WIDTH)) { + matched = true; + } else { + mismatched = true; + } + } + + if (matched && !mismatched) { + return extract32(reg, GEM_ST1R_QUEUE_SHIFT, GEM_ST1R_QUEUE_WIDTH); + } + } + + for (i = 0; i < s->num_type2_screeners; i++) { + reg = s->regs[GEM_SCREENING_TYPE2_REGISTER_0 + i]; + matched = false; + mismatched = false; + + if (reg & GEM_ST2R_ETHERTYPE_ENABLE) { + uint16_t type = rxbuf_ptr[12] << 8 | rxbuf_ptr[13]; + int et_idx = extract32(reg, GEM_ST2R_ETHERTYPE_INDEX_SHIFT, + GEM_ST2R_ETHERTYPE_INDEX_WIDTH); + + if (et_idx > s->num_type2_screeners) { + qemu_log_mask(LOG_GUEST_ERROR, "Out of range ethertype " + "register index: %d\n", et_idx); + } + if (type == s->regs[GEM_SCREENING_TYPE2_ETHERTYPE_REG_0 + + et_idx]) { + matched = true; + } else { + mismatched = true; + } + } + + /* Compare A, B, C */ + for (j = 0; j < 3; j++) { + uint32_t cr0, cr1, mask; + uint16_t rx_cmp; + int offset; + int cr_idx = extract32(reg, GEM_ST2R_COMPARE_A_SHIFT + j * 6, + GEM_ST2R_COMPARE_WIDTH); + + if (!(reg & (GEM_ST2R_COMPARE_A_ENABLE << (j * 6)))) { + continue; + } + if (cr_idx > s->num_type2_screeners) { + qemu_log_mask(LOG_GUEST_ERROR, "Out of range compare " + "register index: %d\n", cr_idx); + } + + cr0 = s->regs[GEM_TYPE2_COMPARE_0_WORD_0 + cr_idx * 2]; + cr1 = s->regs[GEM_TYPE2_COMPARE_0_WORD_0 + cr_idx * 2 + 1]; + offset = extract32(cr1, GEM_T2CW1_OFFSET_VALUE_SHIFT, + GEM_T2CW1_OFFSET_VALUE_WIDTH); + + switch (extract32(cr1, GEM_T2CW1_COMPARE_OFFSET_SHIFT, + GEM_T2CW1_COMPARE_OFFSET_WIDTH)) { + case 3: /* Skip UDP header */ + 
qemu_log_mask(LOG_UNIMP, "TCP compare offsets" + "unimplemented - assuming UDP\n"); + offset += 8; + /* Fallthrough */ + case 2: /* skip the IP header */ + offset += 20; + /* Fallthrough */ + case 1: /* Count from after the ethertype */ + offset += 14; + break; + case 0: + /* Offset from start of frame */ + break; + } + + rx_cmp = rxbuf_ptr[offset] << 8 | rxbuf_ptr[offset]; + mask = extract32(cr0, 0, 16); + + if ((rx_cmp & mask) == (extract32(cr0, 16, 16) & mask)) { + matched = true; + } else { + mismatched = true; + } + } + + if (matched && !mismatched) { + return extract32(reg, GEM_ST2R_QUEUE_SHIFT, GEM_ST2R_QUEUE_WIDTH); + } + } + + /* We made it here, assume it's queue 0 */ + return 0; +} + +static void gem_get_rx_desc(CadenceGEMState *s, int q) +{ + DB_PRINT("read descriptor 0x%x\n", (unsigned)s->rx_desc_addr[q]); /* read current descriptor */ - cpu_physical_memory_read(s->rx_desc_addr, - (uint8_t *)s->rx_desc, sizeof(s->rx_desc)); + cpu_physical_memory_read(s->rx_desc_addr[0], + (uint8_t *)s->rx_desc[0], sizeof(s->rx_desc[0])); /* Descriptor owned by software ? */ - if (rx_desc_get_ownership(s->rx_desc) == 1) { + if (rx_desc_get_ownership(s->rx_desc[q]) == 1) { DB_PRINT("descriptor 0x%x owned by sw.\n", - (unsigned)s->rx_desc_addr); + (unsigned)s->rx_desc_addr[q]); s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF; s->regs[GEM_ISR] |= GEM_INT_RXUSED & ~(s->regs[GEM_IMR]); /* Handle interrupt consequences */ @@ -632,6 +817,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) uint8_t *rxbuf_ptr; bool first_desc = true; int maf; + int q = 0; s = qemu_get_nic_opaque(nc); @@ -710,6 +896,9 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) DB_PRINT("config bufsize: %d packet size: %ld\n", rxbufsize, size); + /* Find which queue we are targetting */ + q = get_queue_from_screen(s, rxbuf_ptr, rxbufsize); + while (bytes_to_copy) { /* Do nothing if receive is not enabled. */ if (!gem_can_receive(nc)) { @@ -718,56 +907,59 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) } DB_PRINT("copy %d bytes to 0x%x\n", MIN(bytes_to_copy, rxbufsize), - rx_desc_get_buffer(s->rx_desc)); + rx_desc_get_buffer(s->rx_desc[q])); /* Copy packet data to emulated DMA buffer */ - cpu_physical_memory_write(rx_desc_get_buffer(s->rx_desc) + rxbuf_offset, + cpu_physical_memory_write(rx_desc_get_buffer(s->rx_desc[q]) + + rxbuf_offset, rxbuf_ptr, MIN(bytes_to_copy, rxbufsize)); rxbuf_ptr += MIN(bytes_to_copy, rxbufsize); bytes_to_copy -= MIN(bytes_to_copy, rxbufsize); /* Update the descriptor. */ if (first_desc) { - rx_desc_set_sof(s->rx_desc); + rx_desc_set_sof(s->rx_desc[q]); first_desc = false; } if (bytes_to_copy == 0) { - rx_desc_set_eof(s->rx_desc); - rx_desc_set_length(s->rx_desc, size); + rx_desc_set_eof(s->rx_desc[q]); + rx_desc_set_length(s->rx_desc[q], size); } - rx_desc_set_ownership(s->rx_desc); + rx_desc_set_ownership(s->rx_desc[q]); switch (maf) { case GEM_RX_PROMISCUOUS_ACCEPT: break; case GEM_RX_BROADCAST_ACCEPT: - rx_desc_set_broadcast(s->rx_desc); + rx_desc_set_broadcast(s->rx_desc[q]); break; case GEM_RX_UNICAST_HASH_ACCEPT: - rx_desc_set_unicast_hash(s->rx_desc); + rx_desc_set_unicast_hash(s->rx_desc[q]); break; case GEM_RX_MULTICAST_HASH_ACCEPT: - rx_desc_set_multicast_hash(s->rx_desc); + rx_desc_set_multicast_hash(s->rx_desc[q]); break; case GEM_RX_REJECT: abort(); default: /* SAR */ - rx_desc_set_sar(s->rx_desc, maf); + rx_desc_set_sar(s->rx_desc[q], maf); } /* Descriptor write-back. 
*/ - cpu_physical_memory_write(s->rx_desc_addr, - (uint8_t *)s->rx_desc, sizeof(s->rx_desc)); + cpu_physical_memory_write(s->rx_desc_addr[q], + (uint8_t *)s->rx_desc[q], + sizeof(s->rx_desc[q])); /* Next descriptor */ - if (rx_desc_get_wrap(s->rx_desc)) { + if (rx_desc_get_wrap(s->rx_desc[q])) { DB_PRINT("wrapping RX descriptor list\n"); - s->rx_desc_addr = s->regs[GEM_RXQBASE]; + s->rx_desc_addr[q] = s->regs[GEM_RXQBASE]; } else { DB_PRINT("incrementing RX descriptor list\n"); - s->rx_desc_addr += 8; + s->rx_desc_addr[q] += 8; } - gem_get_rx_desc(s); + + gem_get_rx_desc(s, q); } /* Count it */ @@ -839,6 +1031,7 @@ static void gem_transmit(CadenceGEMState *s) uint8_t tx_packet[2048]; uint8_t *p; unsigned total_bytes; + int q = 0; /* Do nothing if transmit is not enabled. */ if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_TXENA)) { @@ -854,110 +1047,123 @@ static void gem_transmit(CadenceGEMState *s) p = tx_packet; total_bytes = 0; - /* read current descriptor */ - packet_desc_addr = s->tx_desc_addr; - - DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr); - cpu_physical_memory_read(packet_desc_addr, - (uint8_t *)desc, sizeof(desc)); - /* Handle all descriptors owned by hardware */ - while (tx_desc_get_used(desc) == 0) { - - /* Do nothing if transmit is not enabled. */ - if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_TXENA)) { - return; - } - print_gem_tx_desc(desc); - - /* The real hardware would eat this (and possibly crash). - * For QEMU let's lend a helping hand. - */ - if ((tx_desc_get_buffer(desc) == 0) || - (tx_desc_get_length(desc) == 0)) { - DB_PRINT("Invalid TX descriptor @ 0x%x\n", - (unsigned)packet_desc_addr); - break; - } - - if (tx_desc_get_length(desc) > sizeof(tx_packet) - (p - tx_packet)) { - DB_PRINT("TX descriptor @ 0x%x too large: size 0x%x space 0x%x\n", - (unsigned)packet_desc_addr, - (unsigned)tx_desc_get_length(desc), - sizeof(tx_packet) - (p - tx_packet)); - break; - } + for (q = s->num_priority_queues - 1; q >= 0; q--) { + /* read current descriptor */ + packet_desc_addr = s->tx_desc_addr[q]; - /* Gather this fragment of the packet from "dma memory" to our contig. - * buffer. - */ - cpu_physical_memory_read(tx_desc_get_buffer(desc), p, - tx_desc_get_length(desc)); - p += tx_desc_get_length(desc); - total_bytes += tx_desc_get_length(desc); + DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr); + cpu_physical_memory_read(packet_desc_addr, + (uint8_t *)desc, sizeof(desc)); + /* Handle all descriptors owned by hardware */ + while (tx_desc_get_used(desc) == 0) { - /* Last descriptor for this packet; hand the whole thing off */ - if (tx_desc_get_last(desc)) { - unsigned desc_first[2]; + /* Do nothing if transmit is not enabled. */ + if (!(s->regs[GEM_NWCTRL] & GEM_NWCTRL_TXENA)) { + return; + } + print_gem_tx_desc(desc, q); - /* Modify the 1st descriptor of this packet to be owned by - * the processor. + /* The real hardware would eat this (and possibly crash). + * For QEMU let's lend a helping hand. 
*/ - cpu_physical_memory_read(s->tx_desc_addr, (uint8_t *)desc_first, - sizeof(desc_first)); - tx_desc_set_used(desc_first); - cpu_physical_memory_write(s->tx_desc_addr, (uint8_t *)desc_first, - sizeof(desc_first)); - /* Advance the hardware current descriptor past this packet */ - if (tx_desc_get_wrap(desc)) { - s->tx_desc_addr = s->regs[GEM_TXQBASE]; - } else { - s->tx_desc_addr = packet_desc_addr + 8; + if ((tx_desc_get_buffer(desc) == 0) || + (tx_desc_get_length(desc) == 0)) { + DB_PRINT("Invalid TX descriptor @ 0x%x\n", + (unsigned)packet_desc_addr); + break; } - DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr); - - s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_TXCMPL; - s->regs[GEM_ISR] |= GEM_INT_TXCMPL & ~(s->regs[GEM_IMR]); - - /* Handle interrupt consequences */ - gem_update_int_status(s); - /* Is checksum offload enabled? */ - if (s->regs[GEM_DMACFG] & GEM_DMACFG_TXCSUM_OFFL) { - net_checksum_calculate(tx_packet, total_bytes); + if (tx_desc_get_length(desc) > sizeof(tx_packet) - + (p - tx_packet)) { + DB_PRINT("TX descriptor @ 0x%x too large: size 0x%x space " \ + "0x%x\n", (unsigned)packet_desc_addr, + (unsigned)tx_desc_get_length(desc), + sizeof(tx_packet) - (p - tx_packet)); + break; } - /* Update MAC statistics */ - gem_transmit_updatestats(s, tx_packet, total_bytes); + /* Gather this fragment of the packet from "dma memory" to our + * contig buffer. + */ + cpu_physical_memory_read(tx_desc_get_buffer(desc), p, + tx_desc_get_length(desc)); + p += tx_desc_get_length(desc); + total_bytes += tx_desc_get_length(desc); + + /* Last descriptor for this packet; hand the whole thing off */ + if (tx_desc_get_last(desc)) { + unsigned desc_first[2]; + + /* Modify the 1st descriptor of this packet to be owned by + * the processor. + */ + cpu_physical_memory_read(s->tx_desc_addr[q], + (uint8_t *)desc_first, + sizeof(desc_first)); + tx_desc_set_used(desc_first); + cpu_physical_memory_write(s->tx_desc_addr[q], + (uint8_t *)desc_first, + sizeof(desc_first)); + /* Advance the hardware current descriptor past this packet */ + if (tx_desc_get_wrap(desc)) { + s->tx_desc_addr[q] = s->regs[GEM_TXQBASE]; + } else { + s->tx_desc_addr[q] = packet_desc_addr + 8; + } + DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr[q]); + + s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_TXCMPL; + s->regs[GEM_ISR] |= GEM_INT_TXCMPL & ~(s->regs[GEM_IMR]); + + /* Update queue interrupt status */ + if (s->num_priority_queues > 1) { + s->regs[GEM_INT_Q1_STATUS + q] |= + GEM_INT_TXCMPL & ~(s->regs[GEM_INT_Q1_MASK + q]); + } + + /* Handle interrupt consequences */ + gem_update_int_status(s); + + /* Is checksum offload enabled? 
*/ + if (s->regs[GEM_DMACFG] & GEM_DMACFG_TXCSUM_OFFL) { + net_checksum_calculate(tx_packet, total_bytes); + } + + /* Update MAC statistics */ + gem_transmit_updatestats(s, tx_packet, total_bytes); + + /* Send the packet somewhere */ + if (s->phy_loop || (s->regs[GEM_NWCTRL] & + GEM_NWCTRL_LOCALLOOP)) { + gem_receive(qemu_get_queue(s->nic), tx_packet, + total_bytes); + } else { + qemu_send_packet(qemu_get_queue(s->nic), tx_packet, + total_bytes); + } + + /* Prepare for next packet */ + p = tx_packet; + total_bytes = 0; + } - /* Send the packet somewhere */ - if (s->phy_loop || (s->regs[GEM_NWCTRL] & GEM_NWCTRL_LOCALLOOP)) { - gem_receive(qemu_get_queue(s->nic), tx_packet, total_bytes); + /* read next descriptor */ + if (tx_desc_get_wrap(desc)) { + tx_desc_set_last(desc); + packet_desc_addr = s->regs[GEM_TXQBASE]; } else { - qemu_send_packet(qemu_get_queue(s->nic), tx_packet, - total_bytes); + packet_desc_addr += 8; } - - /* Prepare for next packet */ - p = tx_packet; - total_bytes = 0; + DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr); + cpu_physical_memory_read(packet_desc_addr, + (uint8_t *)desc, sizeof(desc)); } - /* read next descriptor */ - if (tx_desc_get_wrap(desc)) { - tx_desc_set_last(desc); - packet_desc_addr = s->regs[GEM_TXQBASE]; - } else { - packet_desc_addr += 8; + if (tx_desc_get_used(desc)) { + s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_USED; + s->regs[GEM_ISR] |= GEM_INT_TXUSED & ~(s->regs[GEM_IMR]); + gem_update_int_status(s); } - DB_PRINT("read descriptor 0x%" HWADDR_PRIx "\n", packet_desc_addr); - cpu_physical_memory_read(packet_desc_addr, - (uint8_t *)desc, sizeof(desc)); - } - - if (tx_desc_get_used(desc)) { - s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_USED; - s->regs[GEM_ISR] |= GEM_INT_TXUSED & ~(s->regs[GEM_IMR]); - gem_update_int_status(s); } } @@ -1065,7 +1271,7 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size) { CadenceGEMState *s; uint32_t retval; - + int i; s = (CadenceGEMState *)opaque; offset >>= 2; @@ -1075,8 +1281,10 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size) switch (offset) { case GEM_ISR: - DB_PRINT("lowering irq on ISR read\n"); - qemu_set_irq(s->irq, 0); + DB_PRINT("lowering irqs on ISR read\n"); + for (i = 0; i < s->num_priority_queues; ++i) { + qemu_set_irq(s->irq[i], 0); + } break; case GEM_PHYMNTNC: if (retval & GEM_PHYMNTNC_OP_R) { @@ -1101,6 +1309,7 @@ static uint64_t gem_read(void *opaque, hwaddr offset, unsigned size) retval &= ~(s->regs_wo[offset]); DB_PRINT("0x%08x\n", retval); + gem_update_int_status(s); return retval; } @@ -1113,6 +1322,7 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, { CadenceGEMState *s = (CadenceGEMState *)opaque; uint32_t readonly; + int i; DB_PRINT("offset: 0x%04x write: 0x%08x ", (unsigned)offset, (unsigned)val); offset >>= 2; @@ -1132,14 +1342,18 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, switch (offset) { case GEM_NWCTRL: if (val & GEM_NWCTRL_RXENA) { - gem_get_rx_desc(s); + for (i = 0; i < s->num_priority_queues; ++i) { + gem_get_rx_desc(s, i); + } } if (val & GEM_NWCTRL_TXSTART) { gem_transmit(s); } if (!(val & GEM_NWCTRL_TXENA)) { /* Reset to start of Q when transmit disabled. 
*/ - s->tx_desc_addr = s->regs[GEM_TXQBASE]; + for (i = 0; i < s->num_priority_queues; i++) { + s->tx_desc_addr[i] = s->regs[GEM_TXQBASE]; + } } if (gem_can_receive(qemu_get_queue(s->nic))) { qemu_flush_queued_packets(qemu_get_queue(s->nic)); @@ -1150,10 +1364,16 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, gem_update_int_status(s); break; case GEM_RXQBASE: - s->rx_desc_addr = val; + s->rx_desc_addr[0] = val; + break; + case GEM_RECEIVE_Q1_PTR ... GEM_RECEIVE_Q7_PTR: + s->rx_desc_addr[offset - GEM_RECEIVE_Q1_PTR + 1] = val; break; case GEM_TXQBASE: - s->tx_desc_addr = val; + s->tx_desc_addr[0] = val; + break; + case GEM_TRANSMIT_Q1_PTR ... GEM_TRANSMIT_Q7_PTR: + s->tx_desc_addr[offset - GEM_TRANSMIT_Q1_PTR + 1] = val; break; case GEM_RXSTATUS: gem_update_int_status(s); @@ -1162,10 +1382,18 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, s->regs[GEM_IMR] &= ~val; gem_update_int_status(s); break; + case GEM_INT_Q1_ENABLE ... GEM_INT_Q7_ENABLE: + s->regs[GEM_INT_Q1_MASK + offset - GEM_INT_Q1_ENABLE] &= ~val; + gem_update_int_status(s); + break; case GEM_IDR: s->regs[GEM_IMR] |= val; gem_update_int_status(s); break; + case GEM_INT_Q1_DISABLE ... GEM_INT_Q7_DISABLE: + s->regs[GEM_INT_Q1_MASK + offset - GEM_INT_Q1_DISABLE] |= val; + gem_update_int_status(s); + break; case GEM_SPADDR1LO: case GEM_SPADDR2LO: case GEM_SPADDR3LO: @@ -1202,8 +1430,11 @@ static const MemoryRegionOps gem_ops = { static void gem_set_link(NetClientState *nc) { + CadenceGEMState *s = qemu_get_nic_opaque(nc); + DB_PRINT("\n"); - phy_update_link(qemu_get_nic_opaque(nc)); + phy_update_link(s); + gem_update_int_status(s); } static NetClientInfo net_gem_info = { @@ -1214,36 +1445,62 @@ static NetClientInfo net_gem_info = { .link_status_changed = gem_set_link, }; -static int gem_init(SysBusDevice *sbd) +static void gem_realize(DeviceState *dev, Error **errp) { - DeviceState *dev = DEVICE(sbd); CadenceGEMState *s = CADENCE_GEM(dev); + int i; + + if (s->num_priority_queues == 0 || + s->num_priority_queues > MAX_PRIORITY_QUEUES) { + error_setg(errp, "Invalid num-priority-queues value: %" PRIx8, + s->num_priority_queues); + return; + } else if (s->num_type1_screeners > MAX_TYPE1_SCREENERS) { + error_setg(errp, "Invalid num-type1-screeners value: %" PRIx8, + s->num_type1_screeners); + return; + } else if (s->num_type2_screeners > MAX_TYPE2_SCREENERS) { + error_setg(errp, "Invalid num-type2-screeners value: %" PRIx8, + s->num_type2_screeners); + return; + } + + for (i = 0; i < s->num_priority_queues; ++i) { + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]); + } + + qemu_macaddr_default_if_unset(&s->conf.macaddr); + + s->nic = qemu_new_nic(&net_gem_info, &s->conf, + object_get_typename(OBJECT(dev)), dev->id, s); +} + +static void gem_init(Object *obj) +{ + CadenceGEMState *s = CADENCE_GEM(obj); + DeviceState *dev = DEVICE(obj); DB_PRINT("\n"); gem_init_register_masks(s); memory_region_init_io(&s->iomem, OBJECT(s), &gem_ops, s, "enet", sizeof(s->regs)); - sysbus_init_mmio(sbd, &s->iomem); - sysbus_init_irq(sbd, &s->irq); - qemu_macaddr_default_if_unset(&s->conf.macaddr); - s->nic = qemu_new_nic(&net_gem_info, &s->conf, - object_get_typename(OBJECT(dev)), dev->id, s); - - return 0; + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem); } static const VMStateDescription vmstate_cadence_gem = { .name = "cadence_gem", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 4, + .minimum_version_id = 4, .fields = (VMStateField[]) { VMSTATE_UINT32_ARRAY(regs, CadenceGEMState, 
CADENCE_GEM_MAXREG), VMSTATE_UINT16_ARRAY(phy_regs, CadenceGEMState, 32), VMSTATE_UINT8(phy_loop, CadenceGEMState), - VMSTATE_UINT32(rx_desc_addr, CadenceGEMState), - VMSTATE_UINT32(tx_desc_addr, CadenceGEMState), + VMSTATE_UINT32_ARRAY(rx_desc_addr, CadenceGEMState, + MAX_PRIORITY_QUEUES), + VMSTATE_UINT32_ARRAY(tx_desc_addr, CadenceGEMState, + MAX_PRIORITY_QUEUES), VMSTATE_BOOL_ARRAY(sar_active, CadenceGEMState, 4), VMSTATE_END_OF_LIST(), } @@ -1251,15 +1508,20 @@ static const VMStateDescription vmstate_cadence_gem = { static Property gem_properties[] = { DEFINE_NIC_PROPERTIES(CadenceGEMState, conf), + DEFINE_PROP_UINT8("num-priority-queues", CadenceGEMState, + num_priority_queues, 1), + DEFINE_PROP_UINT8("num-type1-screeners", CadenceGEMState, + num_type1_screeners, 4), + DEFINE_PROP_UINT8("num-type2-screeners", CadenceGEMState, + num_type2_screeners, 4), DEFINE_PROP_END_OF_LIST(), }; static void gem_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass); - sdc->init = gem_init; + dc->realize = gem_realize; dc->props = gem_properties; dc->vmsd = &vmstate_cadence_gem; dc->reset = gem_reset; @@ -1269,6 +1531,7 @@ static const TypeInfo gem_info = { .name = TYPE_CADENCE_GEM, .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(CadenceGEMState), + .instance_init = gem_init, .class_init = gem_class_init, }; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index bad43f474e..4994e1ca00 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -400,7 +400,7 @@ static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, if (range_covers_byte(address, len, PCI_COMMAND) && (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { - qemu_flush_queued_packets(qemu_get_queue(s->nic)); + e1000e_start_recv(&s->core); } } diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index badb1feb7d..2b11499829 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -52,7 +52,7 @@ second according to spec 10.2.4.2 */ #define E1000E_MAX_TX_FRAGS (64) -static void +static inline void e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val); static inline void @@ -953,7 +953,7 @@ e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, core->rx_desc_buf_size; } -static inline void +void e1000e_start_recv(E1000ECore *core) { int i; @@ -1278,11 +1278,10 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; - memset(d, 0, sizeof(*d)); - assert(!rss_info->enabled); d->length = cpu_to_le16(length); + d->csum = 0; e1000e_build_rx_metadata(core, pkt, pkt != NULL, rss_info, @@ -1291,6 +1290,7 @@ e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, &d->special); d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); d->status = (uint8_t) le32_to_cpu(status_flags); + d->special = 0; } static inline void @@ -1301,7 +1301,7 @@ e1000e_write_ext_rx_descr(E1000ECore *core, uint8_t *desc, { union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; - memset(d, 0, sizeof(*d)); + memset(&d->wb, 0, sizeof(d->wb)); d->wb.upper.length = cpu_to_le16(length); @@ -1325,7 +1325,7 @@ e1000e_write_ps_rx_descr(E1000ECore *core, uint8_t *desc, union e1000_rx_desc_packet_split *d = (union e1000_rx_desc_packet_split *) desc; - memset(d, 0, sizeof(*d)); + memset(&d->wb, 0, sizeof(d->wb)); d->wb.middle.length0 = cpu_to_le16((*written)[0]); @@ -1710,7 +1710,8 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) } /* Perform ACK 
receive detection */ - if (e1000e_is_tcp_ack(core, core->rx_pkt)) { + if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) && + (e1000e_is_tcp_ack(core, core->rx_pkt))) { n |= E1000_ICS_ACK; } @@ -1807,6 +1808,7 @@ e1000e_core_set_link_status(E1000ECore *core) core->autoneg_timer); } else { e1000x_update_regs_on_link_up(core->mac, core->phy[0]); + e1000e_start_recv(core); } } @@ -2007,19 +2009,23 @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) } if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { - trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause); - e1000e_clear_ims_bits(core, core->mac[IAM] & cause); + trace_e1000e_irq_iam_clear_eiame(core->mac[IAM], cause); + core->mac[IAM] &= ~cause; } trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); - if (core->mac[EIAC] & E1000_ICR_OTHER) { - effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) | - E1000_ICR_OTHER_CAUSES; - } else { - effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK; + effective_eiac = core->mac[EIAC] & cause; + + if (effective_eiac == E1000_ICR_OTHER) { + effective_eiac |= E1000_ICR_OTHER_CAUSES; } + core->mac[ICR] &= ~effective_eiac; + + if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + core->mac[IMS] &= ~effective_eiac; + } } static void @@ -2130,7 +2136,7 @@ e1000e_update_interrupt_state(E1000ECore *core) /* Set ICR[OTHER] for MSI-X */ if (is_msix) { - if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) { + if (core->mac[ICR] & E1000_ICR_OTHER_CAUSES) { core->mac[ICR] |= E1000_ICR_OTHER; trace_e1000e_irq_add_msi_other(core->mac[ICR]); } @@ -2168,7 +2174,7 @@ e1000e_update_interrupt_state(E1000ECore *core) } } -static inline void +static void e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) { trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); @@ -2187,6 +2193,8 @@ e1000e_autoneg_timer(void *opaque) E1000ECore *core = opaque; if (!qemu_get_queue(core->owner_nic)->link_down) { e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); + e1000e_start_recv(core); + e1000e_update_flowctl_status(core); /* signal link status change to the guest */ e1000e_set_interrupt_cause(core, E1000_ICR_LSC); @@ -2344,7 +2352,7 @@ e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val) core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; - if (msix_enabled(core->owner)) { + if (!msix_enabled(core->owner)) { return; } diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h index 5f413a9e08..1ff6978ca1 100644 --- a/hw/net/e1000e_core.h +++ b/hw/net/e1000e_core.h @@ -144,3 +144,6 @@ e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size); ssize_t e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); + +void +e1000e_start_recv(E1000ECore *core); diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c index bab4dbfc98..4bf71f2d85 100644 --- a/hw/net/eepro100.c +++ b/hw/net/eepro100.c @@ -1843,6 +1843,7 @@ static void pci_nic_uninit(PCIDevice *pci_dev) EEPRO100State *s = DO_UPCAST(EEPRO100State, dev, pci_dev); vmstate_unregister(&pci_dev->qdev, s->vmstate, s); + g_free(s->vmstate); eeprom93xx_free(&pci_dev->qdev, s->eeprom); qemu_del_nic(s->nic); } diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c index b5c777fbf6..fadf9c8faf 100644 --- a/hw/net/fsl_etsec/etsec.c +++ b/hw/net/fsl_etsec/etsec.c @@ -348,8 +348,8 @@ static ssize_t etsec_receive(NetClientState *nc, eTSEC *etsec = qemu_get_nic_opaque(nc); #if defined(HEX_DUMP) - fprintf(stderr, "%s receive size:%d\n", etsec->nic->nc.name, size); - qemu_hexdump(buf, stderr, "", size); + 
fprintf(stderr, "%s receive size:%zd\n", nc->name, size); + qemu_hexdump((void *)buf, stderr, "", size); #endif /* Flush is unnecessary as are already in receiving path */ etsec->need_flush = false; @@ -387,7 +387,7 @@ static void etsec_realize(DeviceState *dev, Error **errp) etsec->bh = qemu_bh_new(etsec_timer_hit, etsec); - etsec->ptimer = ptimer_init(etsec->bh); + etsec->ptimer = ptimer_init(etsec->bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(etsec->ptimer, 100); } diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c index 79d2f14dd8..54c01275d4 100644 --- a/hw/net/fsl_etsec/rings.c +++ b/hw/net/fsl_etsec/rings.c @@ -474,6 +474,14 @@ static void rx_init_frame(eTSEC *etsec, const uint8_t *buf, size_t size) /* CRC padding (We don't have to compute the CRC) */ etsec->rx_padding = 4; + /* + * Ensure that payload length + CRC length is at least 802.3 + * minimum MTU size bytes long (64) + */ + if (etsec->rx_buffer_len < 60) { + etsec->rx_padding += 60 - etsec->rx_buffer_len; + } + etsec->rx_first_in_frame = 1; etsec->rx_remaining_data = etsec->rx_buffer_len; RING_DEBUG("%s: rx_buffer_len:%u rx_padding+crc:%u\n", __func__, diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 1c415ab3b1..50c75642c6 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -429,7 +429,7 @@ static void imx_fec_do_tx(IMXFECState *s) frame_size += len; if (bd.flags & ENET_BD_L) { /* Last buffer in frame. */ - qemu_send_packet(qemu_get_queue(s->nic), frame, len); + qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size); ptr = frame; frame_size = 0; s->regs[ENET_EIR] |= ENET_INT_TXF; diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c index 4615d873b1..3db8937cac 100644 --- a/hw/net/lan9118.c +++ b/hw/net/lan9118.c @@ -1345,7 +1345,7 @@ static int lan9118_init1(SysBusDevice *sbd) s->txp = &s->tx_packet; bh = qemu_bh_new(lan9118_tick, s); - s->timer = ptimer_init(bh); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(s->timer, 10000); ptimer_set_limit(s->timer, 0xffff, 1); diff --git a/hw/net/mcf_fec.c b/hw/net/mcf_fec.c index 0ee8ad9d66..4025eb3b33 100644 --- a/hw/net/mcf_fec.c +++ b/hw/net/mcf_fec.c @@ -23,6 +23,7 @@ do { printf("mcf_fec: " fmt , ## __VA_ARGS__); } while (0) #define DPRINTF(fmt, ...) do {} while(0) #endif +#define FEC_MAX_DESC 1024 #define FEC_MAX_FRAME_SIZE 2032 typedef struct { @@ -149,7 +150,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) uint32_t addr; mcf_fec_bd bd; int frame_size; - int len; + int len, descnt = 0; uint8_t frame[FEC_MAX_FRAME_SIZE]; uint8_t *ptr; @@ -157,7 +158,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) ptr = frame; frame_size = 0; addr = s->tx_descriptor; - while (1) { + while (descnt++ < FEC_MAX_DESC) { mcf_fec_read_bd(&bd, addr); DPRINTF("tx_bd %x flags %04x len %d data %08x\n", addr, bd.flags, bd.length, bd.data); @@ -176,7 +177,7 @@ static void mcf_fec_do_tx(mcf_fec_state *s) if (bd.flags & FEC_BD_L) { /* Last buffer in frame. */ DPRINTF("Sending packet\n"); - qemu_send_packet(qemu_get_queue(s->nic), frame, len); + qemu_send_packet(qemu_get_queue(s->nic), frame, frame_size); ptr = frame; frame_size = 0; s->eir |= FEC_INT_TXF; @@ -392,7 +393,7 @@ static void mcf_fec_write(void *opaque, hwaddr addr, s->tx_descriptor = s->etdsr; break; case 0x188: - s->emrbr = value & 0x7f0; + s->emrbr = value > 0 ? 
value & 0x7F0 : 0x7F0; break; default: hw_error("mcf_fec_write Bad address 0x%x\n", (int)addr); diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c index 198a01f92d..654455355f 100644 --- a/hw/net/pcnet.c +++ b/hw/net/pcnet.c @@ -302,7 +302,7 @@ static inline void pcnet_tmd_load(PCNetState *s, struct pcnet_TMD *tmd, uint32_t tbadr; int16_t length; int16_t status; - } xda; + } xda; s->phys_mem_read(s->dma_opaque, addr, (void *)&xda, sizeof(xda), 0); tmd->tbadr = le32_to_cpu(xda.tbadr) & 0xffffff; tmd->length = le16_to_cpu(xda.length); @@ -664,7 +664,9 @@ static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size) static inline hwaddr pcnet_rdra_addr(PCNetState *s, int idx) { - while (idx < 1) idx += CSR_RCVRL(s); + while (idx < 1) { + idx += CSR_RCVRL(s); + } return s->rdra + ((CSR_RCVRL(s) - idx) * (BCR_SWSTYLE(s) ? 16 : 8)); } @@ -672,8 +674,10 @@ static inline int64_t pcnet_get_next_poll_time(PCNetState *s, int64_t current_ti { int64_t next_time = current_time + (65536 - (CSR_SPND(s) ? 0 : CSR_POLL(s))) * 30; - if (next_time <= current_time) + + if (next_time <= current_time) { next_time = current_time + 1; + } return next_time; } @@ -795,13 +799,13 @@ static void pcnet_init(PCNetState *s) mode = le16_to_cpu(initblk.mode); rlen = initblk.rlen >> 4; tlen = initblk.tlen >> 4; - ladrf[0] = le16_to_cpu(initblk.ladrf[0]); - ladrf[1] = le16_to_cpu(initblk.ladrf[1]); - ladrf[2] = le16_to_cpu(initblk.ladrf[2]); - ladrf[3] = le16_to_cpu(initblk.ladrf[3]); - padr[0] = le16_to_cpu(initblk.padr[0]); - padr[1] = le16_to_cpu(initblk.padr[1]); - padr[2] = le16_to_cpu(initblk.padr[2]); + ladrf[0] = le16_to_cpu(initblk.ladrf[0]); + ladrf[1] = le16_to_cpu(initblk.ladrf[1]); + ladrf[2] = le16_to_cpu(initblk.ladrf[2]); + ladrf[3] = le16_to_cpu(initblk.ladrf[3]); + padr[0] = le16_to_cpu(initblk.padr[0]); + padr[1] = le16_to_cpu(initblk.padr[1]); + padr[2] = le16_to_cpu(initblk.padr[2]); rdra = le32_to_cpu(initblk.rdra); tdra = le32_to_cpu(initblk.tdra); } else { @@ -809,13 +813,13 @@ static void pcnet_init(PCNetState *s) s->phys_mem_read(s->dma_opaque, PHYSADDR(s,CSR_IADR(s)), (uint8_t *)&initblk, sizeof(initblk), 0); mode = le16_to_cpu(initblk.mode); - ladrf[0] = le16_to_cpu(initblk.ladrf[0]); - ladrf[1] = le16_to_cpu(initblk.ladrf[1]); - ladrf[2] = le16_to_cpu(initblk.ladrf[2]); - ladrf[3] = le16_to_cpu(initblk.ladrf[3]); - padr[0] = le16_to_cpu(initblk.padr[0]); - padr[1] = le16_to_cpu(initblk.padr[1]); - padr[2] = le16_to_cpu(initblk.padr[2]); + ladrf[0] = le16_to_cpu(initblk.ladrf[0]); + ladrf[1] = le16_to_cpu(initblk.ladrf[1]); + ladrf[2] = le16_to_cpu(initblk.ladrf[2]); + ladrf[3] = le16_to_cpu(initblk.ladrf[3]); + padr[0] = le16_to_cpu(initblk.padr[0]); + padr[1] = le16_to_cpu(initblk.padr[1]); + padr[2] = le16_to_cpu(initblk.padr[2]); rdra = le32_to_cpu(initblk.rdra); tdra = le32_to_cpu(initblk.tdra); rlen = rdra >> 29; @@ -858,12 +862,12 @@ static void pcnet_start(PCNetState *s) printf("pcnet_start\n"); #endif - if (!CSR_DTX(s)) + if (!CSR_DTX(s)) { s->csr[0] |= 0x0010; /* set TXON */ - - if (!CSR_DRX(s)) + } + if (!CSR_DRX(s)) { s->csr[0] |= 0x0020; /* set RXON */ - + } s->csr[0] &= ~0x0004; /* clear STOP bit */ s->csr[0] |= 0x0002; pcnet_poll_timer(s); @@ -925,8 +929,7 @@ static void pcnet_rdte_poll(PCNetState *s) crda); } } else { - printf("pcnet: BAD RMD RDA=0x" TARGET_FMT_plx "\n", - crda); + printf("pcnet: BAD RMD RDA=0x" TARGET_FMT_plx "\n", crda); #endif } } @@ -1168,10 +1171,11 @@ ssize_t pcnet_receive(NetClientState *nc, const uint8_t *buf, size_t size_) #endif while (pktcount--) { 
- if (CSR_RCVRC(s) <= 1) + if (CSR_RCVRC(s) <= 1) { CSR_RCVRC(s) = CSR_RCVRL(s); - else + } else { CSR_RCVRC(s)--; + } } pcnet_rdte_poll(s); @@ -1207,7 +1211,7 @@ static void pcnet_transmit(PCNetState *s) s->tx_busy = 1; - txagain: +txagain: if (pcnet_tdte_poll(s)) { struct pcnet_TMD tmd; @@ -1251,7 +1255,7 @@ static void pcnet_transmit(PCNetState *s) s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr), s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s)); s->xmit_pos += bcnt; - + if (!GET_FIELD(tmd.status, TMDS, ENP)) { goto txdone; } @@ -1276,21 +1280,22 @@ static void pcnet_transmit(PCNetState *s) s->csr[4] |= 0x0004; /* set TXSTRT */ s->xmit_pos = -1; - txdone: +txdone: SET_FIELD(&tmd.status, TMDS, OWN, 0); TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s))); - if (!CSR_TOKINTD(s) || (CSR_LTINTEN(s) && GET_FIELD(tmd.status, TMDS, LTINT))) + if (!CSR_TOKINTD(s) + || (CSR_LTINTEN(s) && GET_FIELD(tmd.status, TMDS, LTINT))) { s->csr[0] |= 0x0200; /* set TINT */ - - if (CSR_XMTRC(s)<=1) + } + if (CSR_XMTRC(s) <= 1) { CSR_XMTRC(s) = CSR_XMTRL(s); - else + } else { CSR_XMTRC(s)--; - if (count--) + } + if (count--) { goto txagain; - - } else - if (s->xmit_pos >= 0) { + } + } else if (s->xmit_pos >= 0) { struct pcnet_TMD tmd; TMDLOAD(&tmd, xmit_cxda); SET_FIELD(&tmd.misc, TMDM, BUFF, 1); @@ -1301,9 +1306,9 @@ static void pcnet_transmit(PCNetState *s) s->csr[0] |= 0x0200; /* set TINT */ if (!CSR_DXSUFLO(s)) { s->csr[0] &= ~0x0010; - } else - if (count--) - goto txagain; + } else if (count--) { + goto txagain; + } } s->tx_busy = 0; @@ -1315,13 +1320,11 @@ static void pcnet_poll(PCNetState *s) pcnet_rdte_poll(s); } - if (CSR_TDMD(s) || - (CSR_TXON(s) && !CSR_DPOLL(s) && pcnet_tdte_poll(s))) - { + if (CSR_TDMD(s) || (CSR_TXON(s) && !CSR_DPOLL(s) && pcnet_tdte_poll(s))) { /* prevent recursion */ - if (s->tx_busy) + if (s->tx_busy) { return; - + } pcnet_transmit(s); } } @@ -1340,15 +1343,16 @@ static void pcnet_poll_timer(void *opaque) if (!CSR_STOP(s) && !CSR_SPND(s) && !CSR_DPOLL(s)) { uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) * 33; - if (!s->timer || !now) + if (!s->timer || !now) { s->timer = now; - else { + } else { uint64_t t = now - s->timer + CSR_POLL(s); if (t > 0xffffLL) { pcnet_poll(s); CSR_POLL(s) = CSR_PINT(s); - } else + } else { CSR_POLL(s) = t; + } } timer_mod(s->poll_timer, pcnet_get_next_poll_time(s,qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL))); @@ -1371,21 +1375,21 @@ static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value) val = (val & 0x007f) | (s->csr[0] & 0x7f00); /* IFF STOP, STRT and INIT are set, clear STRT and INIT */ - if ((val&7) == 7) - val &= ~3; - - if (!CSR_STOP(s) && (val & 4)) + if ((val & 7) == 7) { + val &= ~3; + } + if (!CSR_STOP(s) && (val & 4)) { pcnet_stop(s); - - if (!CSR_INIT(s) && (val & 1)) + } + if (!CSR_INIT(s) && (val & 1)) { pcnet_init(s); - - if (!CSR_STRT(s) && (val & 2)) + } + if (!CSR_STRT(s) && (val & 2)) { pcnet_start(s); - - if (CSR_TDMD(s)) + } + if (CSR_TDMD(s)) { pcnet_transmit(s); - + } return; case 1: case 2: @@ -1429,12 +1433,16 @@ static void pcnet_csr_writew(PCNetState *s, uint32_t rap, uint32_t new_value) case 47: /* POLLINT */ case 72: case 74: + break; case 76: /* RCVRL */ case 78: /* XMTRL */ + val = (val > 0) ? 
val : 512; + break; case 112: - if (CSR_STOP(s) || CSR_SPND(s)) - break; - return; + if (CSR_STOP(s) || CSR_SPND(s)) { + break; + } + return; case 3: break; case 4: @@ -1651,8 +1659,7 @@ void pcnet_ioport_writel(void *opaque, uint32_t addr, uint32_t val) pcnet_bcr_writew(s, s->rap, val & 0xffff); break; } - } else - if ((addr & 0x0f) == 0) { + } else if ((addr & 0x0f) == 0) { /* switch device to dword i/o mode */ pcnet_bcr_writew(s, BCR_BSBC, pcnet_bcr_readw(s, BCR_BSBC) | 0x0080); #ifdef PCNET_DEBUG_IO diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index 30f2ce417b..e9d215aa4d 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -860,7 +860,7 @@ static void rocker_io_writel(void *opaque, hwaddr addr, uint32_t val) rocker_msix_irq(r, val); break; case ROCKER_TEST_DMA_SIZE: - r->test_dma_size = val; + r->test_dma_size = val & 0xFFFF; break; case ROCKER_TEST_DMA_ADDR + 4: r->test_dma_addr = ((uint64_t)val) << 32 | r->lower32; diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 3345bc6b5e..f05e59c85f 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -2350,7 +2350,7 @@ static void rtl8139_cplus_transmit(RTL8139State *s) { int txcount = 0; - while (rtl8139_cplus_transmit_one(s)) + while (txcount < 64 && rtl8139_cplus_transmit_one(s)) { ++txcount; } diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index b273eda933..01ecb02773 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -34,20 +34,13 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "sysemu/sysemu.h" +#include "trace.h" #include #define ETH_ALEN 6 #define MAX_PACKET_SIZE 65536 -/*#define DEBUG*/ - -#ifdef DEBUG -#define DPRINTF(fmt...) do { fprintf(stderr, fmt); } while (0) -#else -#define DPRINTF(fmt...) -#endif - /* Compatibility flags for migration */ #define SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT 0 #define SPAPRVLAN_FLAG_RX_BUF_POOLS (1 << SPAPRVLAN_FLAG_RX_BUF_POOLS_BIT) @@ -106,6 +99,7 @@ typedef struct VIOsPAPRVLANDevice { VIOsPAPRDevice sdev; NICConf nicconf; NICState *nic; + MACAddr perm_mac; bool isopen; hwaddr buf_list; uint32_t add_buf_ptr, use_buf_ptr, rx_bufs; @@ -157,8 +151,10 @@ static vlan_bd_t spapr_vlan_get_rx_bd_from_pool(VIOsPAPRVLANDevice *dev, return 0; } - DPRINTF("Found buffer: pool=%d count=%d rxbufs=%d\n", pool, - dev->rx_pool[pool]->count, dev->rx_bufs); + + trace_spapr_vlan_get_rx_bd_from_pool_found(pool, + dev->rx_pool[pool]->count, + dev->rx_bufs); /* Remove the buffer from the pool */ dev->rx_pool[pool]->count--; @@ -185,8 +181,8 @@ static vlan_bd_t spapr_vlan_get_rx_bd_from_page(VIOsPAPRVLANDevice *dev, } bd = vio_ldq(&dev->sdev, dev->buf_list + buf_ptr); - DPRINTF("use_buf_ptr=%d bd=0x%016llx\n", - buf_ptr, (unsigned long long)bd); + + trace_spapr_vlan_get_rx_bd_from_page(buf_ptr, (uint64_t)bd); } while ((!(bd & VLAN_BD_VALID) || VLAN_BD_LEN(bd) < size + 8) && buf_ptr != dev->use_buf_ptr); @@ -199,7 +195,7 @@ static vlan_bd_t spapr_vlan_get_rx_bd_from_page(VIOsPAPRVLANDevice *dev, dev->use_buf_ptr = buf_ptr; vio_stq(&dev->sdev, dev->buf_list + dev->use_buf_ptr, 0); - DPRINTF("Found buffer: ptr=%d rxbufs=%d\n", dev->use_buf_ptr, dev->rx_bufs); + trace_spapr_vlan_get_rx_bd_from_page_found(dev->use_buf_ptr, dev->rx_bufs); return bd; } @@ -214,8 +210,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, uint64_t handle; uint8_t control; - DPRINTF("spapr_vlan_receive() [%s] rx_bufs=%d\n", sdev->qdev.id, - dev->rx_bufs); + trace_spapr_vlan_receive(sdev->qdev.id, dev->rx_bufs); if (!dev->isopen) { return -1; @@ -243,7 
+238,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, return -1; } - DPRINTF("spapr_vlan_receive: DMA write completed\n"); + trace_spapr_vlan_receive_dma_completed(); /* Update the receive queue */ control = VLAN_RXQC_TOGGLE | VLAN_RXQC_VALID; @@ -257,12 +252,11 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, vio_sth(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr + 2, 8); vio_stb(sdev, VLAN_BD_ADDR(rxq_bd) + dev->rxq_ptr, control); - DPRINTF("wrote rxq entry (ptr=0x%llx): 0x%016llx 0x%016llx\n", - (unsigned long long)dev->rxq_ptr, - (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) + - dev->rxq_ptr), - (unsigned long long)vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) + - dev->rxq_ptr + 8)); + trace_spapr_vlan_receive_wrote(dev->rxq_ptr, + vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) + + dev->rxq_ptr), + vio_ldq(sdev, VLAN_BD_ADDR(rxq_bd) + + dev->rxq_ptr + 8)); dev->rxq_ptr += 16; if (dev->rxq_ptr >= VLAN_BD_LEN(rxq_bd)) { @@ -316,6 +310,10 @@ static void spapr_vlan_reset(VIOsPAPRDevice *sdev) spapr_vlan_reset_rx_pool(dev->rx_pool[i]); } } + + memcpy(&dev->nicconf.macaddr.a, &dev->perm_mac.a, + sizeof(dev->nicconf.macaddr.a)); + qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a); } static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp) @@ -324,6 +322,8 @@ static void spapr_vlan_realize(VIOsPAPRDevice *sdev, Error **errp) qemu_macaddr_default_if_unset(&dev->nicconf.macaddr); + memcpy(&dev->perm_mac.a, &dev->nicconf.macaddr.a, sizeof(dev->perm_mac.a)); + dev->nic = qemu_new_nic(&net_spapr_vlan_info, &dev->nicconf, object_get_typename(OBJECT(sdev)), sdev->qdev.id, dev); qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a); @@ -573,8 +573,8 @@ static target_long spapr_vlan_add_rxbuf_to_pool(VIOsPAPRVLANDevice *dev, qsort(dev->rx_pool, RX_MAX_POOLS, sizeof(dev->rx_pool[0]), rx_pool_size_compare); pool = spapr_vlan_get_rx_pool_id(dev, size); - DPRINTF("created RX pool %d for size %lld\n", pool, - VLAN_BD_LEN(buf)); + trace_spapr_vlan_add_rxbuf_to_pool_create(pool, + VLAN_BD_LEN(buf)); break; } } @@ -584,8 +584,8 @@ static target_long spapr_vlan_add_rxbuf_to_pool(VIOsPAPRVLANDevice *dev, return H_RESOURCE; } - DPRINTF("h_add_llan_buf(): Add buf using pool %i (size %lli, count=%i)\n", - pool, VLAN_BD_LEN(buf), dev->rx_pool[pool]->count); + trace_spapr_vlan_add_rxbuf_to_pool(pool, VLAN_BD_LEN(buf), + dev->rx_pool[pool]->count); dev->rx_pool[pool]->bds[dev->rx_pool[pool]->count++] = buf; @@ -616,8 +616,7 @@ static target_long spapr_vlan_add_rxbuf_to_page(VIOsPAPRVLANDevice *dev, vio_stq(&dev->sdev, dev->buf_list + dev->add_buf_ptr, buf); - DPRINTF("h_add_llan_buf(): Added buf ptr=%d rx_bufs=%d bd=0x%016llx\n", - dev->add_buf_ptr, dev->rx_bufs, (unsigned long long)buf); + trace_spapr_vlan_add_rxbuf_to_page(dev->add_buf_ptr, dev->rx_bufs, buf); return 0; } @@ -633,8 +632,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); target_long ret; - DPRINTF("H_ADD_LOGICAL_LAN_BUFFER(0x" TARGET_FMT_lx - ", 0x" TARGET_FMT_lx ")\n", reg, buf); + trace_spapr_vlan_h_add_logical_lan_buffer(reg, buf); if (!sdev) { hcall_dprintf("Bad device\n"); @@ -687,14 +685,13 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, int i, nbufs; int ret; - DPRINTF("H_SEND_LOGICAL_LAN(0x" TARGET_FMT_lx ", , 0x" - TARGET_FMT_lx ")\n", reg, continue_token); + trace_spapr_vlan_h_send_logical_lan(reg, continue_token); if (!sdev) { return H_PARAMETER; } - DPRINTF("rxbufs 
= %d\n", dev->rx_bufs); + trace_spapr_vlan_h_send_logical_lan_rxbufs(dev->rx_bufs); if (!dev->isopen) { return H_DROPPED; @@ -706,7 +703,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, total_len = 0; for (i = 0; i < 6; i++) { - DPRINTF(" buf desc: 0x" TARGET_FMT_lx "\n", bufs[i]); + trace_spapr_vlan_h_send_logical_lan_buf_desc(bufs[i]); if (!(bufs[i] & VLAN_BD_VALID)) { break; } @@ -714,8 +711,7 @@ static target_ulong h_send_logical_lan(PowerPCCPU *cpu, } nbufs = i; - DPRINTF("h_send_logical_lan() %d buffers, total length 0x%x\n", - nbufs, total_len); + trace_spapr_vlan_h_send_logical_lan_total(nbufs, total_len); if (total_len == 0) { return H_SUCCESS; @@ -756,6 +752,27 @@ static target_ulong h_multicast_ctrl(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_SUCCESS; } +static target_ulong h_change_logical_lan_mac(PowerPCCPU *cpu, + sPAPRMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong reg = args[0]; + target_ulong macaddr = args[1]; + VIOsPAPRDevice *sdev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + VIOsPAPRVLANDevice *dev = VIO_SPAPR_VLAN_DEVICE(sdev); + int i; + + for (i = 0; i < ETH_ALEN; i++) { + dev->nicconf.macaddr.a[ETH_ALEN - i - 1] = macaddr & 0xff; + macaddr >>= 8; + } + + qemu_format_nic_info_str(qemu_get_queue(dev->nic), dev->nicconf.macaddr.a); + + return H_SUCCESS; +} + static Property spapr_vlan_properties[] = { DEFINE_SPAPR_PROPERTIES(VIOsPAPRVLANDevice, sdev), DEFINE_NIC_PROPERTIES(VIOsPAPRVLANDevice, nicconf), @@ -854,6 +871,8 @@ static void spapr_vlan_register_types(void) spapr_register_hypercall(H_ADD_LOGICAL_LAN_BUFFER, h_add_logical_lan_buffer); spapr_register_hypercall(H_MULTICAST_CTRL, h_multicast_ctrl); + spapr_register_hypercall(H_CHANGE_LOGICAL_LAN_MAC, + h_change_logical_lan_mac); type_register_static(&spapr_vlan_info); } diff --git a/hw/net/trace-events b/hw/net/trace-events index 8d38d7724d..1a5c909939 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -223,7 +223,7 @@ e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x" e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. 
Current ICR: 0x%x" e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME" -e1000e_irq_ims_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" +e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits due to EIAC, ICR: 0x%X, EIAC: 0x%X" e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC write 0x%x" e1000e_irq_fire_delayed_interrupts(void) "Firing delayed interrupts" @@ -270,3 +270,19 @@ e1000e_cfg_support_virtio(bool support) "Virtio header supported: %d" e1000e_vm_state_running(void) "VM state is running" e1000e_vm_state_stopped(void) "VM state is stopped" + +# hw/net/spapr_llan.c +spapr_vlan_get_rx_bd_from_pool_found(int pool, int32_t count, uint32_t rx_bufs) "pool=%d count=%"PRId32" rxbufs=%"PRIu32 +spapr_vlan_get_rx_bd_from_page(int buf_ptr, uint64_t bd) "use_buf_ptr=%d bd=0x%016"PRIx64 +spapr_vlan_get_rx_bd_from_page_found(uint32_t use_buf_ptr, uint32_t rx_bufs) "ptr=%"PRIu32" rxbufs=%"PRIu32 +spapr_vlan_receive(const char *id, uint32_t rx_bufs) "[%s] rx_bufs=%"PRIu32 +spapr_vlan_receive_dma_completed(void) "DMA write completed" +spapr_vlan_receive_wrote(uint64_t ptr, uint64_t hi, uint64_t lo) "rxq entry (ptr=0x%"PRIx64"): 0x%016"PRIx64" 0x%016"PRIx64 +spapr_vlan_add_rxbuf_to_pool_create(int pool, uint64_t len) "created RX pool %d for size %"PRIu64 +spapr_vlan_add_rxbuf_to_pool(int pool, uint64_t len, int32_t count) "add buf using pool %d (size %"PRIu64", count=%"PRId32")" +spapr_vlan_add_rxbuf_to_page(uint32_t ptr, uint32_t rx_bufs, uint64_t bd) "added buf ptr=%"PRIu32" rx_bufs=%"PRIu32" bd=0x%016"PRIx64 +spapr_vlan_h_add_logical_lan_buffer(uint64_t reg, uint64_t buf) "H_ADD_LOGICAL_LAN_BUFFER(0x%"PRIx64", 0x%"PRIx64")" +spapr_vlan_h_send_logical_lan(uint64_t reg, uint64_t continue_token) "H_SEND_LOGICAL_LAN(0x%"PRIx64", , 0x%"PRIx64")" +spapr_vlan_h_send_logical_lan_rxbufs(uint32_t rx_bufs) "rxbufs = %"PRIu32 +spapr_vlan_h_send_logical_lan_buf_desc(uint64_t buf) " buf desc: 0x%"PRIx64 +spapr_vlan_h_send_logical_lan_total(int nbufs, unsigned total_len) "%d buffers, total length 0x%x" diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 01f1351554..5009533cfa 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -31,6 +31,11 @@ #define MAC_TABLE_ENTRIES 64 #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ +/* previously fixed value */ +#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 +/* for now, only allow larger queues; with virtio-1, guest can downsize */ +#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE + /* * Calculate the number of bytes up to and including the given 'field' of * 'container'. 
@@ -503,6 +508,10 @@ static void virtio_net_set_queues(VirtIONet *n) int i; int r; + if (n->nic->peer_deleted) { + return; + } + for (i = 0; i < n->max_queues; i++) { if (i < n->curr_queues) { r = peer_attach(n, i); @@ -875,6 +884,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_OK; } + static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) { VirtIONet *n = VIRTIO_NET(vdev); @@ -892,8 +902,10 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) } if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { - error_report("virtio-net ctrl missing headers"); - exit(1); + virtio_error(vdev, "virtio-net ctrl missing headers"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; } iov_cnt = elem->out_num; @@ -1122,21 +1134,24 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement)); if (!elem) { - if (i == 0) - return -1; - error_report("virtio-net unexpected empty queue: " - "i %zd mergeable %d offset %zd, size %zd, " - "guest hdr len %zd, host hdr len %zd " - "guest features 0x%" PRIx64, - i, n->mergeable_rx_bufs, offset, size, - n->guest_hdr_len, n->host_hdr_len, - vdev->guest_features); - exit(1); + if (i) { + virtio_error(vdev, "virtio-net unexpected empty queue: " + "i %zd mergeable %d offset %zd, size %zd, " + "guest hdr len %zd, host hdr len %zd " + "guest features 0x%" PRIx64, + i, n->mergeable_rx_bufs, offset, size, + n->guest_hdr_len, n->host_hdr_len, + vdev->guest_features); + } + return -1; } if (elem->in_num < 1) { - error_report("virtio-net receive queue contains no in buffers"); - exit(1); + virtio_error(vdev, + "virtio-net receive queue contains no in buffers"); + virtqueue_detach_element(q->rx_vq, elem, 0); + g_free(elem); + return -1; } sg = elem->in_sg; @@ -1166,7 +1181,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t * must have consumed the complete packet. * Otherwise, drop it. 
*/ if (!n->mergeable_rx_bufs && offset < size) { - virtqueue_discard(q->rx_vq, elem, total); + virtqueue_unpop(q->rx_vq, elem, total); g_free(elem); return size; } @@ -1238,15 +1253,19 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) out_num = elem->out_num; out_sg = elem->out_sg; if (out_num < 1) { - error_report("virtio-net header not in first element"); - exit(1); + virtio_error(vdev, "virtio-net header not in first element"); + virtqueue_detach_element(q->tx_vq, elem, 0); + g_free(elem); + return -EINVAL; } if (n->has_vnet_hdr) { if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < n->guest_hdr_len) { - error_report("virtio-net header incorrect"); - exit(1); + virtio_error(vdev, "virtio-net header incorrect"); + virtqueue_detach_element(q->tx_vq, elem, 0); + g_free(elem); + return -EINVAL; } if (n->needs_vnet_hdr_swap) { virtio_net_hdr_swap(vdev, (void *) &mhdr); @@ -1314,7 +1333,9 @@ static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq) virtio_queue_set_notification(vq, 1); timer_del(q->tx_timer); q->tx_waiting = 0; - virtio_net_flush_tx(q); + if (virtio_net_flush_tx(q) == -EINVAL) { + return; + } } else { timer_mod(q->tx_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); @@ -1385,8 +1406,9 @@ static void virtio_net_tx_bh(void *opaque) } ret = virtio_net_flush_tx(q); - if (ret == -EBUSY) { - return; /* Notification re-enable handled by tx_complete */ + if (ret == -EBUSY || ret == -EINVAL) { + return; /* Notification re-enable handled by tx_complete or device + * broken */ } /* If we flush a full burst of packets, assume there are @@ -1401,7 +1423,10 @@ static void virtio_net_tx_bh(void *opaque) * anything that may have come in while we weren't looking. If * we find something, assume the guest is still active and reschedule */ virtio_queue_set_notification(q->tx_vq, 1); - if (virtio_net_flush_tx(q) > 0) { + ret = virtio_net_flush_tx(q); + if (ret == -EINVAL) { + return; + } else if (ret > 0) { virtio_queue_set_notification(q->tx_vq, 0); qemu_bh_schedule(q->tx_bh); q->tx_waiting = 1; @@ -1412,7 +1437,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - n->vqs[index].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx); + n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size, + virtio_net_handle_rx); if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) { n->vqs[index].tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer); @@ -1492,17 +1518,6 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) virtio_net_set_queues(n); } -static void virtio_net_save(QEMUFile *f, void *opaque, size_t size) -{ - VirtIONet *n = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(n); - - /* At this point, backend must be stopped, otherwise - * it might keep writing to memory. 
*/ - assert(!n->vhost_started); - virtio_save(vdev, f); -} - static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f) { VirtIONet *n = VIRTIO_NET(vdev); @@ -1538,14 +1553,6 @@ static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f) } } -static int virtio_net_load(QEMUFile *f, void *opaque, size_t size) -{ - VirtIONet *n = opaque; - VirtIODevice *vdev = VIRTIO_DEVICE(n); - - return virtio_load(vdev, f, VIRTIO_NET_VM_VERSION); -} - static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, int version_id) { @@ -1720,6 +1727,22 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) virtio_net_set_config_size(n, n->host_features); virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size); + /* + * We set a lower limit on RX queue size to what it always was. + * Guests that want a smaller ring can always resize it without + * help from us (using virtio 1 and up). + */ + if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || + n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || + (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) { + error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d.", + n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, + VIRTQUEUE_MAX_SIZE); + virtio_cleanup(vdev); + return; + } + n->max_queues = MAX(n->nic_conf.peers.queues, 1); if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) { error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " @@ -1832,8 +1855,25 @@ static void virtio_net_instance_init(Object *obj) DEVICE(n), NULL); } -VMSTATE_VIRTIO_DEVICE(net, VIRTIO_NET_VM_VERSION, virtio_net_load, - virtio_net_save); +static void virtio_net_pre_save(void *opaque) +{ + VirtIONet *n = opaque; + + /* At this point, backend must be stopped, otherwise + * it might keep writing to memory. 
*/ + assert(!n->vhost_started); +} + +static const VMStateDescription vmstate_virtio_net = { + .name = "virtio-net", + .minimum_version_id = VIRTIO_NET_VM_VERSION, + .version_id = VIRTIO_NET_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, + .pre_save = virtio_net_pre_save, +}; static Property virtio_net_properties[] = { DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), @@ -1880,6 +1920,8 @@ static Property virtio_net_properties[] = { TX_TIMER_INTERVAL), DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), + DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, + VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), DEFINE_PROP_END_OF_LIST(), }; @@ -1904,6 +1946,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; vdc->load = virtio_net_load_device; vdc->save = virtio_net_save_device; + vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); } static const TypeInfo virtio_net_info = { diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 90f6943668..92f6af9620 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -531,6 +531,7 @@ static void vmxnet3_complete_packet(VMXNET3State *s, int qidx, uint32_t tx_ridx) VMXNET3_RING_DUMP(VMW_RIPRN, "TXC", qidx, &s->txq_descr[qidx].comp_ring); + memset(&txcq_descr, 0, sizeof(txcq_descr)); txcq_descr.txdIdx = tx_ridx; txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring); diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 6856b52999..20c43a61b3 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -69,7 +69,7 @@ static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp, i netdev->tx_ring.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify); if (notify) { - xen_be_send_notify(&netdev->xendev); + xen_pv_send_notify(&netdev->xendev); } if (i == netdev->tx_ring.req_cons) { @@ -128,30 +128,32 @@ static void net_tx_packets(struct XenNetDev *netdev) /* should not happen in theory, we don't announce the * * feature-{sg,gso,whatelse} flags in xenstore (yet?) */ if (txreq.flags & NETTXF_extra_info) { - xen_be_printf(&netdev->xendev, 0, "FIXME: extra info flag\n"); + xen_pv_printf(&netdev->xendev, 0, "FIXME: extra info flag\n"); net_tx_error(netdev, &txreq, rc); continue; } if (txreq.flags & NETTXF_more_data) { - xen_be_printf(&netdev->xendev, 0, "FIXME: more data flag\n"); + xen_pv_printf(&netdev->xendev, 0, "FIXME: more data flag\n"); net_tx_error(netdev, &txreq, rc); continue; } #endif if (txreq.size < 14) { - xen_be_printf(&netdev->xendev, 0, "bad packet size: %d\n", txreq.size); + xen_pv_printf(&netdev->xendev, 0, "bad packet size: %d\n", + txreq.size); net_tx_error(netdev, &txreq, rc); continue; } if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) { - xen_be_printf(&netdev->xendev, 0, "error: page crossing\n"); + xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n"); net_tx_error(netdev, &txreq, rc); continue; } - xen_be_printf(&netdev->xendev, 3, "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n", + xen_pv_printf(&netdev->xendev, 3, + "tx packet ref %d, off %d, len %d, flags 0x%x%s%s%s%s\n", txreq.gref, txreq.offset, txreq.size, txreq.flags, (txreq.flags & NETTXF_csum_blank) ? " csum_blank" : "", (txreq.flags & NETTXF_data_validated) ? 
" data_validated" : "", @@ -162,8 +164,9 @@ static void net_tx_packets(struct XenNetDev *netdev) netdev->xendev.dom, txreq.gref, PROT_READ); if (page == NULL) { - xen_be_printf(&netdev->xendev, 0, "error: tx gref dereference failed (%d)\n", - txreq.gref); + xen_pv_printf(&netdev->xendev, 0, + "error: tx gref dereference failed (%d)\n", + txreq.gref); net_tx_error(netdev, &txreq, rc); continue; } @@ -211,13 +214,14 @@ static void net_rx_response(struct XenNetDev *netdev, resp->status = (int16_t)st; } - xen_be_printf(&netdev->xendev, 3, "rx response: idx %d, status %d, flags 0x%x\n", + xen_pv_printf(&netdev->xendev, 3, + "rx response: idx %d, status %d, flags 0x%x\n", i, resp->status, resp->flags); netdev->rx_ring.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify); if (notify) { - xen_be_send_notify(&netdev->xendev); + xen_pv_send_notify(&netdev->xendev); } } @@ -242,7 +246,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size return 0; } if (size > XC_PAGE_SIZE - NET_IP_ALIGN) { - xen_be_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", + xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)", (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN); return -1; } @@ -254,7 +258,8 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size netdev->xendev.dom, rxreq.gref, PROT_WRITE); if (page == NULL) { - xen_be_printf(&netdev->xendev, 0, "error: rx gref dereference failed (%d)\n", + xen_pv_printf(&netdev->xendev, 0, + "error: rx gref dereference failed (%d)\n", rxreq.gref); net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0); return -1; @@ -328,7 +333,8 @@ static int net_connect(struct XenDevice *xendev) rx_copy = 0; } if (rx_copy == 0) { - xen_be_printf(&netdev->xendev, 0, "frontend doesn't support rx-copy.\n"); + xen_pv_printf(&netdev->xendev, 0, + "frontend doesn't support rx-copy.\n"); return -1; } @@ -353,7 +359,7 @@ static int net_connect(struct XenDevice *xendev) xen_be_bind_evtchn(&netdev->xendev); - xen_be_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, " + xen_pv_printf(&netdev->xendev, 1, "ok: tx-ring-ref %d, rx-ring-ref %d, " "remote port %d, local port %d\n", netdev->tx_ring_ref, netdev->rx_ring_ref, netdev->xendev.remote_port, netdev->xendev.local_port); @@ -366,7 +372,7 @@ static void net_disconnect(struct XenDevice *xendev) { struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev); - xen_be_unbind_evtchn(&netdev->xendev); + xen_pv_unbind_evtchn(&netdev->xendev); if (netdev->txs) { xengnttab_unmap(netdev->xendev.gnttabdev, netdev->txs, 1); diff --git a/hw/nvram/Makefile.objs b/hw/nvram/Makefile.objs index e9a66940e0..c018f6b2ff 100644 --- a/hw/nvram/Makefile.objs +++ b/hw/nvram/Makefile.objs @@ -1,5 +1,6 @@ common-obj-$(CONFIG_DS1225Y) += ds1225y.o common-obj-y += eeprom93xx.o common-obj-y += fw_cfg.o +common-obj-y += chrp_nvram.o common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o obj-$(CONFIG_PSERIES) += spapr_nvram.o diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c new file mode 100644 index 0000000000..3837510dd2 --- /dev/null +++ b/hw/nvram/chrp_nvram.c @@ -0,0 +1,85 @@ +/* + * Common Hardware Reference Platform NVRAM helper functions. + * + * The CHRP NVRAM layout is used by OpenBIOS and SLOF. See CHRP + * specification, chapter 8, or the LoPAPR specification for details + * about the NVRAM layout. 
+ * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "hw/hw.h" +#include "hw/nvram/chrp_nvram.h" +#include "sysemu/sysemu.h" + +static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) +{ + int len; + + len = strlen(str) + 1; + memcpy(&nvram[addr], str, len); + + return addr + len; +} + +/** + * Create a "system partition", used for the Open Firmware + * environment variables. + */ +int chrp_nvram_create_system_partition(uint8_t *data, int min_len) +{ + ChrpNvramPartHdr *part_header; + unsigned int i; + int end; + + part_header = (ChrpNvramPartHdr *)data; + part_header->signature = CHRP_NVPART_SYSTEM; + pstrcpy(part_header->name, sizeof(part_header->name), "system"); + + end = sizeof(ChrpNvramPartHdr); + for (i = 0; i < nb_prom_envs; i++) { + end = chrp_nvram_set_var(data, end, prom_envs[i]); + } + + /* End marker */ + data[end++] = '\0'; + + end = (end + 15) & ~15; + /* XXX: OpenBIOS is not able to grow up a partition. Leave some space for + new variables. */ + if (end < min_len) { + end = min_len; + } + chrp_nvram_finish_partition(part_header, end); + + return end; +} + +/** + * Create a "free space" partition + */ +int chrp_nvram_create_free_partition(uint8_t *data, int len) +{ + ChrpNvramPartHdr *part_header; + + part_header = (ChrpNvramPartHdr *)data; + part_header->signature = CHRP_NVPART_FREE; + pstrcpy(part_header->name, sizeof(part_header->name), "free"); + + chrp_nvram_finish_partition(part_header, len); + + return len; +} diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 6a68e594d5..3ebecb2260 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -29,7 +29,6 @@ #include "hw/isa/isa.h" #include "hw/nvram/fw_cfg.h" #include "hw/sysbus.h" -#include "hw/boards.h" #include "trace.h" #include "qemu/error-report.h" #include "qemu/config-file.h" @@ -180,7 +179,7 @@ static void fw_cfg_bootsplash(FWCfgState *s) temp = qemu_opt_get(opts, "splash-time"); if (temp != NULL) { p = (char *)temp; - boot_splash_time = strtol(p, (char **)&p, 10); + boot_splash_time = strtol(p, &p, 10); } } @@ -240,7 +239,7 @@ static void fw_cfg_reboot(FWCfgState *s) temp = qemu_opt_get(opts, "reboot-timeout"); if (temp != NULL) { p = (char *)temp; - reboot_timeout = strtol(p, (char **)&p, 10); + reboot_timeout = strtol(p, &p, 10); } } /* validate the input */ @@ -883,9 +882,8 @@ static void fw_cfg_init1(DeviceState *dev) qdev_init_nofail(dev); fw_cfg_add_bytes(s, FW_CFG_SIGNATURE, (char *)"QEMU", 4); - fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16); + fw_cfg_add_bytes(s, FW_CFG_UUID, &qemu_uuid, 16); fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)!machine->enable_graphics); - fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu); fw_cfg_bootsplash(s); fw_cfg_reboot(s); diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c index 24f61212ba..63f9ed1d82 100644 --- a/hw/nvram/mac_nvram.c +++ 
b/hw/nvram/mac_nvram.c @@ -24,8 +24,7 @@ */ #include "qemu/osdep.h" #include "hw/hw.h" -#include "hw/nvram/openbios_firmware_abi.h" -#include "sysemu/sysemu.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/ppc/mac.h" #include "qemu/cutils.h" #include @@ -146,38 +145,14 @@ static void macio_nvram_register_types(void) static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, int len) { - unsigned int i; - uint32_t start = off, end; - struct OpenBIOS_nvpart_v1 *part_header; - - // OpenBIOS nvram variables - // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start]; - part_header->signature = OPENBIOS_PART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = start + sizeof(struct OpenBIOS_nvpart_v1); - for (i = 0; i < nb_prom_envs; i++) - end = OpenBIOS_set_var(nvr->data, end, prom_envs[i]); - - // End marker - nvr->data[end++] = '\0'; - - end = start + ((end - start + 15) & ~15); - /* XXX: OpenBIOS is not able to grow up a partition. Leave some space for - new variables. */ - if (end < DEF_SYSTEM_SIZE) - end = DEF_SYSTEM_SIZE; - OpenBIOS_finish_partition(part_header, end - start); - - // free partition - start = end; - part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start]; - part_header->signature = OPENBIOS_PART_FREE; - pstrcpy(part_header->name, sizeof(part_header->name), "free"); - - end = len; - OpenBIOS_finish_partition(part_header, end - start); + int sysp_end; + + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], + DEF_SYSTEM_SIZE) + off; + + /* Free space partition */ + chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); } #define OSX_NVRAM_SIGNATURE (0x5A) @@ -187,15 +162,15 @@ static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off, int len) { uint32_t start = off; - struct OpenBIOS_nvpart_v1 *part_header; + ChrpNvramPartHdr *part_header; unsigned char *data = &nvr->data[start]; /* empty partition */ - part_header = (struct OpenBIOS_nvpart_v1 *)data; + part_header = (ChrpNvramPartHdr *)data; part_header->signature = OSX_NVRAM_SIGNATURE; pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww"); - OpenBIOS_finish_partition(part_header, len); + chrp_nvram_finish_partition(part_header, len); /* Generation */ stl_be_p(&data[20], 2); diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c index 4de5f705d8..eb42ea323f 100644 --- a/hw/nvram/spapr_nvram.c +++ b/hw/nvram/spapr_nvram.c @@ -31,6 +31,7 @@ #include "sysemu/block-backend.h" #include "sysemu/device_tree.h" #include "hw/sysbus.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" @@ -162,6 +163,11 @@ static void spapr_nvram_realize(VIOsPAPRDevice *dev, Error **errp) error_setg(errp, "can't read spapr-nvram contents"); return; } + } else if (nb_prom_envs > 0) { + /* Create a system partition to pass the -prom-env variables */ + chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); + chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], + nvram->size - MIN_NVRAM_SIZE / 4); } spapr_rtas_register(RTAS_NVRAM_FETCH, "nvram-fetch", rtas_nvram_fetch); diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 1cc598f7e9..6ac187fa32 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -15,7 +15,6 @@ #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" -#include "hw/pci/pci_bus.h" 
#include "hw/pci/pci_bridge.h" #include "hw/i386/pc.h" #include "qemu/range.h" diff --git a/hw/pci-host/uninorth.c b/hw/pci-host/uninorth.c index 7aac4d67a4..df342ac3cb 100644 --- a/hw/pci-host/uninorth.c +++ b/hw/pci-host/uninorth.c @@ -62,9 +62,7 @@ typedef struct UNINState { static int pci_unin_map_irq(PCIDevice *pci_dev, int irq_num) { - int devfn = pci_dev->devfn & 0x00FFFFFF; - - return (((devfn >> 11) & 0x1F) + irq_num) & 3; + return (irq_num + (pci_dev->devfn >> 3)) & 3; } static void pci_unin_set_irq(void *opaque, int irq_num, int level) diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 91a3420f47..8025129377 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -1,10 +1,12 @@ # shared objects -obj-y += ppc.o ppc_booke.o +obj-y += ppc.o ppc_booke.o fdt.o # IBM pSeries (sPAPR) obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o -obj-$(CONFIG_PSERIES) += spapr_cpu_core.o +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o spapr_ovec.o +# IBM PowerNV +obj-$(CONFIG_POWERNV) += pnv.o pnv_xscom.o pnv_core.o pnv_lpc.o ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) obj-y += spapr_pci_vfio.o endif diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 0cd534df55..cf8b122afe 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -196,7 +196,7 @@ static int create_devtree_etsec(SysBusDevice *sbdev, PlatformDevtreeData *data) return 0; } -static int sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque) +static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque) { PlatformDevtreeData *data = opaque; bool matched = false; @@ -211,8 +211,6 @@ static int sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque) qdev_fw_name(DEVICE(sbdev))); exit(1); } - - return 0; } static void platform_bus_create_devtree(PPCE500Params *params, void *fdt, diff --git a/hw/ppc/fdt.c b/hw/ppc/fdt.c new file mode 100644 index 0000000000..e67d60d03c --- /dev/null +++ b/hw/ppc/fdt.c @@ -0,0 +1,49 @@ +/* + * QEMU PowerPC helper routines for the device tree. + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "target-ppc/cpu.h" + +#include "hw/ppc/fdt.h" + +#if defined(TARGET_PPC64) +size_t ppc_create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, + size_t maxsize) +{ + size_t maxcells = maxsize / sizeof(uint32_t); + int i, j, count; + uint32_t *p = prop; + + for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { + struct ppc_one_seg_page_size *sps = &env->sps.sps[i]; + + if (!sps->page_shift) { + break; + } + for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) { + if (sps->enc[count].page_shift == 0) { + break; + } + } + if ((p - prop) >= (maxcells - 3 - count * 2)) { + break; + } + *(p++) = cpu_to_be32(sps->page_shift); + *(p++) = cpu_to_be32(sps->slb_enc); + *(p++) = cpu_to_be32(count); + for (j = 0; j < count; j++) { + *(p++) = cpu_to_be32(sps->enc[j].page_shift); + *(p++) = cpu_to_be32(sps->enc[j].pte_enc); + } + } + + return (p - prop) * sizeof(uint32_t); +} +#endif diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c index 7d2510658d..2bfdb643df 100644 --- a/hw/ppc/mac_newworld.c +++ b/hw/ppc/mac_newworld.c @@ -466,6 +466,7 @@ static void ppc_core99_init(MachineState *machine) /* No PCI init: the BIOS will do it */ fw_cfg = fw_cfg_init_mem(CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch); diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c index 447948746b..56282c5bc6 100644 --- a/hw/ppc/mac_oldworld.c +++ b/hw/ppc/mac_oldworld.c @@ -319,6 +319,7 @@ static void ppc_heathrow_init(MachineState *machine) /* No PCI init: the BIOS will do it */ fw_cfg = fw_cfg_init_mem(CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW); diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c new file mode 100644 index 0000000000..9df7b25315 --- /dev/null +++ b/hw/ppc/pnv.c @@ -0,0 +1,821 @@ +/* + * QEMU PowerPC PowerNV machine model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "sysemu/numa.h" +#include "hw/hw.h" +#include "target-ppc/cpu.h" +#include "qemu/log.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" +#include "hw/loader.h" +#include "exec/address-spaces.h" +#include "qemu/cutils.h" +#include "qapi/visitor.h" + +#include "hw/ppc/pnv_xscom.h" + +#include "hw/isa/isa.h" +#include "hw/char/serial.h" +#include "hw/timer/mc146818rtc.h" + +#include + +#define FDT_MAX_SIZE 0x00100000 + +#define FW_FILE_NAME "skiboot.lid" +#define FW_LOAD_ADDR 0x0 +#define FW_MAX_SIZE 0x00400000 + +#define KERNEL_LOAD_ADDR 0x20000000 +#define INITRD_LOAD_ADDR 0x40000000 + +/* + * On Power Systems E880 (POWER8), the max cpus (threads) should be : + * 4 * 4 sockets * 12 cores * 8 threads = 1536 + * Let's make it 2^11 + */ +#define MAX_CPUS 2048 + +/* + * Memory nodes are created by hostboot, one for each range of memory + * that has a different "affinity". In practice, it means one range + * per chip. + */ +static void powernv_populate_memory_node(void *fdt, int chip_id, hwaddr start, + hwaddr size) +{ + char *mem_name; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + g_free(mem_name); + + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id))); +} + +static int get_cpus_node(void *fdt) +{ + int cpus_offset = fdt_path_offset(fdt, "/cpus"); + + if (cpus_offset < 0) { + cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "cpus"); + if (cpus_offset) { + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + } + } + _FDT(cpus_offset); + return cpus_offset; +} + +/* + * The PowerNV cores (and threads) need to use real HW ids and not an + * incremental index like it has been done on other platforms. This HW + * id is stored in the CPU PIR, it is used to create cpu nodes in the + * device tree, used in XSCOM to address cores and in interrupt + * servers. 
+ */ +static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt) +{ + CPUState *cs = CPU(DEVICE(pc->threads)); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + PowerPCCPU *cpu = POWERPC_CPU(cs); + int smt_threads = CPU_CORE(pc)->nr_threads; + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + uint32_t servers_prop[smt_threads]; + int i; + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = PNV_TIMEBASE_FREQ; + uint32_t cpufreq = 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + const uint8_t pa_features[] = { 24, 0, + 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; + int offset; + char *nodename; + int cpus_offset = get_cpus_node(fdt); + + nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + _FDT(offset); + g_free(nodename); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id))); + + _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + error_report("Warning: Unknown L1 dcache size for cpu"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + error_report("Warning: Unknown L1 icache size for cpu"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (env->spr_cb[SPR_PURR].oea_read) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (env->mmu_model & POWERPC_MMU_1TSEG) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 
2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features, sizeof(pa_features)))); + + /* Build interrupt servers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(pc->pir + i); + } + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)))); +} + +static void powernv_populate_chip(PnvChip *chip, void *fdt) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + char *typename = pnv_core_typename(pcc->cpu_model); + size_t typesize = object_type_get_instance_size(typename); + int i; + + pnv_xscom_populate(chip, fdt, 0); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize); + + powernv_create_core_node(chip, pnv_core, fdt); + } + + if (chip->ram_size) { + powernv_populate_memory_node(fdt, chip->chip_id, chip->ram_start, + chip->ram_size); + } + g_free(typename); +} + +static void *powernv_create_fdt(MachineState *machine) +{ + const char plat_compat[] = "qemu,powernv\0ibm,powernv"; + PnvMachineState *pnv = POWERNV_MACHINE(machine); + void *fdt; + char *buf; + int off; + int i; + + fdt = g_malloc0(FDT_MAX_SIZE); + _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); + + /* Root node */ + _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2))); + _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2))); + _FDT((fdt_setprop_string(fdt, 0, "model", + "IBM PowerNV (emulated by qemu)"))); + _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat, + sizeof(plat_compat)))); + + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf))); + if (qemu_uuid_set) { + _FDT((fdt_property_string(fdt, "system-id", buf))); + } + g_free(buf); + + off = fdt_add_subnode(fdt, 0, "chosen"); + if (machine->kernel_cmdline) { + _FDT((fdt_setprop_string(fdt, off, "bootargs", + machine->kernel_cmdline))); + } + + if (pnv->initrd_size) { + uint32_t start_prop = cpu_to_be32(pnv->initrd_base); + uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size); + + _FDT((fdt_setprop(fdt, off, "linux,initrd-start", + &start_prop, sizeof(start_prop)))); + _FDT((fdt_setprop(fdt, off, "linux,initrd-end", + &end_prop, sizeof(end_prop)))); + } + + /* Populate device tree for each chip */ + for (i = 0; i < pnv->num_chips; i++) { + powernv_populate_chip(pnv->chips[i], fdt); + } + return fdt; +} + +static void ppc_powernv_reset(void) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + void *fdt; + + qemu_devices_reset(); + + fdt = powernv_create_fdt(machine); + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); +} + +/* If we don't use the built-in LPC interrupt deserializer, we need + * to provide a set of qirqs for the ISA bus or things will go bad. + * + * Most machines using pre-Naples chips (without said deserializer) + * have a CPLD that will collect the SerIRQ and shoot them as a + * single level interrupt to the P8 chip. So let's setup a hook + * for doing just that. 
+ * + * Note: The actual interrupt input isn't emulated yet, this will + * come with the PSI bridge model. + */ +static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) +{ + /* We don't yet emulate the PSI bridge which provides the external + * interrupt, so just drop interrupts on the floor + */ +} + +static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) +{ + /* XXX TODO */ +} + +static ISABus *pnv_isa_create(PnvChip *chip) +{ + PnvLpcController *lpc = &chip->lpc; + ISABus *isa_bus; + qemu_irq *irqs; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + /* let isa_bus_new() create its own bridge on SysBus otherwise + * devices speficied on the command line won't find the bus and + * will fail to create. + */ + isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, + &error_fatal); + + /* Not all variants have a working serial irq decoder. If not, + * handling of LPC interrupts becomes a platform issue (some + * platforms have a CPLD to do it). + */ + if (pcc->chip_type == PNV_CHIP_POWER8NVL) { + irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler, chip, ISA_NUM_IRQS); + } else { + irqs = qemu_allocate_irqs(pnv_lpc_isa_irq_handler_cpld, chip, + ISA_NUM_IRQS); + } + + isa_bus_irqs(isa_bus, irqs); + return isa_bus; +} + +static void ppc_powernv_init(MachineState *machine) +{ + PnvMachineState *pnv = POWERNV_MACHINE(machine); + MemoryRegion *ram; + char *fw_filename; + long fw_size; + int i; + char *chip_typename; + + /* allocate RAM */ + if (machine->ram_size < (1 * G_BYTE)) { + error_report("Warning: skiboot may not work with < 1GB of RAM"); + } + + ram = g_new(MemoryRegion, 1); + memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", + machine->ram_size); + memory_region_add_subregion(get_system_memory(), 0, ram); + + /* load skiboot firmware */ + if (bios_name == NULL) { + bios_name = FW_FILE_NAME; + } + + fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + + fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE); + if (fw_size < 0) { + hw_error("qemu: could not load OPAL '%s'\n", fw_filename); + exit(1); + } + g_free(fw_filename); + + /* load kernel */ + if (machine->kernel_filename) { + long kernel_size; + + kernel_size = load_image_targphys(machine->kernel_filename, + KERNEL_LOAD_ADDR, 0x2000000); + if (kernel_size < 0) { + hw_error("qemu: could not load kernel'%s'\n", + machine->kernel_filename); + exit(1); + } + } + + /* load initrd */ + if (machine->initrd_filename) { + pnv->initrd_base = INITRD_LOAD_ADDR; + pnv->initrd_size = load_image_targphys(machine->initrd_filename, + pnv->initrd_base, 0x10000000); /* 128MB max */ + if (pnv->initrd_size < 0) { + error_report("qemu: could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + } + + /* We need some cpu model to instantiate the PnvChip class */ + if (machine->cpu_model == NULL) { + machine->cpu_model = "POWER8"; + } + + /* Create the processor chips */ + chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model); + if (!object_class_by_name(chip_typename)) { + error_report("qemu: invalid CPU model '%s' for %s machine", + machine->cpu_model, MACHINE_GET_CLASS(machine)->name); + exit(1); + } + + pnv->chips = g_new0(PnvChip *, pnv->num_chips); + for (i = 0; i < pnv->num_chips; i++) { + char chip_name[32]; + Object *chip = object_new(chip_typename); + + pnv->chips[i] = PNV_CHIP(chip); + + /* TODO: put all the memory in one node on chip 0 until we find a + * way to specify different ranges for each chip + */ + if (i == 0) { + 
object_property_set_int(chip, machine->ram_size, "ram-size", + &error_fatal); + } + + snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i)); + object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal); + object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id", + &error_fatal); + object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal); + object_property_set_bool(chip, true, "realized", &error_fatal); + } + g_free(chip_typename); + + /* Instantiate ISA bus on chip 0 */ + pnv->isa_bus = pnv_isa_create(pnv->chips[0]); + + /* Create serial port */ + serial_hds_isa_init(pnv->isa_bus, 0, MAX_SERIAL_PORTS); + + /* Create an RTC ISA device too */ + rtc_init(pnv->isa_bus, 2000, NULL); +} + +/* + * 0:21 Reserved - Read as zeros + * 22:24 Chip ID + * 25:28 Core number + * 29:31 Thread ID + */ +static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 7) | (core_id << 3); +} + +/* + * 0:48 Reserved - Read as zeroes + * 49:52 Node ID + * 53:55 Chip ID + * 56 Reserved - Read as zero + * 57:61 Core number + * 62:63 Thread ID + * + * We only care about the lower bits. uint32_t is fine for the moment. + */ +static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 8) | (core_id << 2); +} + +/* Allowed core identifiers on a POWER8 Processor Chip : + * + * + * EX1 - Venice only + * EX2 - Venice only + * EX3 - Venice only + * EX4 + * EX5 + * EX6 + * + * EX9 - Venice only + * EX10 - Venice only + * EX11 - Venice only + * EX12 + * EX13 + * EX14 + * + */ +#define POWER8E_CORE_MASK (0x7070ull) +#define POWER8_CORE_MASK (0x7e7eull) + +/* + * POWER9 has 24 cores, ids starting at 0x20 + */ +#define POWER9_CORE_MASK (0xffffff00000000ull) + +static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER8E"; + k->chip_type = PNV_CHIP_POWER8E; + k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ + k->cores_mask = POWER8E_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p8; + k->xscom_base = 0x003fc0000000000ull; + k->xscom_core_base = 0x10000000ull; + dc->desc = "PowerNV Chip POWER8E"; +} + +static const TypeInfo pnv_chip_power8e_info = { + .name = TYPE_PNV_CHIP_POWER8E, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power8e_class_init, +}; + +static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER8"; + k->chip_type = PNV_CHIP_POWER8; + k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ + k->cores_mask = POWER8_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p8; + k->xscom_base = 0x003fc0000000000ull; + k->xscom_core_base = 0x10000000ull; + dc->desc = "PowerNV Chip POWER8"; +} + +static const TypeInfo pnv_chip_power8_info = { + .name = TYPE_PNV_CHIP_POWER8, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power8_class_init, +}; + +static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER8NVL"; + k->chip_type = PNV_CHIP_POWER8NVL; + k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ + k->cores_mask = POWER8_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p8; + k->xscom_base = 0x003fc0000000000ull; + k->xscom_core_base = 
0x10000000ull; + dc->desc = "PowerNV Chip POWER8NVL"; +} + +static const TypeInfo pnv_chip_power8nvl_info = { + .name = TYPE_PNV_CHIP_POWER8NVL, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power8nvl_class_init, +}; + +static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->cpu_model = "POWER9"; + k->chip_type = PNV_CHIP_POWER9; + k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */ + k->cores_mask = POWER9_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p9; + k->xscom_base = 0x00603fc00000000ull; + k->xscom_core_base = 0x0ull; + dc->desc = "PowerNV Chip POWER9"; +} + +static const TypeInfo pnv_chip_power9_info = { + .name = TYPE_PNV_CHIP_POWER9, + .parent = TYPE_PNV_CHIP, + .instance_size = sizeof(PnvChip), + .class_init = pnv_chip_power9_class_init, +}; + +static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + int cores_max; + + /* + * No custom mask for this chip, let's use the default one from * + * the chip class + */ + if (!chip->cores_mask) { + chip->cores_mask = pcc->cores_mask; + } + + /* filter alien core ids ! some are reserved */ + if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) { + error_setg(errp, "warning: invalid core mask for chip Ox%"PRIx64" !", + chip->cores_mask); + return; + } + chip->cores_mask &= pcc->cores_mask; + + /* now that we have a sane layout, let check the number of cores */ + cores_max = ctpop64(chip->cores_mask); + if (chip->nr_cores > cores_max) { + error_setg(errp, "warning: too many cores for chip ! Limit is %d", + cores_max); + return; + } +} + +static void pnv_chip_init(Object *obj) +{ + PnvChip *chip = PNV_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + chip->xscom_base = pcc->xscom_base; + + object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC); + object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL); +} + +static void pnv_chip_realize(DeviceState *dev, Error **errp) +{ + PnvChip *chip = PNV_CHIP(dev); + Error *error = NULL; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + char *typename = pnv_core_typename(pcc->cpu_model); + size_t typesize = object_type_get_instance_size(typename); + int i, core_hwid; + + if (!object_class_by_name(typename)) { + error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); + return; + } + + /* XSCOM bridge */ + pnv_xscom_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); + + /* Cores */ + pnv_chip_core_sanitize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + + chip->cores = g_malloc0(typesize * chip->nr_cores); + + for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8) + && (i < chip->nr_cores); core_hwid++) { + char core_name[32]; + void *pnv_core = chip->cores + i * typesize; + + if (!(chip->cores_mask & (1ull << core_hwid))) { + continue; + } + + object_initialize(pnv_core, typesize, typename); + snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid); + object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core), + &error_fatal); + object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads", + &error_fatal); + object_property_set_int(OBJECT(pnv_core), core_hwid, + CPU_CORE_PROP_CORE_ID, &error_fatal); + object_property_set_int(OBJECT(pnv_core), + pcc->core_pir(chip, 
core_hwid), + "pir", &error_fatal); + object_property_set_bool(OBJECT(pnv_core), true, "realized", + &error_fatal); + object_unref(OBJECT(pnv_core)); + + /* Each core has an XSCOM MMIO region */ + pnv_xscom_add_subregion(chip, + PNV_XSCOM_EX_CORE_BASE(pcc->xscom_core_base, + core_hwid), + &PNV_CORE(pnv_core)->xscom_regs); + i++; + } + g_free(typename); + + /* Create LPC controller */ + object_property_set_bool(OBJECT(&chip->lpc), true, "realized", + &error_fatal); + pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs); +} + +static Property pnv_chip_properties[] = { + DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0), + DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0), + DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0), + DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), + DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_chip_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_chip_realize; + dc->props = pnv_chip_properties; + dc->desc = "PowerNV Chip"; +} + +static const TypeInfo pnv_chip_info = { + .name = TYPE_PNV_CHIP, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = pnv_chip_class_init, + .instance_init = pnv_chip_init, + .class_size = sizeof(PnvChipClass), + .abstract = true, +}; + +static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint32(v, name, &POWERNV_MACHINE(obj)->num_chips, errp); +} + +static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + PnvMachineState *pnv = POWERNV_MACHINE(obj); + uint32_t num_chips; + Error *local_err = NULL; + + visit_type_uint32(v, name, &num_chips, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * TODO: should we decide on how many chips we can create based + * on #cores and Venice vs. Murano vs. 
Naples chip type etc..., + */ + if (!is_power_of_2(num_chips) || num_chips > 4) { + error_setg(errp, "invalid number of chips: '%d'", num_chips); + return; + } + + pnv->num_chips = num_chips; +} + +static void powernv_machine_initfn(Object *obj) +{ + PnvMachineState *pnv = POWERNV_MACHINE(obj); + pnv->num_chips = 1; +} + +static void powernv_machine_class_props_init(ObjectClass *oc) +{ + object_class_property_add(oc, "num-chips", "uint32_t", + pnv_get_num_chips, pnv_set_num_chips, + NULL, NULL, NULL); + object_class_property_set_description(oc, "num-chips", + "Specifies the number of processor chips", + NULL); +} + +static void powernv_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "IBM PowerNV (Non-Virtualized)"; + mc->init = ppc_powernv_init; + mc->reset = ppc_powernv_reset; + mc->max_cpus = MAX_CPUS; + mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for + * storage */ + mc->no_parallel = 1; + mc->default_boot_order = NULL; + mc->default_ram_size = 1 * G_BYTE; + + powernv_machine_class_props_init(oc); +} + +static const TypeInfo powernv_machine_info = { + .name = TYPE_POWERNV_MACHINE, + .parent = TYPE_MACHINE, + .instance_size = sizeof(PnvMachineState), + .instance_init = powernv_machine_initfn, + .class_init = powernv_machine_class_init, +}; + +static void powernv_machine_register_types(void) +{ + type_register_static(&powernv_machine_info); + type_register_static(&pnv_chip_info); + type_register_static(&pnv_chip_power8e_info); + type_register_static(&pnv_chip_power8_info); + type_register_static(&pnv_chip_power8nvl_info); + type_register_static(&pnv_chip_power9_info); +} + +type_init(powernv_machine_register_types) diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c new file mode 100644 index 0000000000..76ce854b0c --- /dev/null +++ b/hw/ppc/pnv_core.c @@ -0,0 +1,233 @@ +/* + * QEMU PowerPC PowerNV CPU Core model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "target-ppc/cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" +#include "hw/ppc/pnv_xscom.h" + +static void powernv_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + + cpu_reset(cs); + + /* + * the skiboot firmware elects a primary thread to initialize the + * system and it can be any. 
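+ * (Editor's illustration, not part of the original patch: with the POWER8 + * PIR layout above, chip 0 / core 1 yields a core PIR of (0 << 7) | (1 << 3) = 0x8, + * and each thread's PIR is that core PIR plus its thread index, so any of + * those threads may be the one that runs the entry state set up below.)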
+ */ + env->gpr[3] = PNV_FDT_ADDR; + env->nip = 0x10; + env->msr |= MSR_HVB; /* Hypervisor mode */ +} + +static void powernv_cpu_init(PowerPCCPU *cpu, Error **errp) +{ + CPUPPCState *env = &cpu->env; + int core_pir; + int thread_index = 0; /* TODO: TCG supports only one thread */ + ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; + + core_pir = object_property_get_int(OBJECT(cpu), "core-pir", &error_abort); + + /* + * The PIR of a thread is the core PIR + the thread index. We will + * need to find a way to get the thread index when TCG supports + * more than 1. We could use the object name ? + */ + pir->default_value = core_pir + thread_index; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); + + qemu_register_reset(powernv_cpu_reset, cpu); +} + +/* + * These values are read by the PowerNV HW monitors under Linux + */ +#define PNV_XSCOM_EX_DTS_RESULT0 0x50000 +#define PNV_XSCOM_EX_DTS_RESULT1 0x50001 + +static uint64_t pnv_core_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + /* The result should be 38 C */ + switch (offset) { + case PNV_XSCOM_EX_DTS_RESULT0: + val = 0x26f024f023f0000ull; + break; + case PNV_XSCOM_EX_DTS_RESULT1: + val = 0x24f000000000000ull; + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx, + addr); + } + + return val; +} + +static void pnv_core_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx, + addr); +} + +static const MemoryRegionOps pnv_core_xscom_ops = { + .read = pnv_core_xscom_read, + .write = pnv_core_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_core_realize_child(Object *child, Error **errp) +{ + Error *local_err = NULL; + CPUState *cs = CPU(child); + PowerPCCPU *cpu = POWERPC_CPU(cs); + + object_property_set_bool(child, true, "realized", &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + powernv_cpu_init(cpu, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static void pnv_core_realize(DeviceState *dev, Error **errp) +{ + PnvCore *pc = PNV_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + PnvCoreClass *pcc = PNV_CORE_GET_CLASS(OBJECT(dev)); + const char *typename = object_class_get_name(pcc->cpu_oc); + size_t size = object_type_get_instance_size(typename); + Error *local_err = NULL; + void *obj; + int i, j; + char name[32]; + + pc->threads = g_malloc0(size * cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + obj = pc->threads + i * size; + + object_initialize(obj, size, typename); + + snprintf(name, sizeof(name), "thread[%d]", i); + object_property_add_child(OBJECT(pc), name, obj, &local_err); + object_property_add_alias(obj, "core-pir", OBJECT(pc), + "pir", &local_err); + if (local_err) { + goto err; + } + object_unref(obj); + } + + for (j = 0; j < cc->nr_threads; j++) { + obj = pc->threads + j * size; + + pnv_core_realize_child(obj, &local_err); + if (local_err) { + goto err; + } + } + + snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id); + pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), &pnv_core_xscom_ops, + pc, name, PNV_XSCOM_EX_CORE_SIZE); + return; + +err: + while (--i >= 0) { + obj = pc->threads + i * size; + object_unparent(obj); + } + g_free(pc->threads); + 
error_propagate(errp, local_err); +} + +static Property pnv_core_properties[] = { + DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + dc->realize = pnv_core_realize; + dc->props = pnv_core_properties; + pcc->cpu_oc = cpu_class_by_name(TYPE_POWERPC_CPU, data); +} + +static const TypeInfo pnv_core_info = { + .name = TYPE_PNV_CORE, + .parent = TYPE_CPU_CORE, + .instance_size = sizeof(PnvCore), + .class_size = sizeof(PnvCoreClass), + .abstract = true, +}; + +static const char *pnv_core_models[] = { + "POWER8E", "POWER8", "POWER8NVL", "POWER9" +}; + +static void pnv_core_register_types(void) +{ + int i ; + + type_register_static(&pnv_core_info); + for (i = 0; i < ARRAY_SIZE(pnv_core_models); ++i) { + TypeInfo ti = { + .parent = TYPE_PNV_CORE, + .instance_size = sizeof(PnvCore), + .class_init = pnv_core_class_init, + .class_data = (void *) pnv_core_models[i], + }; + ti.name = pnv_core_typename(pnv_core_models[i]); + type_register(&ti); + g_free((void *)ti.name); + } +} + +type_init(pnv_core_register_types) + +char *pnv_core_typename(const char *model) +{ + return g_strdup_printf(TYPE_PNV_CORE "-%s", model); +} diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c new file mode 100644 index 0000000000..0e2117f0f5 --- /dev/null +++ b/hw/ppc/pnv_lpc.c @@ -0,0 +1,472 @@ +/* + * QEMU PowerPC PowerNV LPC controller + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "target-ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" + +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_lpc.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/fdt.h" + +#include + +enum { + ECCB_CTL = 0, + ECCB_RESET = 1, + ECCB_STAT = 2, + ECCB_DATA = 3, +}; + +/* OPB Master LS registers */ +#define OPB_MASTER_LS_IRQ_STAT 0x50 +#define OPB_MASTER_IRQ_LPC 0x00000800 +#define OPB_MASTER_LS_IRQ_MASK 0x54 +#define OPB_MASTER_LS_IRQ_POL 0x58 +#define OPB_MASTER_LS_IRQ_INPUT 0x5c + +/* LPC HC registers */ +#define LPC_HC_FW_SEG_IDSEL 0x24 +#define LPC_HC_FW_RD_ACC_SIZE 0x28 +#define LPC_HC_FW_RD_1B 0x00000000 +#define LPC_HC_FW_RD_2B 0x01000000 +#define LPC_HC_FW_RD_4B 0x02000000 +#define LPC_HC_FW_RD_16B 0x04000000 +#define LPC_HC_FW_RD_128B 0x07000000 +#define LPC_HC_IRQSER_CTRL 0x30 +#define LPC_HC_IRQSER_EN 0x80000000 +#define LPC_HC_IRQSER_QMODE 0x40000000 +#define LPC_HC_IRQSER_START_MASK 0x03000000 +#define LPC_HC_IRQSER_START_4CLK 0x00000000 +#define LPC_HC_IRQSER_START_6CLK 0x01000000 +#define LPC_HC_IRQSER_START_8CLK 0x02000000 +#define LPC_HC_IRQMASK 0x34 /* same bit defs as LPC_HC_IRQSTAT */ +#define LPC_HC_IRQSTAT 0x38 +#define LPC_HC_IRQ_SERIRQ0 0x80000000 /* all bits down to ... 
*/ +#define LPC_HC_IRQ_SERIRQ16 0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */ +#define LPC_HC_IRQ_SERIRQ_ALL 0xffff8000 +#define LPC_HC_IRQ_LRESET 0x00000400 +#define LPC_HC_IRQ_SYNC_ABNORM_ERR 0x00000080 +#define LPC_HC_IRQ_SYNC_NORESP_ERR 0x00000040 +#define LPC_HC_IRQ_SYNC_NORM_ERR 0x00000020 +#define LPC_HC_IRQ_SYNC_TIMEOUT_ERR 0x00000010 +#define LPC_HC_IRQ_SYNC_TARG_TAR_ERR 0x00000008 +#define LPC_HC_IRQ_SYNC_BM_TAR_ERR 0x00000004 +#define LPC_HC_IRQ_SYNC_BM0_REQ 0x00000002 +#define LPC_HC_IRQ_SYNC_BM1_REQ 0x00000001 +#define LPC_HC_ERROR_ADDRESS 0x40 + +#define LPC_OPB_SIZE 0x100000000ull + +#define ISA_IO_SIZE 0x00010000 +#define ISA_MEM_SIZE 0x10000000 +#define LPC_IO_OPB_ADDR 0xd0010000 +#define LPC_IO_OPB_SIZE 0x00010000 +#define LPC_MEM_OPB_ADDR 0xe0010000 +#define LPC_MEM_OPB_SIZE 0x10000000 +#define LPC_FW_OPB_ADDR 0xf0000000 +#define LPC_FW_OPB_SIZE 0x10000000 + +#define LPC_OPB_REGS_OPB_ADDR 0xc0010000 +#define LPC_OPB_REGS_OPB_SIZE 0x00002000 +#define LPC_HC_REGS_OPB_ADDR 0xc0012000 +#define LPC_HC_REGS_OPB_SIZE 0x00001000 + + +/* + * TODO: the "primary" cell should only be added on chip 0. This is + * how skiboot chooses the default LPC controller on multichip + * systems. + * + * It would be easly done if we can change the populate() interface to + * replace the PnvXScomInterface parameter by a PnvChip one + */ +static int pnv_lpc_populate(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + const char compat[] = "ibm,power8-lpc\0ibm,lpc"; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_LPC_BASE; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_LPC_SIZE) + }; + + name = g_strdup_printf("isa@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "primary", NULL, 0))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + return 0; +} + +/* + * These read/write handlers of the OPB address space should be common + * with the P9 LPC Controller which uses direct MMIOs. + * + * TODO: rework to use address_space_stq() and address_space_ldq() + * instead. + */ +static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + bool success; + + /* XXX Handle access size limits and FW read caching here */ + success = !address_space_rw(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz, false); + + return success; +} + +static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + bool success; + + /* XXX Handle access size limits here */ + success = !address_space_rw(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz, true); + + return success; +} + +#define ECCB_CTL_READ (1ull << (63 - 15)) +#define ECCB_CTL_SZ_LSH (63 - 7) +#define ECCB_CTL_SZ_MASK (0xfull << ECCB_CTL_SZ_LSH) +#define ECCB_CTL_ADDR_MASK 0xffffffffu; + +#define ECCB_STAT_OP_DONE (1ull << (63 - 52)) +#define ECCB_STAT_OP_ERR (1ull << (63 - 52)) +#define ECCB_STAT_RD_DATA_LSH (63 - 37) +#define ECCB_STAT_RD_DATA_MASK (0xffffffff << ECCB_STAT_RD_DATA_LSH) + +static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd) +{ + /* XXX Check for magic bits at the top, addr size etc... 
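+ * For example (editor's illustration only): an ECCB_CTL command of + * 0x04010000c0012000 decodes below to a 4-byte read (sz = 4, ECCB_CTL_READ set) + * at OPB address 0xc0012000, i.e. the LPC HC register window.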
*/ + unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH; + uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK; + uint8_t data[4]; + bool success; + + if (cmd & ECCB_CTL_READ) { + success = opb_read(lpc, opb_addr, data, sz); + if (success) { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (((uint64_t)data[0]) << 24 | + ((uint64_t)data[1]) << 16 | + ((uint64_t)data[2]) << 8 | + ((uint64_t)data[3])) << ECCB_STAT_RD_DATA_LSH; + } else { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (0xffffffffull << ECCB_STAT_RD_DATA_LSH); + } + } else { + data[0] = lpc->eccb_data_reg >> 24; + data[1] = lpc->eccb_data_reg >> 16; + data[2] = lpc->eccb_data_reg >> 8; + data[3] = lpc->eccb_data_reg; + + success = opb_write(lpc, opb_addr, data, sz); + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE; + } + /* XXX Which error bit (if any) to signal OPB error ? */ +} + +static uint64_t pnv_lpc_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset & 3) { + case ECCB_CTL: + case ECCB_RESET: + val = 0; + break; + case ECCB_STAT: + val = lpc->eccb_stat_reg; + lpc->eccb_stat_reg = 0; + break; + case ECCB_DATA: + val = ((uint64_t)lpc->eccb_data_reg) << 32; + break; + } + return val; +} + +static void pnv_lpc_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + + switch (offset & 3) { + case ECCB_CTL: + pnv_lpc_do_eccb(lpc, val); + break; + case ECCB_RESET: + /* XXXX */ + break; + case ECCB_STAT: + break; + case ECCB_DATA: + lpc->eccb_data_reg = val >> 32; + break; + } +} + +static const MemoryRegionOps pnv_lpc_xscom_ops = { + .read = pnv_lpc_xscom_read, + .write = pnv_lpc_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + val = lpc->lpc_hc_fw_seg_idsel; + break; + case LPC_HC_FW_RD_ACC_SIZE: + val = lpc->lpc_hc_fw_rd_acc_size; + break; + case LPC_HC_IRQSER_CTRL: + val = lpc->lpc_hc_irqser_ctrl; + break; + case LPC_HC_IRQMASK: + val = lpc->lpc_hc_irqmask; + break; + case LPC_HC_IRQSTAT: + val = lpc->lpc_hc_irqstat; + break; + case LPC_HC_ERROR_ADDRESS: + val = lpc->lpc_hc_error_addr; + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } + return val; +} + +static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + PnvLpcController *lpc = opaque; + + /* XXX Filter out reserved bits */ + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + /* XXX Actually figure out how that works as this impact + * memory regions/aliases + */ + lpc->lpc_hc_fw_seg_idsel = val; + break; + case LPC_HC_FW_RD_ACC_SIZE: + lpc->lpc_hc_fw_rd_acc_size = val; + break; + case LPC_HC_IRQSER_CTRL: + lpc->lpc_hc_irqser_ctrl = val; + break; + case LPC_HC_IRQMASK: + lpc->lpc_hc_irqmask = val; + break; + case LPC_HC_IRQSTAT: + lpc->lpc_hc_irqstat &= ~val; + break; + case LPC_HC_ERROR_ADDRESS: + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps lpc_hc_ops = { + .read = lpc_hc_read, + .write = lpc_hc_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + 
.min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case OPB_MASTER_LS_IRQ_STAT: + val = lpc->opb_irq_stat; + break; + case OPB_MASTER_LS_IRQ_MASK: + val = lpc->opb_irq_mask; + break; + case OPB_MASTER_LS_IRQ_POL: + val = lpc->opb_irq_pol; + break; + case OPB_MASTER_LS_IRQ_INPUT: + val = lpc->opb_irq_input; + break; + default: + qemu_log_mask(LOG_UNIMP, "OPB MASTER Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } + + return val; +} + +static void opb_master_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = opaque; + + switch (addr) { + case OPB_MASTER_LS_IRQ_STAT: + lpc->opb_irq_stat &= ~val; + break; + case OPB_MASTER_LS_IRQ_MASK: + /* XXX Filter out reserved bits */ + lpc->opb_irq_mask = val; + break; + case OPB_MASTER_LS_IRQ_POL: + /* XXX Filter out reserved bits */ + lpc->opb_irq_pol = val; + break; + case OPB_MASTER_LS_IRQ_INPUT: + /* Read only */ + break; + default: + qemu_log_mask(LOG_UNIMP, "OPB MASTER Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps opb_master_ops = { + .read = opb_master_read, + .write = opb_master_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void pnv_lpc_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + + /* Reg inits */ + lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B; + + /* Create address space and backing MR for the OPB bus */ + memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull); + address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb"); + + /* Create ISA IO and Mem space regions which are the root of + * the ISA bus (ie, ISA address spaces). We don't create a + * separate one for FW which we alias to memory. 
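+ * (Editor's sketch of the resulting OPB layout, derived from the defines + * above: OPB master registers at 0xc0010000, LPC HC registers at 0xc0012000, + * the ISA I/O window at 0xd0010000, the ISA memory window at 0xe0010000 and + * the FW alias at 0xf0000000.)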
+ */ + memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE); + memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE); + + /* Create windows from the OPB space to the ISA space */ + memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io", + &lpc->isa_io, 0, LPC_IO_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR, + &lpc->opb_isa_io); + memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem", + &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR, + &lpc->opb_isa_mem); + memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw", + &lpc->isa_mem, 0, LPC_FW_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR, + &lpc->opb_isa_fw); + + /* Create MMIO regions for LPC HC and OPB registers */ + memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops, + lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR, + &lpc->opb_master_regs); + memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc, + "lpc-hc", LPC_HC_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR, + &lpc->lpc_hc_regs); + + /* XScom region for LPC registers */ + pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(dev), + &pnv_lpc_xscom_ops, lpc, "xscom-lpc", + PNV_XSCOM_LPC_SIZE); +} + +static void pnv_lpc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->populate = pnv_lpc_populate; + + dc->realize = pnv_lpc_realize; +} + +static const TypeInfo pnv_lpc_info = { + .name = TYPE_PNV_LPC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvLpcController), + .class_init = pnv_lpc_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_lpc_register_types(void) +{ + type_register_static(&pnv_lpc_info); +} + +type_init(pnv_lpc_register_types) diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c new file mode 100644 index 0000000000..8da271872f --- /dev/null +++ b/hw/ppc/pnv_xscom.c @@ -0,0 +1,275 @@ +/* + * QEMU PowerPC PowerNV XSCOM bus + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "qemu/log.h" +#include "sysemu/kvm.h" +#include "target-ppc/cpu.h" +#include "hw/sysbus.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" + +#include + +static void xscom_complete(CPUState *cs, uint64_t hmer_bits) +{ + /* + * TODO: When the read/write comes from the monitor, NULL is + * passed for the cpu, and no CPU completion is generated. 
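+ * (For reference — editor's illustration, not in the original patch: a + * POWER8 read at MMIO offset 0xf00078 is swizzled by pnv_xscom_pcba() below + * to PCBA 0xf000f and satisfied from xscom_read_default() with the chip's + * CFAM id, after which this helper sets HMER_XSCOM_DONE.)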
+ */ + if (cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + /* + * TODO: Need a CPU helper to set HMER, also handle generation + * of HMIs + */ + cpu_synchronize_state(cs); + env->spr[SPR_HMER] |= hmer_bits; + } +} + +static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + addr &= (PNV_XSCOM_SIZE - 1); + if (pcc->chip_type == PNV_CHIP_POWER9) { + return addr >> 3; + } else { + return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf); + } +} + +static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) +{ + switch (pcba) { + case 0xf000f: + return PNV_CHIP_GET_CLASS(chip)->chip_cfam_id; + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c03: /* PIBAM FIR MASK */ + case 0x2020007: /* ADU stuff */ + case 0x2020009: /* ADU stuff */ + case 0x202000f: /* ADU stuff */ + return 0; + case 0x2013f00: /* PBA stuff */ + case 0x2013f01: /* PBA stuff */ + case 0x2013f02: /* PBA stuff */ + case 0x2013f03: /* PBA stuff */ + case 0x2013f04: /* PBA stuff */ + case 0x2013f05: /* PBA stuff */ + case 0x2013f06: /* PBA stuff */ + case 0x2013f07: /* PBA stuff */ + return 0; + case 0x2013028: /* CAPP stuff */ + case 0x201302a: /* CAPP stuff */ + case 0x2013801: /* CAPP stuff */ + case 0x2013802: /* CAPP stuff */ + return 0; + default: + return -1; + } +} + +static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) +{ + /* We ignore writes to these */ + switch (pcba) { + case 0xf000f: /* chip id is RO */ + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c01: /* PIBAM FIR */ + case 0x1010c02: /* PIBAM FIR */ + case 0x1010c03: /* PIBAM FIR MASK */ + case 0x1010c04: /* PIBAM FIR MASK */ + case 0x1010c05: /* PIBAM FIR MASK */ + case 0x2020007: /* ADU stuff */ + case 0x2020009: /* ADU stuff */ + case 0x202000f: /* ADU stuff */ + return true; + default: + return false; + } +} + +static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + uint64_t val = 0; + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + val = xscom_read_default(chip, pcba); + if (val != -1) { + goto complete; + } + + val = address_space_ldq(&chip->xscom_as, (uint64_t) pcba << 3, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM read failed at @0x%" + HWADDR_PRIx " pcba=0x%08x\n", addr, pcba); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return 0; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); + return val; +} + +static void xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + if (xscom_write_default(chip, pcba, val)) { + goto complete; + } + + address_space_stq(&chip->xscom_as, (uint64_t) pcba << 3, val, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM write failed at @0x%" + HWADDR_PRIx " pcba=0x%08x data=0x%" PRIx64 "\n", + addr, pcba, val); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); +} + +const MemoryRegionOps pnv_xscom_ops = { + .read = xscom_read, + .write = xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +void 
pnv_xscom_realize(PnvChip *chip, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(chip); + char *name; + + name = g_strdup_printf("xscom-%x", chip->chip_id); + memory_region_init_io(&chip->xscom_mmio, OBJECT(chip), &pnv_xscom_ops, + chip, name, PNV_XSCOM_SIZE); + sysbus_init_mmio(sbd, &chip->xscom_mmio); + + memory_region_init(&chip->xscom, OBJECT(chip), name, PNV_XSCOM_SIZE); + address_space_init(&chip->xscom_as, &chip->xscom, name); + g_free(name); +} + +static const TypeInfo pnv_xscom_interface_info = { + .name = TYPE_PNV_XSCOM_INTERFACE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(PnvXScomInterfaceClass), +}; + +static void pnv_xscom_register_types(void) +{ + type_register_static(&pnv_xscom_interface_info); +} + +type_init(pnv_xscom_register_types) + +typedef struct ForeachPopulateArgs { + void *fdt; + int xscom_offset; +} ForeachPopulateArgs; + +static int xscom_populate_child(Object *child, void *opaque) +{ + if (object_dynamic_cast(child, TYPE_PNV_XSCOM_INTERFACE)) { + ForeachPopulateArgs *args = opaque; + PnvXScomInterface *xd = PNV_XSCOM_INTERFACE(child); + PnvXScomInterfaceClass *xc = PNV_XSCOM_INTERFACE_GET_CLASS(xd); + + if (xc->populate) { + _FDT((xc->populate(xd, args->fdt, args->xscom_offset))); + } + } + return 0; +} + +static const char compat_p8[] = "ibm,power8-xscom\0ibm,xscom"; +static const char compat_p9[] = "ibm,power9-xscom\0ibm,xscom"; + +int pnv_xscom_populate(PnvChip *chip, void *fdt, int root_offset) +{ + uint64_t reg[] = { cpu_to_be64(PNV_XSCOM_BASE(chip)), + cpu_to_be64(PNV_XSCOM_SIZE) }; + int xscom_offset; + ForeachPopulateArgs args; + char *name; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + + name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0])); + xscom_offset = fdt_add_subnode(fdt, root_offset, name); + _FDT(xscom_offset); + g_free(name); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg)))); + + if (pcc->chip_type == PNV_CHIP_POWER9) { + _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p9, + sizeof(compat_p9)))); + } else { + _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat_p8, + sizeof(compat_p8)))); + } + + _FDT((fdt_setprop(fdt, xscom_offset, "scom-controller", NULL, 0))); + + args.fdt = fdt; + args.xscom_offset = xscom_offset; + + object_child_foreach(OBJECT(chip), xscom_populate_child, &args); + return 0; +} + +void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, MemoryRegion *mr) +{ + memory_region_add_subregion(&chip->xscom, offset << 3, mr); +} + +void pnv_xscom_region_init(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size) +{ + memory_region_init_io(mr, owner, ops, opaque, name, size << 3); +} diff --git a/hw/ppc/ppc405.h b/hw/ppc/ppc405.h index c67febca2f..a9ffc87f19 100644 --- a/hw/ppc/ppc405.h +++ b/hw/ppc/ppc405.h @@ -71,11 +71,5 @@ CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem, hwaddr ram_sizes[2], uint32_t sysclk, qemu_irq **picp, int do_init); -/* IBM STBxxx microcontrollers */ -CPUPPCState *ppc_stb025_init (MemoryRegion ram_memories[2], - hwaddr ram_bases[2], - hwaddr ram_sizes[2], - uint32_t sysclk, qemu_irq **picp, - ram_addr_t *offsetp); #endif /* PPC405_H */ diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c index 4b2f07aecb..d01798f245 100644 --- a/hw/ppc/ppc405_boards.c 
+++ b/hw/ppc/ppc405_boards.c @@ -37,7 +37,6 @@ #include "qemu/log.h" #include "qemu/error-report.h" #include "hw/loader.h" -#include "sysemu/block-backend.h" #include "sysemu/blockdev.h" #include "exec/address-spaces.h" diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c index 22c584eb8d..cf958a9e00 100644 --- a/hw/ppc/ppce500_spin.c +++ b/hw/ppc/ppce500_spin.c @@ -54,11 +54,6 @@ typedef struct SpinState { SpinInfo spin[MAX_CPUS]; } SpinState; -typedef struct spin_kick { - PowerPCCPU *cpu; - SpinInfo *spin; -} SpinKick; - static void spin_reset(void *opaque) { SpinState *s = opaque; @@ -89,16 +84,15 @@ static void mmubooke_create_initial_mapping(CPUPPCState *env, env->tlb_dirty = true; } -static void spin_kick(void *data) +static void spin_kick(CPUState *cs, run_on_cpu_data data) { - SpinKick *kick = data; - CPUState *cpu = CPU(kick->cpu); - CPUPPCState *env = &kick->cpu->env; - SpinInfo *curspin = kick->spin; + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + SpinInfo *curspin = data.host_ptr; hwaddr map_size = 64 * 1024 * 1024; hwaddr map_start; - cpu_synchronize_state(cpu); + cpu_synchronize_state(cs); stl_p(&curspin->pir, env->spr[SPR_BOOKE_PIR]); env->nip = ldq_p(&curspin->addr) & (map_size - 1); env->gpr[3] = ldq_p(&curspin->r3); @@ -112,10 +106,10 @@ static void spin_kick(void *data) map_start = ldq_p(&curspin->addr) & ~(map_size - 1); mmubooke_create_initial_mapping(env, 0, map_start, map_size); - cpu->halted = 0; - cpu->exception_index = -1; - cpu->stopped = false; - qemu_cpu_kick(cpu); + cs->halted = 0; + cs->exception_index = -1; + cs->stopped = false; + qemu_cpu_kick(cs); } static void spin_write(void *opaque, hwaddr addr, uint64_t value, @@ -153,12 +147,7 @@ static void spin_write(void *opaque, hwaddr addr, uint64_t value, if (!(ldq_p(&curspin->addr) & 1)) { /* run CPU */ - SpinKick kick = { - .cpu = POWERPC_CPU(cpu), - .spin = curspin, - }; - - run_on_cpu(cpu, spin_kick, &kick); + run_on_cpu(cpu, spin_kick, RUN_ON_CPU_HOST_PTR(curspin)); } } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 30d6800ab3..208ef7b110 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -37,7 +37,6 @@ #include "sysemu/block-backend.h" #include "sysemu/cpus.h" #include "sysemu/kvm.h" -#include "sysemu/device_tree.h" #include "kvm_ppc.h" #include "migration/migration.h" #include "mmu-hash64.h" @@ -47,6 +46,7 @@ #include "hw/ppc/ppc.h" #include "hw/loader.h" +#include "hw/ppc/fdt.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "hw/pci-host/spapr.h" @@ -249,40 +249,6 @@ static int spapr_fixup_cpu_dt(void *fdt, sPAPRMachineState *spapr) return ret; } - -static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, - size_t maxsize) -{ - size_t maxcells = maxsize / sizeof(uint32_t); - int i, j, count; - uint32_t *p = prop; - - for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { - struct ppc_one_seg_page_size *sps = &env->sps.sps[i]; - - if (!sps->page_shift) { - break; - } - for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) { - if (sps->enc[count].page_shift == 0) { - break; - } - } - if ((p - prop) >= (maxcells - 3 - count * 2)) { - break; - } - *(p++) = cpu_to_be32(sps->page_shift); - *(p++) = cpu_to_be32(sps->slb_enc); - *(p++) = cpu_to_be32(count); - for (j = 0; j < count; j++) { - *(p++) = cpu_to_be32(sps->enc[j].page_shift); - *(p++) = cpu_to_be32(sps->enc[j].pte_enc); - } - } - - return (p - prop) * sizeof(uint32_t); -} - static hwaddr spapr_node0_size(void) { MachineState *machine = MACHINE(qdev_get_machine()); @@ -299,225 +265,11 @@ static 
hwaddr spapr_node0_size(void) return machine->ram_size; } -#define _FDT(exp) \ - do { \ - int ret = (exp); \ - if (ret < 0) { \ - fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \ - #exp, fdt_strerror(ret)); \ - exit(1); \ - } \ - } while (0) - static void add_str(GString *s, const gchar *s1) { g_string_append_len(s, s1, strlen(s1) + 1); } -static void *spapr_create_fdt_skel(hwaddr initrd_base, - hwaddr initrd_size, - hwaddr kernel_size, - bool little_endian, - const char *kernel_cmdline, - uint32_t epow_irq) -{ - void *fdt; - uint32_t start_prop = cpu_to_be32(initrd_base); - uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); - GString *hypertas = g_string_sized_new(256); - GString *qemu_hypertas = g_string_sized_new(256); - uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; - uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(max_cpus)}; - unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; - char *buf; - - add_str(hypertas, "hcall-pft"); - add_str(hypertas, "hcall-term"); - add_str(hypertas, "hcall-dabr"); - add_str(hypertas, "hcall-interrupt"); - add_str(hypertas, "hcall-tce"); - add_str(hypertas, "hcall-vio"); - add_str(hypertas, "hcall-splpar"); - add_str(hypertas, "hcall-bulk"); - add_str(hypertas, "hcall-set-mode"); - add_str(hypertas, "hcall-sprg0"); - add_str(hypertas, "hcall-copy"); - add_str(hypertas, "hcall-debug"); - add_str(qemu_hypertas, "hcall-memop1"); - - fdt = g_malloc0(FDT_MAX_SIZE); - _FDT((fdt_create(fdt, FDT_MAX_SIZE))); - - if (kernel_size) { - _FDT((fdt_add_reservemap_entry(fdt, KERNEL_LOAD_ADDR, kernel_size))); - } - if (initrd_size) { - _FDT((fdt_add_reservemap_entry(fdt, initrd_base, initrd_size))); - } - _FDT((fdt_finish_reservemap(fdt))); - - /* Root node */ - _FDT((fdt_begin_node(fdt, ""))); - _FDT((fdt_property_string(fdt, "device_type", "chrp"))); - _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)"))); - _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries"))); - - /* - * Add info to guest to indentify which host is it being run on - * and what is the uuid of the guest - */ - if (kvmppc_get_host_model(&buf)) { - _FDT((fdt_property_string(fdt, "host-model", buf))); - g_free(buf); - } - if (kvmppc_get_host_serial(&buf)) { - _FDT((fdt_property_string(fdt, "host-serial", buf))); - g_free(buf); - } - - buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1], - qemu_uuid[2], qemu_uuid[3], qemu_uuid[4], - qemu_uuid[5], qemu_uuid[6], qemu_uuid[7], - qemu_uuid[8], qemu_uuid[9], qemu_uuid[10], - qemu_uuid[11], qemu_uuid[12], qemu_uuid[13], - qemu_uuid[14], qemu_uuid[15]); - - _FDT((fdt_property_string(fdt, "vm,uuid", buf))); - if (qemu_uuid_set) { - _FDT((fdt_property_string(fdt, "system-id", buf))); - } - g_free(buf); - - if (qemu_get_vm_name()) { - _FDT((fdt_property_string(fdt, "ibm,partition-name", - qemu_get_vm_name()))); - } - - _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); - - /* /chosen */ - _FDT((fdt_begin_node(fdt, "chosen"))); - - /* Set Form1_affinity */ - _FDT((fdt_property(fdt, "ibm,architecture-vec-5", vec5, sizeof(vec5)))); - - _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline))); - _FDT((fdt_property(fdt, "linux,initrd-start", - &start_prop, sizeof(start_prop)))); - _FDT((fdt_property(fdt, "linux,initrd-end", - &end_prop, sizeof(end_prop)))); - if (kernel_size) { - uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), - cpu_to_be64(kernel_size) }; - - _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, 
sizeof(kprop)))); - if (little_endian) { - _FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0))); - } - } - if (boot_menu) { - _FDT((fdt_property_cell(fdt, "qemu,boot-menu", boot_menu))); - } - _FDT((fdt_property_cell(fdt, "qemu,graphic-width", graphic_width))); - _FDT((fdt_property_cell(fdt, "qemu,graphic-height", graphic_height))); - _FDT((fdt_property_cell(fdt, "qemu,graphic-depth", graphic_depth))); - - _FDT((fdt_end_node(fdt))); - - /* RTAS */ - _FDT((fdt_begin_node(fdt, "rtas"))); - - if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { - add_str(hypertas, "hcall-multi-tce"); - } - _FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas->str, - hypertas->len))); - g_string_free(hypertas, TRUE); - _FDT((fdt_property(fdt, "qemu,hypertas-functions", qemu_hypertas->str, - qemu_hypertas->len))); - g_string_free(qemu_hypertas, TRUE); - - _FDT((fdt_property(fdt, "ibm,associativity-reference-points", - refpoints, sizeof(refpoints)))); - - _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX))); - _FDT((fdt_property_cell(fdt, "rtas-event-scan-rate", - RTAS_EVENT_SCAN_RATE))); - - if (msi_nonbroken) { - _FDT((fdt_property(fdt, "ibm,change-msix-capable", NULL, 0))); - } - - /* - * According to PAPR, rtas ibm,os-term does not guarantee a return - * back to the guest cpu. - * - * While an additional ibm,extended-os-term property indicates that - * rtas call return will always occur. Set this property. - */ - _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0))); - - _FDT((fdt_end_node(fdt))); - - /* interrupt controller */ - _FDT((fdt_begin_node(fdt, "interrupt-controller"))); - - _FDT((fdt_property_string(fdt, "device_type", - "PowerPC-External-Interrupt-Presentation"))); - _FDT((fdt_property_string(fdt, "compatible", "IBM,ppc-xicp"))); - _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); - _FDT((fdt_property(fdt, "ibm,interrupt-server-ranges", - interrupt_server_ranges_prop, - sizeof(interrupt_server_ranges_prop)))); - _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); - _FDT((fdt_property_cell(fdt, "linux,phandle", PHANDLE_XICP))); - _FDT((fdt_property_cell(fdt, "phandle", PHANDLE_XICP))); - - _FDT((fdt_end_node(fdt))); - - /* vdevice */ - _FDT((fdt_begin_node(fdt, "vdevice"))); - - _FDT((fdt_property_string(fdt, "device_type", "vdevice"))); - _FDT((fdt_property_string(fdt, "compatible", "IBM,vdevice"))); - _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); - _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); - _FDT((fdt_property_cell(fdt, "#interrupt-cells", 0x2))); - _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); - - _FDT((fdt_end_node(fdt))); - - /* event-sources */ - spapr_events_fdt_skel(fdt, epow_irq); - - /* /hypervisor node */ - if (kvm_enabled()) { - uint8_t hypercall[16]; - - /* indicate KVM hypercall interface */ - _FDT((fdt_begin_node(fdt, "hypervisor"))); - _FDT((fdt_property_string(fdt, "compatible", "linux,kvm"))); - if (kvmppc_has_cap_fixup_hcalls()) { - /* - * Older KVM versions with older guest kernels were broken with the - * magic page, don't allow the guest to map it. 
- */ - if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, - sizeof(hypercall))) { - _FDT((fdt_property(fdt, "hcall-instructions", hypercall, - sizeof(hypercall)))); - } - } - _FDT((fdt_end_node(fdt))); - } - - _FDT((fdt_end_node(fdt))); /* close root node */ - _FDT((fdt_finish(fdt))); - - return fdt; -} - static int spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start, hwaddr size) { @@ -594,6 +346,51 @@ static int spapr_populate_memory(sPAPRMachineState *spapr, void *fdt) return 0; } +/* Populate the "ibm,pa-features" property */ +static void spapr_populate_pa_features(CPUPPCState *env, void *fdt, int offset) +{ + uint8_t pa_features_206[] = { 6, 0, + 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; + uint8_t pa_features_207[] = { 24, 0, + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; + uint8_t *pa_features; + size_t pa_size; + + switch (env->mmu_model) { + case POWERPC_MMU_2_06: + case POWERPC_MMU_2_06a: + pa_features = pa_features_206; + pa_size = sizeof(pa_features_206); + break; + case POWERPC_MMU_2_07: + case POWERPC_MMU_2_07a: + pa_features = pa_features_207; + pa_size = sizeof(pa_features_207); + break; + default: + return; + } + + if (env->ci_large_pages) { + /* + * Note: we keep CI large pages off by default because a 64K capable + * guest provisioned with large pages might otherwise try to map a qemu + * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages + * even if that qemu runs on a 4k host. + * We dd this bit back here if we are confident this is not an issue + */ + pa_features[3] |= 0x20; + } + if (kvmppc_has_cap_htm() && pa_size > 24) { + pa_features[24] |= 0x80; /* Transactional memory support */ + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); +} + static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, sPAPRMachineState *spapr) { @@ -621,24 +418,6 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index))); } - /* Note: we keep CI large pages off for now because a 64K capable guest - * provisioned with large pages might otherwise try to map a qemu - * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages - * even if that qemu runs on a 4k host. 
- * - * We can later add this bit back when we are confident this is not - * an issue (!HV KVM or 64K host) - */ - uint8_t pa_features_206[] = { 6, 0, - 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; - uint8_t pa_features_207[] = { 24, 0, - 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, - 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, - 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; - uint8_t *pa_features; - size_t pa_size; - _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); @@ -656,13 +435,13 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", pcc->l1_dcache_size))); } else { - fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); + error_report("Warning: Unknown L1 dcache size for cpu"); } if (pcc->l1_icache_size) { _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", pcc->l1_icache_size))); } else { - fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); + error_report("Warning: Unknown L1 icache size for cpu"); } _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); @@ -698,25 +477,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); } - page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, + page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop, sizeof(page_sizes_prop)); if (page_sizes_prop_size) { _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", page_sizes_prop, page_sizes_prop_size))); } - /* Do the ibm,pa-features property, adjust it for ci-large-pages */ - if (env->mmu_model == POWERPC_MMU_2_06) { - pa_features = pa_features_206; - pa_size = sizeof(pa_features_206); - } else /* env->mmu_model == POWERPC_MMU_2_07 */ { - pa_features = pa_features_207; - pa_size = sizeof(pa_features_207); - } - if (env->ci_large_pages) { - pa_features[3] |= 0x20; - } - _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); + spapr_populate_pa_features(env, fdt, offset); _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", cs->cpu_index / vcpus_per_socket))); @@ -886,13 +654,42 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) return ret; } +static int spapr_dt_cas_updates(sPAPRMachineState *spapr, void *fdt, + sPAPROptionVector *ov5_updates) +{ + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + int ret = 0, offset; + + /* Generate ibm,dynamic-reconfiguration-memory node if required */ + if (spapr_ovec_test(ov5_updates, OV5_DRCONF_MEMORY)) { + g_assert(smc->dr_lmb_enabled); + ret = spapr_populate_drconf_memory(spapr, fdt); + if (ret) { + goto out; + } + } + + offset = fdt_path_offset(fdt, "/chosen"); + if (offset < 0) { + offset = fdt_add_subnode(fdt, 0, "chosen"); + if (offset < 0) { + return offset; + } + } + ret = spapr_ovec_populate_dt(fdt, offset, spapr->ov5_cas, + "ibm,architecture-vec-5"); + +out: + return ret; +} + int spapr_h_cas_compose_response(sPAPRMachineState *spapr, target_ulong addr, target_ulong size, - bool cpu_update, bool memory_update) + bool cpu_update, + sPAPROptionVector *ov5_updates) { void *fdt, *fdt_skel; sPAPRDeviceTreeUpdateHeader hdr = { .version_id = 1 }; - sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine()); size -= sizeof(hdr); @@ -911,9 +708,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, _FDT((spapr_fixup_cpu_dt(fdt, spapr))); } - /* Generate ibm,dynamic-reconfiguration-memory node if required */ - 
if (memory_update && smc->dr_lmb_enabled) { - _FDT((spapr_populate_drconf_memory(spapr, fdt))); + if (spapr_dt_cas_updates(spapr, fdt, ov5_updates)) { + return -1; } /* Pack resulting tree */ @@ -932,42 +728,220 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, return 0; } -static void spapr_finalize_fdt(sPAPRMachineState *spapr, - hwaddr fdt_addr, - hwaddr rtas_addr, - hwaddr rtas_size) +static void spapr_dt_rtas(sPAPRMachineState *spapr, void *fdt) +{ + int rtas; + GString *hypertas = g_string_sized_new(256); + GString *qemu_hypertas = g_string_sized_new(256); + uint32_t refpoints[] = { cpu_to_be32(0x4), cpu_to_be32(0x4) }; + uint64_t max_hotplug_addr = spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr); + uint32_t lrdr_capacity[] = { + cpu_to_be32(max_hotplug_addr >> 32), + cpu_to_be32(max_hotplug_addr & 0xffffffff), + 0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE), + cpu_to_be32(max_cpus / smp_threads), + }; + + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); + + /* hypertas */ + add_str(hypertas, "hcall-pft"); + add_str(hypertas, "hcall-term"); + add_str(hypertas, "hcall-dabr"); + add_str(hypertas, "hcall-interrupt"); + add_str(hypertas, "hcall-tce"); + add_str(hypertas, "hcall-vio"); + add_str(hypertas, "hcall-splpar"); + add_str(hypertas, "hcall-bulk"); + add_str(hypertas, "hcall-set-mode"); + add_str(hypertas, "hcall-sprg0"); + add_str(hypertas, "hcall-copy"); + add_str(hypertas, "hcall-debug"); + add_str(qemu_hypertas, "hcall-memop1"); + + if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { + add_str(hypertas, "hcall-multi-tce"); + } + _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions", + hypertas->str, hypertas->len)); + g_string_free(hypertas, TRUE); + _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions", + qemu_hypertas->str, qemu_hypertas->len)); + g_string_free(qemu_hypertas, TRUE); + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max", + RTAS_ERROR_LOG_MAX)); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", + RTAS_EVENT_SCAN_RATE)); + + if (msi_nonbroken) { + _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0)); + } + + /* + * According to PAPR, rtas ibm,os-term does not guarantee a return + * back to the guest cpu. + * + * While an additional ibm,extended-os-term property indicates + * that rtas call return will always occur. Set this property. 
+ */ + _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0)); + + _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity", + lrdr_capacity, sizeof(lrdr_capacity))); + + spapr_dt_rtas_tokens(fdt, rtas); +} + +static void spapr_dt_chosen(sPAPRMachineState *spapr, void *fdt) +{ + MachineState *machine = MACHINE(spapr); + int chosen; + const char *boot_device = machine->boot_order; + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + size_t cb = 0; + char *bootlist = get_boot_devices_list(&cb, true); + + _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); + + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", machine->kernel_cmdline)); + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start", + spapr->initrd_base)); + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end", + spapr->initrd_base + spapr->initrd_size)); + + if (spapr->kernel_size) { + uint64_t kprop[2] = { cpu_to_be64(KERNEL_LOAD_ADDR), + cpu_to_be64(spapr->kernel_size) }; + + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel", + &kprop, sizeof(kprop))); + if (spapr->kernel_le) { + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0)); + } + } + if (boot_menu) { + _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu))); + } + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth)); + + if (cb && bootlist) { + int i; + + for (i = 0; i < cb; i++) { + if (bootlist[i] == '\n') { + bootlist[i] = ' '; + } + } + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist)); + } + + if (boot_device && strlen(boot_device)) { + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device)); + } + + if (!spapr->has_graphics && stdout_path) { + _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path)); + } + + g_free(stdout_path); + g_free(bootlist); +} + +static void spapr_dt_hypervisor(sPAPRMachineState *spapr, void *fdt) +{ + /* The /hypervisor node isn't in PAPR - this is a hack to allow PR + * KVM to work under pHyp with some guest co-operation */ + int hypervisor; + uint8_t hypercall[16]; + + _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor")); + /* indicate KVM hypercall interface */ + _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm")); + if (kvmppc_has_cap_fixup_hcalls()) { + /* + * Older KVM versions with older guest kernels were broken + * with the magic page, don't allow the guest to map it. 
+ */ + if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall))) { + _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions", + hypercall, sizeof(hypercall))); + } + } +} + +static void *spapr_build_fdt(sPAPRMachineState *spapr, + hwaddr rtas_addr, + hwaddr rtas_size) { MachineState *machine = MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); - const char *boot_device = machine->boot_order; - int ret, i; - size_t cb = 0; - char *bootlist; + int ret; void *fdt; sPAPRPHBState *phb; + char *buf; - fdt = g_malloc(FDT_MAX_SIZE); + fdt = g_malloc0(FDT_MAX_SIZE); + _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); - /* open out the base tree into a temp buffer for the final tweaks */ - _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); + /* Root node */ + _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp")); + _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)")); + _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries")); - ret = spapr_populate_memory(spapr, fdt); - if (ret < 0) { - fprintf(stderr, "couldn't setup memory nodes in fdt\n"); - exit(1); + /* + * Add info to guest to indentify which host is it being run on + * and what is the uuid of the guest + */ + if (kvmppc_get_host_model(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); + g_free(buf); + } + if (kvmppc_get_host_serial(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); + g_free(buf); + } + + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + + _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf)); + if (qemu_uuid_set) { + _FDT(fdt_setprop_string(fdt, 0, "system-id", buf)); + } + g_free(buf); + + if (qemu_get_vm_name()) { + _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name", + qemu_get_vm_name())); } - ret = spapr_populate_vdevice(spapr->vio_bus, fdt); + _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2)); + _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); + + /* /interrupt controller */ + spapr_dt_xics(spapr->xics, fdt, PHANDLE_XICP); + + ret = spapr_populate_memory(spapr, fdt); if (ret < 0) { - fprintf(stderr, "couldn't setup vio devices in fdt\n"); + error_report("couldn't setup memory nodes in fdt"); exit(1); } + /* /vdevice */ + spapr_dt_vdevice(spapr->vio_bus, fdt); + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { ret = spapr_rng_populate_dt(fdt); if (ret < 0) { - fprintf(stderr, "could not set up rng device in the fdt\n"); + error_report("could not set up rng device in the fdt"); exit(1); } } @@ -980,43 +954,9 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, } } - /* RTAS */ - ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size); - if (ret < 0) { - fprintf(stderr, "Couldn't set up RTAS device tree properties\n"); - } - /* cpus */ spapr_populate_cpus_dt_node(fdt, spapr); - bootlist = get_boot_devices_list(&cb, true); - if (cb && bootlist) { - int offset = fdt_path_offset(fdt, "/chosen"); - if (offset < 0) { - exit(1); - } - for (i = 0; i < cb; i++) { - if (bootlist[i] == '\n') { - bootlist[i] = ' '; - } - - } - ret = fdt_setprop_string(fdt, offset, "qemu,boot-list", bootlist); - } - - if (boot_device && strlen(boot_device)) { - int offset = fdt_path_offset(fdt, "/chosen"); - - if (offset < 0) { - exit(1); - } - fdt_setprop_string(fdt, offset, "qemu,boot-device", boot_device); - } - - if (!spapr->has_graphics) { - spapr_populate_chosen_stdout(fdt, spapr->vio_bus); - } - if (smc->dr_lmb_enabled) { _FDT(spapr_drc_populate_dt(fdt, 0, 
NULL, SPAPR_DR_CONNECTOR_TYPE_LMB)); } @@ -1031,19 +971,36 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, } } - _FDT((fdt_pack(fdt))); + /* /event-sources */ + spapr_dt_events(spapr, fdt); - if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { - error_report("FDT too big ! 0x%x bytes (max is 0x%x)", - fdt_totalsize(fdt), FDT_MAX_SIZE); - exit(1); + /* /rtas */ + spapr_dt_rtas(spapr, fdt); + + /* /chosen */ + spapr_dt_chosen(spapr, fdt); + + /* /hypervisor */ + if (kvm_enabled()) { + spapr_dt_hypervisor(spapr, fdt); } - qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); - cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + /* Build memory reserve map */ + if (spapr->kernel_size) { + _FDT((fdt_add_mem_rsv(fdt, KERNEL_LOAD_ADDR, spapr->kernel_size))); + } + if (spapr->initrd_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, spapr->initrd_size))); + } - g_free(bootlist); - g_free(fdt); + /* ibm,client-architecture-support updates */ + ret = spapr_dt_cas_updates(spapr, fdt, spapr->ov5_cas); + if (ret < 0) { + error_report("couldn't setup CAS properties fdt"); + exit(1); + } + + return fdt; } static uint64_t translate_kernel_address(void *opaque, uint64_t addr) @@ -1158,7 +1115,7 @@ static void spapr_reallocate_hpt(sPAPRMachineState *spapr, int shift, } } -static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) +static void find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) { bool matched = false; @@ -1171,8 +1128,6 @@ static int find_unknown_sysbus_device(SysBusDevice *sbdev, void *opaque) qdev_fw_name(DEVICE(sbdev))); exit(1); } - - return 0; } static void ppc_spapr_reset(void) @@ -1181,6 +1136,9 @@ static void ppc_spapr_reset(void) sPAPRMachineState *spapr = SPAPR_MACHINE(machine); PowerPCCPU *first_ppc_cpu; uint32_t rtas_limit; + hwaddr rtas_addr, fdt_addr; + void *fdt; + int rc; /* Check for unknown sysbus devices */ foreach_dynamic_sysbus_device(find_unknown_sysbus_device, NULL); @@ -1204,24 +1162,44 @@ static void ppc_spapr_reset(void) * processed with 32-bit real mode code if necessary */ rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR); - spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE; - spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE; + rtas_addr = rtas_limit - RTAS_MAX_SIZE; + fdt_addr = rtas_addr - FDT_MAX_SIZE; - /* Load the fdt */ - spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr, - spapr->rtas_size); + /* if this reset wasn't generated by CAS, we should reset our + * negotiated options and start from scratch */ + if (!spapr->cas_reboot) { + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_new(); + } + + fdt = spapr_build_fdt(spapr, rtas_addr, spapr->rtas_size); + + spapr_load_rtas(spapr, fdt, rtas_addr); + + rc = fdt_pack(fdt); + + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); + + if (fdt_totalsize(fdt) > FDT_MAX_SIZE) { + error_report("FDT too big ! 
0x%x bytes (max is 0x%x)",
+                     fdt_totalsize(fdt), FDT_MAX_SIZE);
+        exit(1);
+    }
-    /* Copy RTAS over */
-    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
-                              spapr->rtas_size);
+    /* Load the fdt */
+    qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+    cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
+    g_free(fdt);
     /* Set up the entry state */
     first_ppc_cpu = POWERPC_CPU(first_cpu);
-    first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
+    first_ppc_cpu->env.gpr[3] = fdt_addr;
     first_ppc_cpu->env.gpr[5] = 0;
     first_cpu->halted = 0;
     first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;
+    spapr->cas_reboot = false;
 }
 
 static void spapr_create_nvram(sPAPRMachineState *spapr)
@@ -1289,6 +1267,68 @@ static bool version_before_3(void *opaque, int version_id)
     return version_id < 3;
 }
 
+static bool spapr_ov5_cas_needed(void *opaque)
+{
+    sPAPRMachineState *spapr = opaque;
+    sPAPROptionVector *ov5_mask = spapr_ovec_new();
+    sPAPROptionVector *ov5_legacy = spapr_ovec_new();
+    sPAPROptionVector *ov5_removed = spapr_ovec_new();
+    bool cas_needed;
+
+    /* Prior to the introduction of sPAPROptionVector, we had two option
+     * vectors we dealt with: OV5_FORM1_AFFINITY, and OV5_DRCONF_MEMORY.
+     * Both of these options encode machine topology into the device-tree
+     * in such a way that the now-booted OS should still be able to interact
+     * appropriately with QEMU regardless of what options were actually
+     * negotiated on the source side.
+     *
+     * As such, we can avoid migrating the CAS-negotiated options if these
+     * are the only options available on the current machine/platform.
+     * Since these are the only options available for pseries-2.7 and
+     * earlier, this allows us to maintain old->new/new->old migration
+     * compatibility.
+     *
+     * For QEMU 2.8+, there are additional CAS-negotiable options available
+     * via default pseries-2.8 machines and explicit command-line parameters.
+     * Some of these options, like OV5_HP_EVT, *do* require QEMU to be aware
+     * of the actual CAS-negotiated values to continue working properly. For
+     * example, availability of memory unplug depends on knowing whether
+     * OV5_HP_EVT was negotiated via CAS.
+     *
+     * Thus, for any cases where the set of available CAS-negotiable
+     * options extends beyond OV5_FORM1_AFFINITY and OV5_DRCONF_MEMORY, we
+     * include the CAS-negotiated options in the migration stream.
+     */
+    spapr_ovec_set(ov5_mask, OV5_FORM1_AFFINITY);
+    spapr_ovec_set(ov5_mask, OV5_DRCONF_MEMORY);
+
+    /* spapr_ovec_diff returns true if bits were removed. We avoid using
+     * the mask itself since in the future it's possible "legacy" bits may be
+     * removed via machine options, which could generate a false positive
+     * that breaks migration.
+ */ + spapr_ovec_intersect(ov5_legacy, spapr->ov5, ov5_mask); + cas_needed = spapr_ovec_diff(ov5_removed, spapr->ov5, ov5_legacy); + + spapr_ovec_cleanup(ov5_mask); + spapr_ovec_cleanup(ov5_legacy); + spapr_ovec_cleanup(ov5_removed); + + return cas_needed; +} + +static const VMStateDescription vmstate_spapr_ov5_cas = { + .name = "spapr_option_vector_ov5_cas", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_ov5_cas_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER_V(ov5_cas, sPAPRMachineState, 1, + vmstate_spapr_ovec, sPAPROptionVector), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_spapr = { .name = "spapr", .version_id = 3, @@ -1304,6 +1344,10 @@ static const VMStateDescription vmstate_spapr = { VMSTATE_PPC_TIMEBASE_V(tb, sPAPRMachineState, 2), VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription*[]) { + &vmstate_spapr_ov5_cas, + NULL + } }; static int htab_save_setup(QEMUFile *f, void *opaque) @@ -1716,7 +1760,6 @@ static void ppc_spapr_init(MachineState *machine) MachineClass *mc = MACHINE_GET_CLASS(machine); sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); const char *kernel_filename = machine->kernel_filename; - const char *kernel_cmdline = machine->kernel_cmdline; const char *initrd_filename = machine->initrd_filename; PCIHostState *phb; int i; @@ -1726,10 +1769,7 @@ static void ppc_spapr_init(MachineState *machine) void *rma = NULL; hwaddr rma_alloc_size; hwaddr node0_size = spapr_node0_size(); - uint32_t initrd_base = 0; - long kernel_size = 0, initrd_size = 0; long load_limit, fw_size; - bool kernel_le = false; char *filename; int smt = kvmppc_smt_threads(); int spapr_cores = smp_cpus / smp_threads; @@ -1803,13 +1843,25 @@ static void ppc_spapr_init(MachineState *machine) DIV_ROUND_UP(max_cpus * smt, smp_threads), XICS_IRQS_SPAPR, &error_fatal); + /* Set up containers for ibm,client-set-architecture negotiated options */ + spapr->ov5 = spapr_ovec_new(); + spapr->ov5_cas = spapr_ovec_new(); + if (smc->dr_lmb_enabled) { + spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY); spapr_validate_node_memory(machine, &error_fatal); } + spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + + /* advertise support for dedicated HP event source to guests */ + if (spapr->use_hotplug_event_source) { + spapr_ovec_set(spapr->ov5, OV5_HP_EVT); + } + /* init CPUs */ if (machine->cpu_model == NULL) { - machine->cpu_model = kvm_enabled() ? "host" : "POWER7"; + machine->cpu_model = kvm_enabled() ? 
"host" : smc->tcg_default_cpu; } ppc_cpu_parse_features(machine->cpu_model); @@ -1857,6 +1909,9 @@ static void ppc_spapr_init(MachineState *machine) /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */ kvmppc_enable_logical_ci_hcalls(); kvmppc_enable_set_mode_hcall(); + + /* H_CLEAR_MOD/_REF are mandatory in PAPR, but off by default */ + kvmppc_enable_clear_ref_mod_hcalls(); } /* allocate RAM */ @@ -1927,7 +1982,7 @@ static void ppc_spapr_init(MachineState *machine) } g_free(filename); - /* Set up EPOW events infrastructure */ + /* Set up RTAS event infrastructure */ spapr_events_init(spapr); /* Set up the RTC RTAS interfaces */ @@ -1999,19 +2054,19 @@ static void ppc_spapr_init(MachineState *machine) if (kernel_filename) { uint64_t lowaddr = 0; - kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL, - NULL, &lowaddr, NULL, 1, PPC_ELF_MACHINE, - 0, 0); - if (kernel_size == ELF_LOAD_WRONG_ENDIAN) { - kernel_size = load_elf(kernel_filename, - translate_kernel_address, NULL, - NULL, &lowaddr, NULL, 0, PPC_ELF_MACHINE, - 0, 0); - kernel_le = kernel_size > 0; - } - if (kernel_size < 0) { - error_report("error loading %s: %s", - kernel_filename, load_elf_strerror(kernel_size)); + spapr->kernel_size = load_elf(kernel_filename, translate_kernel_address, + NULL, NULL, &lowaddr, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) { + spapr->kernel_size = load_elf(kernel_filename, + translate_kernel_address, NULL, NULL, + &lowaddr, NULL, 0, PPC_ELF_MACHINE, + 0, 0); + spapr->kernel_le = spapr->kernel_size > 0; + } + if (spapr->kernel_size < 0) { + error_report("error loading %s: %s", kernel_filename, + load_elf_strerror(spapr->kernel_size)); exit(1); } @@ -2020,17 +2075,17 @@ static void ppc_spapr_init(MachineState *machine) /* Try to locate the initrd in the gap between the kernel * and the firmware. 
Add a bit of space just in case */ - initrd_base = (KERNEL_LOAD_ADDR + kernel_size + 0x1ffff) & ~0xffff; - initrd_size = load_image_targphys(initrd_filename, initrd_base, - load_limit - initrd_base); - if (initrd_size < 0) { + spapr->initrd_base = (KERNEL_LOAD_ADDR + spapr->kernel_size + + 0x1ffff) & ~0xffff; + spapr->initrd_size = load_image_targphys(initrd_filename, + spapr->initrd_base, + load_limit + - spapr->initrd_base); + if (spapr->initrd_size < 0) { error_report("could not load initial ram disk '%s'", initrd_filename); exit(1); } - } else { - initrd_base = 0; - initrd_size = 0; } } @@ -2056,13 +2111,6 @@ static void ppc_spapr_init(MachineState *machine) register_savevm_live(NULL, "spapr/htab", -1, 1, &savevm_htab_handlers, spapr); - /* Prepare the device tree */ - spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size, - kernel_size, kernel_le, - kernel_cmdline, - spapr->check_exception_irq); - assert(spapr->fdt_skel != NULL); - /* used by RTAS */ QTAILQ_INIT(&spapr->ccs_list); qemu_register_reset(spapr_ccs_reset_hook, spapr); @@ -2160,16 +2208,41 @@ static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) spapr->kvm_type = g_strdup(value); } +static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->use_hotplug_event_source; +} + +static void spapr_set_modern_hotplug_events(Object *obj, bool value, + Error **errp) +{ + sPAPRMachineState *spapr = SPAPR_MACHINE(obj); + + spapr->use_hotplug_event_source = value; +} + static void spapr_machine_initfn(Object *obj) { sPAPRMachineState *spapr = SPAPR_MACHINE(obj); spapr->htab_fd = -1; + spapr->use_hotplug_event_source = true; object_property_add_str(obj, "kvm-type", spapr_get_kvm_type, spapr_set_kvm_type, NULL); object_property_set_description(obj, "kvm-type", "Specifies the KVM virtualization mode (HV, PR)", NULL); + object_property_add_bool(obj, "modern-hotplug-events", + spapr_get_modern_hotplug_events, + spapr_set_modern_hotplug_events, + NULL); + object_property_set_description(obj, "modern-hotplug-events", + "Use dedicated hotplug event mechanism in" + " place of standard EPOW events when possible" + " (required for memory hot-unplug support)", + NULL); } static void spapr_machine_finalizefn(Object *obj) @@ -2179,10 +2252,8 @@ static void spapr_machine_finalizefn(Object *obj) g_free(spapr->kvm_type); } -static void ppc_cpu_do_nmi_on_cpu(void *arg) +static void ppc_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg) { - CPUState *cs = arg; - cpu_synchronize_state(cs); ppc_cpu_do_system_reset(cs); } @@ -2192,18 +2263,20 @@ static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) CPUState *cs; CPU_FOREACH(cs) { - async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs); + async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL); } } -static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, - uint32_t node, Error **errp) +static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, + uint32_t node, bool dedicated_hp_event_source, + Error **errp) { sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; int i, fdt_offset, fdt_size; void *fdt; + uint64_t addr = addr_start; for (i = 0; i < nr_lmbs; i++) { drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, @@ -2217,12 +2290,27 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t addr, uint64_t size, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); drck->attach(drc, dev, fdt, fdt_offset, 
!dev->hotplugged, errp); addr += SPAPR_MEMORY_BLOCK_SIZE; + if (!dev->hotplugged) { + /* guests expect coldplugged LMBs to be pre-allocated */ + drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE); + drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_UNISOLATED); + } } /* send hotplug notification to the * guest only in case of hotplugged memory */ if (dev->hotplugged) { - spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, nr_lmbs); + if (dedicated_hp_event_source) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); + } else { + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs); + } } } @@ -2255,8 +2343,94 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, goto out; } - spapr_add_lmbs(dev, addr, size, node, &error_abort); + spapr_add_lmbs(dev, addr, size, node, + spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT), + &error_abort); + +out: + error_propagate(errp, local_err); +} + +typedef struct sPAPRDIMMState { + uint32_t nr_lmbs; +} sPAPRDIMMState; + +static void spapr_lmb_release(DeviceState *dev, void *opaque) +{ + sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque; + HotplugHandler *hotplug_ctrl; + + if (--ds->nr_lmbs) { + return; + } + + g_free(ds); + + /* + * Now that all the LMBs have been removed by the guest, call the + * pc-dimm unplug handler to cleanup up the pc-dimm device. + */ + hotplug_ctrl = qdev_get_hotplug_handler(dev); + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); +} + +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, + Error **errp) +{ + sPAPRDRConnector *drc; + sPAPRDRConnectorClass *drck; + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + int i; + sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState)); + uint64_t addr = addr_start; + + ds->nr_lmbs = nr_lmbs; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + drck->detach(drc, dev, spapr_lmb_release, ds, errp); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + drck->get_index(drc)); +} + +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + + pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr); + object_unparent(OBJECT(dev)); +} + +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + Error *local_err = NULL; + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MemoryRegion *mr = ddc->get_memory_region(dimm); + uint64_t size = memory_region_size(mr); + uint64_t addr; + + addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err); + if (local_err) { + goto out; + } + spapr_del_lmbs(dev, addr, size, &error_abort); out: error_propagate(errp, local_err); } @@ -2334,10 +2508,42 @@ static void 
spapr_machine_device_plug(HotplugHandler *hotplug_dev, static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { + sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine()); MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { - error_setg(errp, "Memory hot unplug not supported by sPAPR"); + if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { + spapr_memory_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "Memory hot unplug not supported for this guest"); + } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + if (!mc->query_hotpluggable_cpus) { + error_setg(errp, "CPU hot unplug not supported on this machine"); + return; + } + spapr_core_unplug(hotplug_dev, dev, errp); + } +} + +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + sPAPRMachineState *sms = SPAPR_MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + if (spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) { + spapr_memory_unplug_request(hotplug_dev, dev, errp); + } else { + /* NOTE: this means there is a window after guest reset, prior to + * CAS negotiation, where unplug requests will fail due to the + * capability not being detected yet. This is a bit different than + * the case with PCI unplug, where the events will be queued and + * eventually handled by the guest after boot + */ + error_setg(errp, "Memory hot unplug not supported for this guest"); + } } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { if (!mc->query_hotpluggable_cpus) { error_setg(errp, "CPU hot unplug not supported on this machine"); @@ -2355,8 +2561,8 @@ static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev, } } -static HotplugHandler *spapr_get_hotpug_handler(MachineState *machine, - DeviceState *dev) +static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine, + DeviceState *dev) { if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { @@ -2403,6 +2609,56 @@ static HotpluggableCPUList *spapr_query_hotpluggable_cpus(MachineState *machine) return head; } +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, Error **errp) +{ + /* + * New-style PHB window placement. + * + * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window + * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO + * windows. + * + * Some guest kernels can't work with MMIO windows above 1<<46 + * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB + * + * 32TiB..(33TiB+1984kiB) contains the 64kiB PIO windows for each + * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the + * 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the + * 1TiB 64-bit MMIO windows for each PHB. 
+ */ + const uint64_t base_buid = 0x800000020000000ULL; + const int max_phbs = + (SPAPR_PCI_LIMIT - SPAPR_PCI_BASE) / SPAPR_PCI_MEM64_WIN_SIZE - 1; + int i; + + /* Sanity check natural alignments */ + QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0); + QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0); + /* Sanity check bounds */ + QEMU_BUILD_BUG_ON((max_phbs * SPAPR_PCI_IO_WIN_SIZE) > SPAPR_PCI_MEM32_WIN_SIZE); + QEMU_BUILD_BUG_ON((max_phbs * SPAPR_PCI_MEM32_WIN_SIZE) > SPAPR_PCI_MEM64_WIN_SIZE); + + if (index >= max_phbs) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", + max_phbs - 1); + return; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE; + *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE; + *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE; +} + static void spapr_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -2421,23 +2677,26 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) mc->init = ppc_spapr_init; mc->reset = ppc_spapr_reset; mc->block_default_type = IF_SCSI; - mc->max_cpus = MAX_CPUMASK_BITS; + mc->max_cpus = 255; mc->no_parallel = 1; mc->default_boot_order = ""; mc->default_ram_size = 512 * M_BYTE; mc->kvm_type = spapr_kvm_type; mc->has_dynamic_sysbus = true; mc->pci_allow_0_address = true; - mc->get_hotplug_handler = spapr_get_hotpug_handler; + mc->get_hotplug_handler = spapr_get_hotplug_handler; hc->pre_plug = spapr_machine_device_pre_plug; hc->plug = spapr_machine_device_plug; hc->unplug = spapr_machine_device_unplug; mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id; + hc->unplug_request = spapr_machine_device_unplug_request; smc->dr_lmb_enabled = true; + smc->tcg_default_cpu = "POWER8"; mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus; fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; + smc->phb_placement = spapr_phb_placement; } static const TypeInfo spapr_machine_info = { @@ -2485,19 +2744,114 @@ static const TypeInfo spapr_machine_info = { } \ type_init(spapr_machine_register_##suffix) +/* + * pseries-2.8 + */ +static void spapr_machine_2_8_instance_options(MachineState *machine) +{ +} + +static void spapr_machine_2_8_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(2_8, "2.8", true); + /* * pseries-2.7 */ +#define SPAPR_COMPAT_2_7 \ + HW_COMPAT_2_7 \ + { \ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ + .property = "mem_win_size", \ + .value = stringify(SPAPR_PCI_2_7_MMIO_WIN_SIZE),\ + }, \ + { \ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ + .property = "mem64_win_size", \ + .value = "0", \ + }, \ + { \ + .driver = TYPE_POWERPC_CPU, \ + .property = "pre-2.8-migration", \ + .value = "on", \ + }, \ + { \ + .driver = TYPE_SPAPR_PCI_HOST_BRIDGE, \ + .property = "pre-2.8-migration", \ + .value = "on", \ + }, + +static void phb_placement_2_7(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, Error **errp) +{ + /* Legacy PHB placement for pseries-2.7 and earlier machine types */ + const uint64_t base_buid = 0x800000020000000ULL; 
+ const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ + const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ + const hwaddr pio_offset = 0x80000000; /* 2 GiB */ + const uint32_t max_index = 255; + const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */ + + uint64_t ram_top = MACHINE(spapr)->ram_size; + hwaddr phb0_base, phb_base; + int i; + + /* Do we have hotpluggable memory? */ + if (MACHINE(spapr)->maxram_size > ram_top) { + /* Can't just use maxram_size, because there may be an + * alignment gap between normal and hotpluggable memory + * regions */ + ram_top = spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr); + } + + phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment); + + if (index > max_index) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", + max_index); + return; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + phb_base = phb0_base + index * phb_spacing; + *pio = phb_base + pio_offset; + *mmio32 = phb_base + mmio_offset; + /* + * We don't set the 64-bit MMIO window, relying on the PHB's + * fallback behaviour of automatically splitting a large "32-bit" + * window into contiguous 32-bit and 64-bit windows + */ +} + static void spapr_machine_2_7_instance_options(MachineState *machine) { + sPAPRMachineState *spapr = SPAPR_MACHINE(machine); + + spapr_machine_2_8_instance_options(machine); + spapr->use_hotplug_event_source = false; } static void spapr_machine_2_7_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + sPAPRMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_2_8_class_options(mc); + smc->tcg_default_cpu = "POWER7"; + SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_7); + smc->phb_placement = phb_placement_2_7; } -DEFINE_SPAPR_MACHINE(2_7, "2.7", true); +DEFINE_SPAPR_MACHINE(2_7, "2.7", false); /* * pseries-2.6 @@ -2512,6 +2866,7 @@ DEFINE_SPAPR_MACHINE(2_7, "2.7", true); static void spapr_machine_2_6_instance_options(MachineState *machine) { + spapr_machine_2_7_instance_options(machine); } static void spapr_machine_2_6_class_options(MachineClass *mc) @@ -2536,6 +2891,7 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", false); static void spapr_machine_2_5_instance_options(MachineState *machine) { + spapr_machine_2_6_instance_options(machine); } static void spapr_machine_2_5_class_options(MachineClass *mc) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c index bcb483dbe6..e0c14f6b77 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -69,11 +69,9 @@ void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp) } /* Set NUMA node for the added CPUs */ - for (i = 0; i < nb_numa_nodes; i++) { - if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) { + i = numa_get_node_for_cpu(cs->cpu_index); + if (i < nb_numa_nodes) { cs->numa_node = i; - break; - } } xics_cpu_setup(spapr->xics, cpu); @@ -92,27 +90,28 @@ char *spapr_get_cpu_core_type(const char *model) gchar **model_pieces = g_strsplit(model, ",", 2); core_type = g_strdup_printf("%s-%s", model_pieces[0], TYPE_SPAPR_CPU_CORE); - g_strfreev(model_pieces); /* Check whether it exists or whether we have to look up an alias name */ if (!object_class_by_name(core_type)) { const char *realmodel; g_free(core_type); - realmodel = ppc_cpu_lookup_alias(model); + core_type = NULL; + realmodel = ppc_cpu_lookup_alias(model_pieces[0]); if (realmodel) { - return spapr_get_cpu_core_type(realmodel); + core_type = 
spapr_get_cpu_core_type(realmodel); } - return NULL; } + g_strfreev(model_pieces); return core_type; } static void spapr_core_release(DeviceState *dev, void *opaque) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); - const char *typename = object_class_get_name(sc->cpu_class); + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev)); + const char *typename = object_class_get_name(scc->cpu_class); size_t size = object_type_get_instance_size(typename); sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); CPUCore *cc = CPU_CORE(dev); @@ -185,7 +184,7 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, /* * Setup CPU DT entries only for hotplugged CPUs. For boot time or - * coldplugged CPUs DT entries are setup in spapr_finalize_fdt(). + * coldplugged CPUs DT entries are setup in spapr_build_fdt(). */ if (dev->hotplugged) { fdt = spapr_populate_hotplug_cpu_dt(cs, &fdt_offset, spapr); @@ -287,8 +286,9 @@ static void spapr_cpu_core_realize_child(Object *child, Error **errp) static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) { sPAPRCPUCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(OBJECT(dev)); CPUCore *cc = CPU_CORE(OBJECT(dev)); - const char *typename = object_class_get_name(sc->cpu_class); + const char *typename = object_class_get_name(scc->cpu_class); size_t size = object_type_get_instance_size(typename); Error *local_err = NULL; void *obj; @@ -331,83 +331,43 @@ static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) error_propagate(errp, local_err); } -static void spapr_cpu_core_class_init(ObjectClass *oc, void *data) -{ - DeviceClass *dc = DEVICE_CLASS(oc); - dc->realize = spapr_cpu_core_realize; -} - -/* - * instance_init routines from different flavours of sPAPR CPU cores. 
- */ -#define SPAPR_CPU_CORE_INITFN(_type, _fname) \ -static void glue(glue(spapr_cpu_core_, _fname), _initfn(Object *obj)) \ -{ \ - sPAPRCPUCore *core = SPAPR_CPU_CORE(obj); \ - char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, stringify(_type)); \ - ObjectClass *oc = object_class_by_name(name); \ - g_assert(oc); \ - g_free((void *)name); \ - core->cpu_class = oc; \ -} - -SPAPR_CPU_CORE_INITFN(970mp_v1.0, 970MP_v10); -SPAPR_CPU_CORE_INITFN(970mp_v1.1, 970MP_v11); -SPAPR_CPU_CORE_INITFN(970_v2.2, 970); -SPAPR_CPU_CORE_INITFN(POWER5+_v2.1, POWER5plus); -SPAPR_CPU_CORE_INITFN(POWER7_v2.3, POWER7); -SPAPR_CPU_CORE_INITFN(POWER7+_v2.1, POWER7plus); -SPAPR_CPU_CORE_INITFN(POWER8_v2.0, POWER8); -SPAPR_CPU_CORE_INITFN(POWER8E_v2.1, POWER8E); -SPAPR_CPU_CORE_INITFN(POWER8NVL_v1.0, POWER8NVL); - -typedef struct SPAPRCoreInfo { - const char *name; - void (*initfn)(Object *obj); -} SPAPRCoreInfo; - -static const SPAPRCoreInfo spapr_cores[] = { +static const char *spapr_core_models[] = { /* 970 */ - { .name = "970_v2.2", .initfn = spapr_cpu_core_970_initfn }, + "970_v2.2", /* 970MP variants */ - { .name = "970MP_v1.0", .initfn = spapr_cpu_core_970MP_v10_initfn }, - { .name = "970mp_v1.0", .initfn = spapr_cpu_core_970MP_v10_initfn }, - { .name = "970MP_v1.1", .initfn = spapr_cpu_core_970MP_v11_initfn }, - { .name = "970mp_v1.1", .initfn = spapr_cpu_core_970MP_v11_initfn }, + "970MP_v1.0", + "970mp_v1.0", + "970MP_v1.1", + "970mp_v1.1", /* POWER5+ */ - { .name = "POWER5+_v2.1", .initfn = spapr_cpu_core_POWER5plus_initfn }, + "POWER5+_v2.1", /* POWER7 */ - { .name = "POWER7_v2.3", .initfn = spapr_cpu_core_POWER7_initfn }, + "POWER7_v2.3", /* POWER7+ */ - { .name = "POWER7+_v2.1", .initfn = spapr_cpu_core_POWER7plus_initfn }, + "POWER7+_v2.1", /* POWER8 */ - { .name = "POWER8_v2.0", .initfn = spapr_cpu_core_POWER8_initfn }, + "POWER8_v2.0", /* POWER8E */ - { .name = "POWER8E_v2.1", .initfn = spapr_cpu_core_POWER8E_initfn }, + "POWER8E_v2.1", /* POWER8NVL */ - { .name = "POWER8NVL_v1.0", .initfn = spapr_cpu_core_POWER8NVL_initfn }, - - { .name = NULL } + "POWER8NVL_v1.0", }; -static void spapr_cpu_core_register(const SPAPRCoreInfo *info) +void spapr_cpu_core_class_init(ObjectClass *oc, void *data) { - TypeInfo type_info = { - .parent = TYPE_SPAPR_CPU_CORE, - .instance_size = sizeof(sPAPRCPUCore), - .instance_init = info->initfn, - }; - - type_info.name = g_strdup_printf("%s-" TYPE_SPAPR_CPU_CORE, info->name); - type_register(&type_info); - g_free((void *)type_info.name); + DeviceClass *dc = DEVICE_CLASS(oc); + sPAPRCPUCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc); + + dc->realize = spapr_cpu_core_realize; + scc->cpu_class = cpu_class_by_name(TYPE_POWERPC_CPU, data); + g_assert(scc->cpu_class); } static const TypeInfo spapr_cpu_core_type_info = { @@ -415,17 +375,27 @@ static const TypeInfo spapr_cpu_core_type_info = { .parent = TYPE_CPU_CORE, .abstract = true, .instance_size = sizeof(sPAPRCPUCore), - .class_init = spapr_cpu_core_class_init, + .class_size = sizeof(sPAPRCPUCoreClass), }; static void spapr_cpu_core_register_types(void) { - const SPAPRCoreInfo *info = spapr_cores; + int i; type_register_static(&spapr_cpu_core_type_info); - while (info->name) { - spapr_cpu_core_register(info); - info++; + + for (i = 0; i < ARRAY_SIZE(spapr_core_models); i++) { + TypeInfo type_info = { + .parent = TYPE_SPAPR_CPU_CORE, + .instance_size = sizeof(sPAPRCPUCore), + .class_init = spapr_cpu_core_class_init, + .class_data = (void *) spapr_core_models[i], + }; + + type_info.name = g_strdup_printf("%s-" TYPE_SPAPR_CPU_CORE, 
+ spapr_core_models[i]); + type_register(&type_info); + g_free((void *)type_info.name); } } diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c index 26a067951c..a0c44ee593 100644 --- a/hw/ppc/spapr_drc.c +++ b/hw/ppc/spapr_drc.c @@ -20,20 +20,7 @@ #include "qapi/visitor.h" #include "qemu/error-report.h" #include "hw/ppc/spapr.h" /* for RTAS return codes */ - -/* #define DEBUG_SPAPR_DRC */ - -#ifdef DEBUG_SPAPR_DRC -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#define DPRINTFN(fmt, ...) \ - do { DPRINTF(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#define DPRINTFN(fmt, ...) \ - do { } while (0) -#endif +#include "trace.h" #define DRC_CONTAINER_PATH "/dr-connector" #define DRC_INDEX_TYPE_SHIFT 28 @@ -69,7 +56,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - DPRINTFN("drc: %x, set_isolation_state: %x", get_index(drc), state); + trace_spapr_drc_set_isolation_state(get_index(drc), state); if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) { /* cannot unisolate a non-existant resource, and, or resources @@ -81,6 +68,23 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, } } + /* + * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't + * belong to a DIMM device that is marked for removal. + * + * Currently the guest userspace tool drmgr that drives the memory + * hotplug/unplug will just try to remove a set of 'removable' LMBs + * in response to a hot unplug request that is based on drc-count. + * If the LMB being removed doesn't belong to a DIMM device that is + * actually being unplugged, fail the isolation request here. + */ + if (drc->type == SPAPR_DR_CONNECTOR_TYPE_LMB) { + if ((state == SPAPR_DR_ISOLATION_STATE_ISOLATED) && + !drc->awaiting_release) { + return RTAS_OUT_HW_ERROR; + } + } + drc->isolation_state = state; if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) { @@ -94,11 +98,11 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, */ if (drc->awaiting_release) { if (drc->configured) { - DPRINTFN("finalizing device removal"); + trace_spapr_drc_set_isolation_state_finalizing(get_index(drc)); drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, drc->detach_cb_opaque, NULL); } else { - DPRINTFN("deferring device removal on unconfigured device\n"); + trace_spapr_drc_set_isolation_state_deferring(get_index(drc)); } } drc->configured = false; @@ -110,7 +114,7 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc, static uint32_t set_indicator_state(sPAPRDRConnector *drc, sPAPRDRIndicatorState state) { - DPRINTFN("drc: %x, set_indicator_state: %x", get_index(drc), state); + trace_spapr_drc_set_indicator_state(get_index(drc), state); drc->indicator_state = state; return RTAS_OUT_SUCCESS; } @@ -120,7 +124,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); - DPRINTFN("drc: %x, set_allocation_state: %x", get_index(drc), state); + trace_spapr_drc_set_allocation_state(get_index(drc), state); if (state == SPAPR_DR_ALLOCATION_STATE_USABLE) { /* if there's no resource/device associated with the DRC, there's @@ -137,7 +141,7 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc, drc->allocation_state = state; if (drc->awaiting_release && drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - DPRINTFN("finalizing device removal"); + 
trace_spapr_drc_set_allocation_state_finalizing(get_index(drc)); drck->detach(drc, DEVICE(drc->dev), drc->detach_cb, drc->detach_cb_opaque, NULL); } else if (drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE) { @@ -167,12 +171,11 @@ static const void *get_fdt(sPAPRDRConnector *drc, int *fdt_start_offset) static void set_configured(sPAPRDRConnector *drc) { - DPRINTFN("drc: %x, set_configured", get_index(drc)); + trace_spapr_drc_set_configured(get_index(drc)); if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_UNISOLATED) { /* guest should be not configuring an isolated device */ - DPRINTFN("drc: %x, set_configured: skipping isolated device", - get_index(drc)); + trace_spapr_drc_set_configured_skipping(get_index(drc)); return; } drc->configured = true; @@ -222,7 +225,7 @@ static uint32_t entity_sense(sPAPRDRConnector *drc, sPAPRDREntitySense *state) } } - DPRINTFN("drc: %x, entity_sense: %x", get_index(drc), state); + trace_spapr_drc_entity_sense(get_index(drc), *state); return RTAS_OUT_SUCCESS; } @@ -336,7 +339,7 @@ static void prop_get_fdt(Object *obj, Visitor *v, const char *name, static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt, int fdt_start_offset, bool coldplug, Error **errp) { - DPRINTFN("drc: %x, attach", get_index(drc)); + trace_spapr_drc_attach(get_index(drc)); if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { error_setg(errp, "an attached device is still awaiting release"); @@ -389,7 +392,7 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, spapr_drc_detach_cb *detach_cb, void *detach_cb_opaque, Error **errp) { - DPRINTFN("drc: %x, detach", get_index(drc)); + trace_spapr_drc_detach(get_index(drc)); drc->detach_cb = detach_cb; drc->detach_cb_opaque = detach_cb_opaque; @@ -415,21 +418,21 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, } if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) { - DPRINTFN("awaiting transition to isolated state before removal"); + trace_spapr_drc_awaiting_isolated(get_index(drc)); drc->awaiting_release = true; return; } if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI && drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) { - DPRINTFN("awaiting transition to unusable state before removal"); + trace_spapr_drc_awaiting_unusable(get_index(drc)); drc->awaiting_release = true; return; } if (drc->awaiting_allocation) { drc->awaiting_release = true; - DPRINTFN("awaiting allocation to complete before removal"); + trace_spapr_drc_awaiting_allocation(get_index(drc)); return; } @@ -460,7 +463,7 @@ static void reset(DeviceState *d) sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDREntitySense state; - DPRINTFN("drc reset: %x", drck->get_index(drc)); + trace_spapr_drc_reset(drck->get_index(drc)); /* immediately upon reset we can safely assume DRCs whose devices * are pending removal can be safely removed, and that they will * subsequently be left in an ISOLATED state. move the DRC to this @@ -502,7 +505,7 @@ static void realize(DeviceState *d, Error **errp) gchar *child_name; Error *err = NULL; - DPRINTFN("drc realize: %x", drck->get_index(drc)); + trace_spapr_drc_realize(drck->get_index(drc)); /* NOTE: we do this as part of realize/unrealize due to the fact * that the guest will communicate with the DRC via RTAS calls * referencing the global DRC index. 
By unlinking the DRC @@ -513,7 +516,7 @@ static void realize(DeviceState *d, Error **errp) root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); snprintf(link_name, sizeof(link_name), "%x", drck->get_index(drc)); child_name = object_get_canonical_path_component(OBJECT(drc)); - DPRINTFN("drc child name: %s", child_name); + trace_spapr_drc_realize_child(drck->get_index(drc), child_name); object_property_add_alias(root_container, link_name, drc->owner, child_name, &err); if (err) { @@ -521,7 +524,7 @@ static void realize(DeviceState *d, Error **errp) object_unref(OBJECT(drc)); } g_free(child_name); - DPRINTFN("drc realize complete"); + trace_spapr_drc_realize_complete(drck->get_index(drc)); } static void unrealize(DeviceState *d, Error **errp) @@ -532,7 +535,7 @@ static void unrealize(DeviceState *d, Error **errp) char name[256]; Error *err = NULL; - DPRINTFN("drc unrealize: %x", drck->get_index(drc)); + trace_spapr_drc_unrealize(drck->get_index(drc)); root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); snprintf(name, sizeof(name), "%x", drck->get_index(drc)); object_property_del(root_container, name, &err); @@ -816,7 +819,7 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, drc_indexes->data, drc_indexes->len * sizeof(uint32_t)); if (ret) { - fprintf(stderr, "Couldn't create ibm,drc-indexes property\n"); + error_report("Couldn't create ibm,drc-indexes property"); goto out; } @@ -824,21 +827,21 @@ int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner, drc_power_domains->data, drc_power_domains->len * sizeof(uint32_t)); if (ret) { - fprintf(stderr, "Couldn't finalize ibm,drc-power-domains property\n"); + error_report("Couldn't finalize ibm,drc-power-domains property"); goto out; } ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-names", drc_names->str, drc_names->len); if (ret) { - fprintf(stderr, "Couldn't finalize ibm,drc-names property\n"); + error_report("Couldn't finalize ibm,drc-names property"); goto out; } ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-types", drc_types->str, drc_types->len); if (ret) { - fprintf(stderr, "Couldn't finalize ibm,drc-types property\n"); + error_report("Couldn't finalize ibm,drc-types property"); goto out; } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index b0668b34a9..f85a9c32a7 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -32,6 +32,7 @@ #include "hw/qdev.h" #include "sysemu/device_tree.h" +#include "hw/ppc/fdt.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "hw/pci/pci.h" @@ -39,6 +40,7 @@ #include "hw/ppc/spapr_drc.h" #include "qemu/help_option.h" #include "qemu/bcd.h" +#include "hw/ppc/spapr_ovec.h" #include struct rtas_error_log { @@ -173,6 +175,16 @@ struct epow_log_full { struct rtas_event_log_v6_epow epow; } QEMU_PACKED; +union drc_identifier { + uint32_t index; + uint32_t count; + struct { + uint32_t count; + uint32_t index; + } count_indexed; + char name[1]; +} QEMU_PACKED; + struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ struct rtas_event_log_v6_section_header hdr; @@ -189,12 +201,9 @@ struct rtas_event_log_v6_hp { #define RTAS_LOG_V6_HP_ID_DRC_NAME 1 #define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 #define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 uint8_t reserved; - union { - uint32_t index; - uint32_t count; - char name[1]; - } drc; + union drc_identifier drc_id; } QEMU_PACKED; struct hp_log_full { @@ -205,38 +214,132 @@ struct hp_log_full { struct 
rtas_event_log_v6_hp hp; } QEMU_PACKED; -#define EVENT_MASK_INTERNAL_ERRORS 0x80000000 -#define EVENT_MASK_EPOW 0x40000000 -#define EVENT_MASK_HOTPLUG 0x10000000 -#define EVENT_MASK_IO 0x08000000 - -#define _FDT(exp) \ - do { \ - int ret = (exp); \ - if (ret < 0) { \ - fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \ - #exp, fdt_strerror(ret)); \ - exit(1); \ - } \ - } while (0) - -void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq) +typedef enum EventClass { + EVENT_CLASS_INTERNAL_ERRORS = 0, + EVENT_CLASS_EPOW = 1, + EVENT_CLASS_RESERVED = 2, + EVENT_CLASS_HOT_PLUG = 3, + EVENT_CLASS_IO = 4, + EVENT_CLASS_MAX +} EventClassIndex; +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) + +static const char * const event_names[EVENT_CLASS_MAX] = { + [EVENT_CLASS_INTERNAL_ERRORS] = "internal-errors", + [EVENT_CLASS_EPOW] = "epow-events", + [EVENT_CLASS_HOT_PLUG] = "hot-plug-events", + [EVENT_CLASS_IO] = "ibm,io-events", +}; + +struct sPAPREventSource { + int irq; + uint32_t mask; + bool enabled; +}; + +static sPAPREventSource *spapr_event_sources_new(void) { - uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)}; - uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0}; + return g_new0(sPAPREventSource, EVENT_CLASS_MAX); +} - _FDT((fdt_begin_node(fdt, "event-sources"))); +static void spapr_event_sources_register(sPAPREventSource *event_sources, + EventClassIndex index, int irq) +{ + /* we only support 1 irq per event class at the moment */ + g_assert(event_sources); + g_assert(!event_sources[index].enabled); + event_sources[index].irq = irq; + event_sources[index].mask = EVENT_CLASS_MASK(index); + event_sources[index].enabled = true; +} - _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0))); - _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2))); - _FDT((fdt_property(fdt, "interrupt-ranges", - irq_ranges, sizeof(irq_ranges)))); +static const sPAPREventSource * +spapr_event_sources_get_source(sPAPREventSource *event_sources, + EventClassIndex index) +{ + g_assert(index < EVENT_CLASS_MAX); + g_assert(event_sources); + + return &event_sources[index]; +} + +void spapr_dt_events(sPAPRMachineState *spapr, void *fdt) +{ + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; + int i, count = 0, event_sources; + sPAPREventSource *events = spapr->event_sources; + + g_assert(events); - _FDT((fdt_begin_node(fdt, "epow-events"))); - _FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts)))); - _FDT((fdt_end_node(fdt))); + _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources")); - _FDT((fdt_end_node(fdt))); + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { + int node_offset; + uint32_t interrupts[2]; + const sPAPREventSource *source = + spapr_event_sources_get_source(events, i); + const char *source_name = event_names[i]; + + if (!source->enabled) { + continue; + } + + interrupts[0] = cpu_to_be32(source->irq); + interrupts[1] = 0; + + _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name)); + _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts, + sizeof(interrupts))); + + irq_ranges[count++] = interrupts[0]; + irq_ranges[count++] = cpu_to_be32(1); + } + + irq_ranges[count] = cpu_to_be32(count); + count++; + + _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2))); + _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges", + irq_ranges, count * sizeof(uint32_t)))); +} + +static const sPAPREventSource * 
+rtas_event_log_to_source(sPAPRMachineState *spapr, int log_type) +{ + const sPAPREventSource *source; + + g_assert(spapr->event_sources); + + switch (log_type) { + case RTAS_LOG_TYPE_HOTPLUG: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_HOT_PLUG); + if (spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)) { + g_assert(source->enabled); + break; + } + /* fall back to epow for legacy hotplug interrupt source */ + case RTAS_LOG_TYPE_EPOW: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_EPOW); + break; + default: + source = NULL; + } + + return source; +} + +static int rtas_event_log_to_irq(sPAPRMachineState *spapr, int log_type) +{ + const sPAPREventSource *source; + + source = rtas_event_log_to_source(spapr, log_type); + g_assert(source); + g_assert(source->enabled); + + return source->irq; } static void rtas_event_log_queue(int log_type, void *data, bool exception) @@ -257,19 +360,15 @@ static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask, sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; - /* we only queue EPOW events atm. */ - if ((event_mask & EVENT_MASK_EPOW) == 0) { - return NULL; - } - QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const sPAPREventSource *source = + rtas_event_log_to_source(spapr, entry->log_type); + if (entry->exception != exception) { continue; } - /* EPOW and hotplug events are surfaced in the same manner */ - if (entry->log_type == RTAS_LOG_TYPE_EPOW || - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + if (source->mask & event_mask) { break; } } @@ -286,19 +385,15 @@ static bool rtas_event_log_contains(uint32_t event_mask, bool exception) sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); sPAPREventLogEntry *entry = NULL; - /* we only queue EPOW events atm. 
*/ - if ((event_mask & EVENT_MASK_EPOW) == 0) { - return false; - } - QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const sPAPREventSource *source = + rtas_event_log_to_source(spapr, entry->log_type); + if (entry->exception != exception) { continue; } - /* EPOW and hotplug events are surfaced in the same manner */ - if (entry->log_type == RTAS_LOG_TYPE_EPOW || - entry->log_type == RTAS_LOG_TYPE_HOTPLUG) { + if (source->mask & event_mask) { return true; } } @@ -386,7 +481,9 @@ static void spapr_powerdown_req(Notifier *n, void *opaque) rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, + rtas_event_log_to_irq(spapr, + RTAS_LOG_TYPE_EPOW))); } static void spapr_hotplug_set_signalled(uint32_t drc_index) @@ -398,7 +495,7 @@ static void spapr_hotplug_set_signalled(uint32_t drc_index) static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, sPAPRDRConnectorType drc_type, - uint32_t drc) + union drc_identifier *drc_id) { sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); struct hp_log_full *new_hp; @@ -443,7 +540,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, case SPAPR_DR_CONNECTOR_TYPE_PCI: hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; if (hp->hotplug_action == RTAS_LOG_V6_HP_ACTION_ADD) { - spapr_hotplug_set_signalled(drc); + spapr_hotplug_set_signalled(drc_id->index); } break; case SPAPR_DR_CONNECTOR_TYPE_LMB: @@ -461,48 +558,89 @@ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, } if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { - hp->drc.count = cpu_to_be32(drc); + hp->drc_id.count = cpu_to_be32(drc_id->count); } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { - hp->drc.index = cpu_to_be32(drc); + hp->drc_id.index = cpu_to_be32(drc_id->index); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ + g_assert(spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = + cpu_to_be32(drc_id->count_indexed.index); } rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true); - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + qemu_irq_pulse(xics_get_qirq(spapr->xics, + rtas_event_log_to_irq(spapr, + RTAS_LOG_TYPE_HOTPLUG))); } void spapr_hotplug_req_add_by_index(sPAPRDRConnector *drc) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDRConnectorType drc_type = drck->get_type(drc); - uint32_t index = drck->get_index(drc); + union drc_identifier drc_id; + drc_id.index = drck->get_index(drc); spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index); + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); } void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc) { sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); sPAPRDRConnectorType drc_type = drck->get_type(drc); - uint32_t index = drck->get_index(drc); + union drc_identifier drc_id; + drc_id.index = drck->get_index(drc); spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index); + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, uint32_t count) { + union drc_identifier drc_id; + + drc_id.count = count; 
spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, - RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count); + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); } void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, uint32_t count) { + union drc_identifier drc_id; + + drc_id.count = count; spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, - RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count); + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); } static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, @@ -514,6 +652,7 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, uint64_t xinfo; sPAPREventLogEntry *event; struct rtas_error_log *hdr; + int i; if ((nargs < 6) || (nargs > 7) || nret != 1) { rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); @@ -550,8 +689,14 @@ static void check_exception(PowerPCCPU *cpu, sPAPRMachineState *spapr, * do the latter here, since our code relies on edge-triggered * interrupts. */ - if (rtas_event_log_contains(mask, true)) { - qemu_irq_pulse(xics_get_qirq(spapr->xics, spapr->check_exception_irq)); + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(EVENT_CLASS_MASK(i), true)) { + const sPAPREventSource *source = + spapr_event_sources_get_source(spapr->event_sources, i); + + g_assert(source->enabled); + qemu_irq_pulse(xics_get_qirq(spapr->xics, source->irq)); + } } return; @@ -603,8 +748,27 @@ static void event_scan(PowerPCCPU *cpu, sPAPRMachineState *spapr, void spapr_events_init(sPAPRMachineState *spapr) { QTAILQ_INIT(&spapr->pending_events); - spapr->check_exception_irq = xics_spapr_alloc(spapr->xics, 0, 0, false, - &error_fatal); + + spapr->event_sources = spapr_event_sources_new(); + + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, + xics_spapr_alloc(spapr->xics, 0, false, + &error_fatal)); + + /* NOTE: if machine supports modern/dedicated hotplug event source, + * we add it to the device-tree unconditionally. This means we may + * have cases where the source is enabled in QEMU, but unused by the + * guest because it does not support modern hotplug events, so we + * take care to rely on checking for negotiation of OV5_HP_EVT option + * before attempting to use it to signal events, rather than simply + * checking that it's enabled. 
+ */ + if (spapr->use_hotplug_event_source) { + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, + xics_spapr_alloc(spapr->xics, 0, false, + &error_fatal)); + } + spapr->epow_notifier.notify = spapr_powerdown_req; qemu_register_powerdown_notifier(&spapr->epow_notifier); spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 73af112e1d..9a9bedf1bd 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -11,21 +11,21 @@ #include "trace.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" +#include "hw/ppc/spapr_ovec.h" struct SPRSyncState { - CPUState *cs; int spr; target_ulong value; target_ulong mask; }; -static void do_spr_sync(void *arg) +static void do_spr_sync(CPUState *cs, run_on_cpu_data arg) { - struct SPRSyncState *s = arg; - PowerPCCPU *cpu = POWERPC_CPU(s->cs); + struct SPRSyncState *s = arg.host_ptr; + PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *env = &cpu->env; - cpu_synchronize_state(s->cs); + cpu_synchronize_state(cs); env->spr[s->spr] &= ~s->mask; env->spr[s->spr] |= s->value; } @@ -34,12 +34,11 @@ static void set_spr(CPUState *cs, int spr, target_ulong value, target_ulong mask) { struct SPRSyncState s = { - .cs = cs, .spr = spr, .value = value, .mask = mask }; - run_on_cpu(cs, do_spr_sync, &s); + run_on_cpu(cs, do_spr_sync, RUN_ON_CPU_HOST_PTR(&s)); } static bool has_spr(PowerPCCPU *cpu, int spr) @@ -201,7 +200,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, switch (ret) { case REMOVE_SUCCESS: - check_tlb_flush(env); + check_tlb_flush(env, true); return H_SUCCESS; case REMOVE_NOT_FOUND: @@ -282,7 +281,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, } } exit: - check_tlb_flush(env); + check_tlb_flush(env, true); return rc; } @@ -319,6 +318,8 @@ static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, ppc_hash64_store_hpte(cpu, pte_index, (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); ppc_hash64_tlb_flush_hpte(cpu, pte_index, v, r); + /* Flush the tlb */ + check_tlb_flush(env, true); /* Don't need a memory barrier, due to qemu's global lock */ ppc_hash64_store_hpte(cpu, pte_index, v | HPTE64_V_HPTE_DIRTY, r); return H_SUCCESS; @@ -880,44 +881,18 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPRMachineState *spapr, return ret; } -/* - * Return the offset to the requested option vector @vector in the - * option vector table @table. 
- */ -static target_ulong cas_get_option_vector(int vector, target_ulong table) -{ - int i; - char nr_vectors, nr_entries; - - if (!table) { - return 0; - } - - nr_vectors = (ldl_phys(&address_space_memory, table) >> 24) + 1; - if (!vector || vector > nr_vectors) { - return 0; - } - table++; /* skip nr option vectors */ - - for (i = 0; i < vector - 1; i++) { - nr_entries = ldl_phys(&address_space_memory, table) >> 24; - table += nr_entries + 2; - } - return table; -} - typedef struct { - PowerPCCPU *cpu; uint32_t cpu_version; Error *err; } SetCompatState; -static void do_set_compat(void *arg) +static void do_set_compat(CPUState *cs, run_on_cpu_data arg) { - SetCompatState *s = arg; + PowerPCCPU *cpu = POWERPC_CPU(cs); + SetCompatState *s = arg.host_ptr; - cpu_synchronize_state(CPU(s->cpu)); - ppc_set_compat(s->cpu, s->cpu_version, &s->err); + cpu_synchronize_state(cs); + ppc_set_compat(cpu, s->cpu_version, &s->err); } #define get_compat_level(cpuver) ( \ @@ -961,23 +936,21 @@ static void cas_handle_compat_cpu(PowerPCCPUClass *pcc, uint32_t pvr, } } -#define OV5_DRCONF_MEMORY 0x20 - static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { target_ulong list = ppc64_phys_to_real(args[0]); - target_ulong ov_table, ov5; + target_ulong ov_table; PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu_); CPUState *cs; - bool cpu_match = false, cpu_update = true, memory_update = false; + bool cpu_match = false, cpu_update = true; unsigned old_cpu_version = cpu_->cpu_version; unsigned compat_lvl = 0, cpu_version = 0; unsigned max_lvl = get_compat_level(cpu_->max_compat); int counter; - char ov5_byte2; + sPAPROptionVector *ov5_guest, *ov5_cas_old, *ov5_updates; /* Parse PVR list */ for (counter = 0; counter < 512; ++counter) { @@ -1013,12 +986,11 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, if (old_cpu_version != cpu_version) { CPU_FOREACH(cs) { SetCompatState s = { - .cpu = POWERPC_CPU(cs), .cpu_version = cpu_version, .err = NULL, }; - run_on_cpu(cs, do_set_compat, &s); + run_on_cpu(cs, do_set_compat, RUN_ON_CPU_HOST_PTR(&s)); if (s.err) { error_report_err(s.err); @@ -1034,19 +1006,34 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu_, /* For the future use: here @ov_table points to the first option vector */ ov_table = list; - ov5 = cas_get_option_vector(5, ov_table); - if (!ov5) { - return H_SUCCESS; - } + ov5_guest = spapr_ovec_parse_vector(ov_table, 5); + + /* NOTE: there are actually a number of ov5 bits where input from the + * guest is always zero, and the platform/QEMU enables them independently + * of guest input. To model these properly we'd want some sort of mask, + * but since they only currently apply to memory migration as defined + * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need + * to worry about this for now. + */ + ov5_cas_old = spapr_ovec_clone(spapr->ov5_cas); + /* full range of negotiated ov5 capabilities */ + spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); + spapr_ovec_cleanup(ov5_guest); + /* capabilities that have been added since CAS-generated guest reset. 
+ * if capabilities have since been removed, generate another reset + */ + ov5_updates = spapr_ovec_new(); + spapr->cas_reboot = spapr_ovec_diff(ov5_updates, + ov5_cas_old, spapr->ov5_cas); - /* @list now points to OV 5 */ - ov5_byte2 = ldub_phys(&address_space_memory, ov5 + 2); - if (ov5_byte2 & OV5_DRCONF_MEMORY) { - memory_update = true; + if (!spapr->cas_reboot) { + spapr->cas_reboot = + (spapr_h_cas_compose_response(spapr, args[1], args[2], cpu_update, + ov5_updates) != 0); } + spapr_ovec_cleanup(ov5_updates); - if (spapr_h_cas_compose_response(spapr, args[1], args[2], - cpu_update, memory_update)) { + if (spapr->cas_reboot) { qemu_system_reset_request(); } diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c index 6bc4d4db33..ae30bbe30f 100644 --- a/hw/ppc/spapr_iommu.c +++ b/hw/ppc/spapr_iommu.c @@ -156,14 +156,17 @@ static uint64_t spapr_tce_get_min_page_size(MemoryRegion *iommu) return 1ULL << tcet->page_shift; } -static void spapr_tce_notify_started(MemoryRegion *iommu) +static void spapr_tce_notify_flag_changed(MemoryRegion *iommu, + IOMMUNotifierFlag old, + IOMMUNotifierFlag new) { - spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true); -} + struct sPAPRTCETable *tbl = container_of(iommu, sPAPRTCETable, iommu); -static void spapr_tce_notify_stopped(MemoryRegion *iommu) -{ - spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), false); + if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) { + spapr_tce_set_need_vfio(tbl, true); + } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) { + spapr_tce_set_need_vfio(tbl, false); + } } static int spapr_tce_table_post_load(void *opaque, int version_id) @@ -246,8 +249,7 @@ static const VMStateDescription vmstate_spapr_tce_table = { static MemoryRegionIOMMUOps spapr_iommu_ops = { .translate = spapr_tce_translate_iommu, .get_min_page_size = spapr_tce_get_min_page_size, - .notify_started = spapr_tce_notify_started, - .notify_stopped = spapr_tce_notify_stopped, + .notify_flag_changed = spapr_tce_notify_flag_changed, }; static int spapr_tce_table_realize(DeviceState *dev) @@ -310,8 +312,8 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn) char tmp[32]; if (spapr_tce_find_by_liobn(liobn)) { - fprintf(stderr, "Attempted to create TCE table with duplicate" - " LIOBN 0x%x\n", liobn); + error_report("Attempted to create TCE table with duplicate" + " LIOBN 0x%x", liobn); return NULL; } diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c new file mode 100644 index 0000000000..3eb1d5976f --- /dev/null +++ b/hw/ppc/spapr_ovec.c @@ -0,0 +1,254 @@ +/* + * QEMU SPAPR Architecture Option Vector Helper Functions + * + * Copyright IBM Corp. 2016 + * + * Authors: + * Bharata B Rao + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/ppc/spapr_ovec.h" +#include "qemu/bitmap.h" +#include "exec/address-spaces.h" +#include "qemu/error-report.h" +#include + +/* #define DEBUG_SPAPR_OVEC */ + +#ifdef DEBUG_SPAPR_OVEC +#define DPRINTFN(fmt, ...) \ + do { fprintf(stderr, fmt "\n", ## __VA_ARGS__); } while (0) +#else +#define DPRINTFN(fmt, ...) 
\ + do { } while (0) +#endif + +#define OV_MAXBYTES 256 /* not including length byte */ +#define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE) + +/* we *could* work with bitmaps directly, but handling the bitmap privately + * allows us to more safely make assumptions about the bitmap size and + * simplify the calling code somewhat + */ +struct sPAPROptionVector { + unsigned long *bitmap; + int32_t bitmap_size; /* only used for migration */ +}; + +const VMStateDescription vmstate_spapr_ovec = { + .name = "spapr_option_vector", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BITMAP(bitmap, sPAPROptionVector, 1, bitmap_size), + VMSTATE_END_OF_LIST() + } +}; + +sPAPROptionVector *spapr_ovec_new(void) +{ + sPAPROptionVector *ov; + + ov = g_new0(sPAPROptionVector, 1); + ov->bitmap = bitmap_new(OV_MAXBITS); + ov->bitmap_size = OV_MAXBITS; + + return ov; +} + +sPAPROptionVector *spapr_ovec_clone(sPAPROptionVector *ov_orig) +{ + sPAPROptionVector *ov; + + g_assert(ov_orig); + + ov = spapr_ovec_new(); + bitmap_copy(ov->bitmap, ov_orig->bitmap, OV_MAXBITS); + + return ov; +} + +void spapr_ovec_intersect(sPAPROptionVector *ov, + sPAPROptionVector *ov1, + sPAPROptionVector *ov2) +{ + g_assert(ov); + g_assert(ov1); + g_assert(ov2); + + bitmap_and(ov->bitmap, ov1->bitmap, ov2->bitmap, OV_MAXBITS); +} + +/* returns true if options bits were removed, false otherwise */ +bool spapr_ovec_diff(sPAPROptionVector *ov, + sPAPROptionVector *ov_old, + sPAPROptionVector *ov_new) +{ + unsigned long *change_mask = bitmap_new(OV_MAXBITS); + unsigned long *removed_bits = bitmap_new(OV_MAXBITS); + bool bits_were_removed = false; + + g_assert(ov); + g_assert(ov_old); + g_assert(ov_new); + + bitmap_xor(change_mask, ov_old->bitmap, ov_new->bitmap, OV_MAXBITS); + bitmap_and(ov->bitmap, ov_new->bitmap, change_mask, OV_MAXBITS); + bitmap_and(removed_bits, ov_old->bitmap, change_mask, OV_MAXBITS); + + if (!bitmap_empty(removed_bits, OV_MAXBITS)) { + bits_were_removed = true; + } + + g_free(change_mask); + g_free(removed_bits); + + return bits_were_removed; +} + +void spapr_ovec_cleanup(sPAPROptionVector *ov) +{ + if (ov) { + g_free(ov->bitmap); + g_free(ov); + } +} + +void spapr_ovec_set(sPAPROptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert_cmpint(bitnr, <, OV_MAXBITS); + + set_bit(bitnr, ov->bitmap); +} + +void spapr_ovec_clear(sPAPROptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert_cmpint(bitnr, <, OV_MAXBITS); + + clear_bit(bitnr, ov->bitmap); +} + +bool spapr_ovec_test(sPAPROptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert_cmpint(bitnr, <, OV_MAXBITS); + + return test_bit(bitnr, ov->bitmap) ? 
true : false; +} + +static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, + long bitmap_offset) +{ + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (entry & (1 << (BITS_PER_BYTE - 1 - i))) { + bitmap_set(bitmap, bitmap_offset + i, 1); + } + } +} + +static uint8_t guest_byte_from_bitmap(unsigned long *bitmap, long bitmap_offset) +{ + uint8_t entry = 0; + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (test_bit(bitmap_offset + i, bitmap)) { + entry |= (1 << (BITS_PER_BYTE - 1 - i)); + } + } + + return entry; +} + +static target_ulong vector_addr(target_ulong table_addr, int vector) +{ + uint16_t vector_count, vector_len; + int i; + + vector_count = ldub_phys(&address_space_memory, table_addr) + 1; + if (vector > vector_count) { + return 0; + } + table_addr++; /* skip nr option vectors */ + + for (i = 0; i < vector - 1; i++) { + vector_len = ldub_phys(&address_space_memory, table_addr) + 1; + table_addr += vector_len + 1; /* bit-vector + length byte */ + } + return table_addr; +} + +sPAPROptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector) +{ + sPAPROptionVector *ov; + target_ulong addr; + uint16_t vector_len; + int i; + + g_assert(table_addr); + g_assert_cmpint(vector, >=, 1); /* vector numbering starts at 1 */ + + addr = vector_addr(table_addr, vector); + if (!addr) { + /* specified vector isn't present */ + return NULL; + } + + vector_len = ldub_phys(&address_space_memory, addr++) + 1; + g_assert_cmpint(vector_len, <=, OV_MAXBYTES); + ov = spapr_ovec_new(); + + for (i = 0; i < vector_len; i++) { + uint8_t entry = ldub_phys(&address_space_memory, addr + i); + if (entry) { + DPRINTFN("read guest vector %2d, byte %3d / %3d: 0x%.2x", + vector, i + 1, vector_len, entry); + guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE); + } + } + + return ov; +} + +int spapr_ovec_populate_dt(void *fdt, int fdt_offset, + sPAPROptionVector *ov, const char *name) +{ + uint8_t vec[OV_MAXBYTES + 1]; + uint16_t vec_len; + unsigned long lastbit; + int i; + + g_assert(ov); + + lastbit = find_last_bit(ov->bitmap, OV_MAXBITS); + /* if no bits are set, include at least 1 byte of the vector so we can + * still encoded this in the device tree while abiding by the same + * encoding/sizing expected in ibm,client-architecture-support + */ + vec_len = (lastbit == OV_MAXBITS) ? 
1 : lastbit / BITS_PER_BYTE + 1; + g_assert_cmpint(vec_len, <=, OV_MAXBYTES); + /* guest expects vector len encoded as vec_len - 1, since the length byte + * is assumed and not included, and the first byte of the vector + * is assumed as well + */ + vec[0] = vec_len - 1; + + for (i = 1; i < vec_len + 1; i++) { + vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE); + if (vec[i]) { + DPRINTFN("encoding guest vector byte %3d / %3d: 0x%.2x", + i, vec_len, vec[i]); + } + } + + return fdt_setprop(fdt, fdt_offset, name, vec, vec_len); +} diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c index 949c44fec8..fd6fc1d953 100644 --- a/hw/ppc/spapr_pci.c +++ b/hw/ppc/spapr_pci.c @@ -47,6 +47,7 @@ #include "sysemu/device_tree.h" #include "sysemu/kvm.h" #include "sysemu/hostmem.h" +#include "sysemu/numa.h" #include "hw/vfio/vfio.h" @@ -362,7 +363,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPRMachineState *spapr, } /* Allocate MSIs */ - irq = xics_spapr_alloc_block(spapr->xics, 0, req_num, false, + irq = xics_spapr_alloc_block(spapr->xics, req_num, false, ret_intr_type == RTAS_TYPE_MSI, &err); if (err) { error_reportf_err(err, "Can't allocate MSIs for device %x: ", @@ -1310,32 +1311,27 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1; if (sphb->index != (uint32_t)-1) { - hwaddr windows_base; + sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + Error *local_err = NULL; if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1) || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2) || (sphb->mem_win_addr != (hwaddr)-1) + || (sphb->mem64_win_addr != (hwaddr)-1) || (sphb->io_win_addr != (hwaddr)-1)) { error_setg(errp, "Either \"index\" or other parameters must" " be specified for PAPR PHB, not both"); return; } - if (sphb->index > SPAPR_PCI_MAX_INDEX) { - error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", - SPAPR_PCI_MAX_INDEX); + smc->phb_placement(spapr, sphb->index, + &sphb->buid, &sphb->io_win_addr, + &sphb->mem_win_addr, &sphb->mem64_win_addr, + windows_supported, sphb->dma_liobn, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } - - sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index; - for (i = 0; i < windows_supported; ++i) { - sphb->dma_liobn[i] = SPAPR_PCI_LIOBN(sphb->index, i); - } - - windows_base = SPAPR_PCI_WINDOW_BASE - + sphb->index * SPAPR_PCI_WINDOW_SPACING; - sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF; - sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF; } if (sphb->buid == (uint64_t)-1) { @@ -1359,11 +1355,49 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) return; } + if (sphb->mem64_win_size != 0) { + if (sphb->mem64_win_addr == (hwaddr)-1) { + error_setg(errp, + "64-bit memory window address not specified for PHB"); + return; + } + + if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx + " (max 2 GiB)", sphb->mem_win_size); + return; + } + + if (sphb->mem64_win_pciaddr == (hwaddr)-1) { + /* 64-bit window defaults to identity mapping */ + sphb->mem64_win_pciaddr = sphb->mem64_win_addr; + } + } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + /* + * For compatibility with old configuration, if no 64-bit MMIO + * window is specified, but the ordinary (32-bit) memory + * window is specified as > 2GiB, we treat it as a 2GiB 32-bit + * window, with a 64-bit MMIO window following on immediately + * afterwards + */ + 
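As a minimal worked example of the compatibility split described in the comment above (standalone C, not part of the patch): the 2 GiB constant stands in for SPAPR_PCI_MEM32_WIN_SIZE per the "max 2 GiB" check earlier in this hunk, and the bus-offset and window-base values are purely illustrative assumptions. The assignments that follow in the hunk perform the same arithmetic on the sPAPRPHBState fields.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for SPAPR_PCI_MEM32_WIN_SIZE and SPAPR_PCI_MEM_WIN_BUS_OFFSET;
     * the values here are illustrative only. */
    #define MEM32_WIN_SIZE    0x80000000ULL   /* 2 GiB */
    #define MEM_WIN_BUS_OFF   0x80000000ULL

    int main(void)
    {
        /* Hypothetical legacy configuration: a single 4 GiB "mem_win". */
        uint64_t mem_win_addr = 0x10000000000ULL;
        uint64_t mem_win_size = 0x100000000ULL;

        /* Keep a 2 GiB 32-bit window and turn the remainder into a 64-bit
         * window that follows immediately after it in CPU address space. */
        uint64_t mem64_win_size    = mem_win_size - MEM32_WIN_SIZE;
        uint64_t mem64_win_addr    = mem_win_addr + MEM32_WIN_SIZE;
        uint64_t mem64_win_pciaddr = MEM_WIN_BUS_OFF + MEM32_WIN_SIZE;
        mem_win_size = MEM32_WIN_SIZE;

        printf("32-bit window: cpu 0x%" PRIx64 " size 0x%" PRIx64 "\n",
               mem_win_addr, mem_win_size);
        printf("64-bit window: cpu 0x%" PRIx64 " size 0x%" PRIx64
               " pci 0x%" PRIx64 "\n",
               mem64_win_addr, mem64_win_size, mem64_win_pciaddr);
        return 0;
    }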
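Earlier in this series, h_client_architecture_support() decides whether to request a CAS-triggered reboot from the return value of spapr_ovec_diff(). A rough sketch of that xor/and logic on a plain integer mask, with made-up capability sets and standalone of the patch, assuming only the behaviour documented in spapr_ovec.c ("returns true if options bits were removed"):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Same shape as spapr_ovec_diff(): 'updates' receives the newly set bits,
     * the return value reports whether any previously set bit was cleared. */
    static bool ovec_diff_mask(uint64_t *updates, uint64_t ov_old, uint64_t ov_new)
    {
        uint64_t change_mask  = ov_old ^ ov_new;          /* bits that changed     */
        uint64_t removed_bits = ov_old & change_mask;     /* were set, now cleared */

        *updates = ov_new & change_mask;                  /* newly added bits      */
        return removed_bits != 0;                         /* removal => reboot     */
    }

    int main(void)
    {
        uint64_t updates;
        /* made-up sets: old = {bit0, bit1}, new = {bit1, bit2} */
        bool removed = ovec_diff_mask(&updates, 0x3, 0x6);

        assert(updates == 0x4);  /* only bit2 is newly negotiated */
        assert(removed);         /* bit0 was dropped, so a CAS reboot is requested */
        return 0;
    }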
sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_pciaddr = + SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE; + } + if (spapr_pci_find_phb(spapr, sphb->buid)) { error_setg(errp, "PCI host bridges must have unique BUIDs"); return; } + if (sphb->numa_node != -1 && + (sphb->numa_node >= MAX_NODES || !numa_info[sphb->numa_node].present)) { + error_setg(errp, "Invalid NUMA node ID for PCI host bridge"); + return; + } + sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid); namebuf = alloca(strlen(sphb->dtbusname) + 32); @@ -1372,12 +1406,19 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) sprintf(namebuf, "%s.mmio", sphb->dtbusname); memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX); - sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname); - memory_region_init_alias(&sphb->memwindow, OBJECT(sphb), + sprintf(namebuf, "%s.mmio32-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem32window, OBJECT(sphb), namebuf, &sphb->memspace, SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size); memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, - &sphb->memwindow); + &sphb->mem32window); + + sprintf(namebuf, "%s.mmio64-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem64window, OBJECT(sphb), + namebuf, &sphb->memspace, + sphb->mem64_win_pciaddr, sphb->mem64_win_size); + memory_region_add_subregion(get_system_memory(), sphb->mem64_win_addr, + &sphb->mem64window); /* Initialize IO regions */ sprintf(namebuf, "%s.io", sphb->dtbusname); @@ -1444,8 +1485,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp) uint32_t irq; Error *local_err = NULL; - irq = xics_spapr_alloc_block(spapr->xics, 0, 1, true, false, - &local_err); + irq = xics_spapr_alloc_block(spapr->xics, 1, true, false, &local_err); if (local_err) { error_propagate(errp, local_err); error_prepend(errp, "can't allocate LSIs: "); @@ -1530,7 +1570,12 @@ static Property spapr_phb_properties[] = { DEFINE_PROP_UINT32("liobn64", sPAPRPHBState, dma_liobn[1], -1), DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1), DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size, - SPAPR_PCI_MMIO_WIN_SIZE), + SPAPR_PCI_MEM32_WIN_SIZE), + DEFINE_PROP_UINT64("mem64_win_addr", sPAPRPHBState, mem64_win_addr, -1), + DEFINE_PROP_UINT64("mem64_win_size", sPAPRPHBState, mem64_win_size, + SPAPR_PCI_MEM64_WIN_SIZE), + DEFINE_PROP_UINT64("mem64_win_pciaddr", sPAPRPHBState, mem64_win_pciaddr, + -1), DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1), DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size, SPAPR_PCI_IO_WIN_SIZE), @@ -1544,6 +1589,9 @@ static Property spapr_phb_properties[] = { DEFINE_PROP_BOOL("ddw", sPAPRPHBState, ddw_enabled, true), DEFINE_PROP_UINT64("pgsz", sPAPRPHBState, page_size_mask, (1ULL << 12) | (1ULL << 16)), + DEFINE_PROP_UINT32("numa_node", sPAPRPHBState, numa_node, -1), + DEFINE_PROP_BOOL("pre-2.8-migration", sPAPRPHBState, + pre_2_8_migration, false), DEFINE_PROP_END_OF_LIST(), }; @@ -1590,6 +1638,20 @@ static void spapr_pci_pre_save(void *opaque) sphb->msi_devs[i].key = *(uint32_t *) key; sphb->msi_devs[i].value = *(spapr_pci_msi *) value; } + + if (sphb->pre_2_8_migration) { + sphb->mig_liobn = sphb->dma_liobn[0]; + sphb->mig_mem_win_addr = sphb->mem_win_addr; + sphb->mig_mem_win_size = sphb->mem_win_size; + sphb->mig_io_win_addr = 
sphb->io_win_addr; + sphb->mig_io_win_size = sphb->io_win_size; + + if ((sphb->mem64_win_size != 0) + && (sphb->mem64_win_addr + == (sphb->mem_win_addr + sphb->mem_win_size))) { + sphb->mig_mem_win_size += sphb->mem64_win_size; + } + } } static int spapr_pci_post_load(void *opaque, int version_id) @@ -1612,6 +1674,13 @@ static int spapr_pci_post_load(void *opaque, int version_id) return 0; } +static bool pre_2_8_migration(void *opaque, int version_id) +{ + sPAPRPHBState *sphb = opaque; + + return sphb->pre_2_8_migration; +} + static const VMStateDescription vmstate_spapr_pci = { .name = "spapr_pci", .version_id = 2, @@ -1620,11 +1689,11 @@ static const VMStateDescription vmstate_spapr_pci = { .post_load = spapr_pci_post_load, .fields = (VMStateField[]) { VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState), - VMSTATE_UINT32_EQUAL(dma_liobn[0], sPAPRPHBState), - VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState), - VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState), - VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState), - VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState), + VMSTATE_UINT32_TEST(mig_liobn, sPAPRPHBState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_addr, sPAPRPHBState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_size, sPAPRPHBState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_addr, sPAPRPHBState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_size, sPAPRPHBState, pre_2_8_migration), VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0, vmstate_spapr_pci_lsi, struct spapr_pci_lsi), VMSTATE_INT32(msi_devs_num, sPAPRPHBState), @@ -1765,10 +1834,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, int bus_off, i, j, ret; char nodename[FDT_NAME_MAX]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; - const uint64_t mmiosize = memory_region_size(&phb->memwindow); - const uint64_t w32max = (1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET; - const uint64_t w32size = MIN(w32max, mmiosize); - const uint64_t w64size = (mmiosize > w32size) ? (mmiosize - w32size) : 0; struct { uint32_t hi; uint64_t child; @@ -1783,15 +1848,16 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, { cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET), cpu_to_be64(phb->mem_win_addr), - cpu_to_be64(w32size), + cpu_to_be64(phb->mem_win_size), }, { - cpu_to_be32(b_ss(3)), cpu_to_be64(1ULL << 32), - cpu_to_be64(phb->mem_win_addr + w32size), - cpu_to_be64(w64size) + cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr), + cpu_to_be64(phb->mem64_win_addr), + cpu_to_be64(phb->mem64_win_size), }, }; - const unsigned sizeof_ranges = (w64size ? 3 : 2) * sizeof(ranges[0]); + const unsigned sizeof_ranges = + (phb->mem64_win_size ? 
3 : 2) * sizeof(ranges[0]); uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 }; uint32_t interrupt_map_mask[] = { cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; @@ -1805,6 +1871,11 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, cpu_to_be32(1), cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW) }; + uint32_t associativity[] = {cpu_to_be32(0x4), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(0x0), + cpu_to_be32(phb->numa_node)}; sPAPRTCETable *tcet; PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; sPAPRFDT s_fdt; @@ -1837,6 +1908,12 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, &ddw_extensions, sizeof(ddw_extensions))); } + /* Advertise NUMA via ibm,associativity */ + if (phb->numa_node != -1) { + _FDT(fdt_setprop(fdt, bus_off, "ibm,associativity", associativity, + sizeof(associativity))); + } + /* Build the interrupt-map, this must matches what is done * in pci_spapr_map_irq */ diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index dc058e512b..bb19944686 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -27,6 +27,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "qemu/log.h" +#include "qemu/error-report.h" #include "sysemu/sysemu.h" #include "sysemu/char.h" #include "hw/qdev.h" @@ -36,6 +37,7 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_rtas.h" #include "hw/ppc/ppc.h" #include "qapi-event.h" #include "hw/boards.h" @@ -43,16 +45,8 @@ #include #include "hw/ppc/spapr_drc.h" #include "qemu/cutils.h" - -/* #define DEBUG_SPAPR */ - -#ifdef DEBUG_SPAPR -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif +#include "trace.h" +#include "hw/ppc/fdt.h" static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPRMachineState *spapr, uint32_t drc_index) @@ -302,7 +296,8 @@ static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, break; } case RTAS_SYSPARM_UUID: - ret = sysparm_st(buffer, length, qemu_uuid, (qemu_uuid_set ? 16 : 0)); + ret = sysparm_st(buffer, length, (unsigned char *)&qemu_uuid, + (qemu_uuid_set ? 
16 : 0)); break; default: ret = RTAS_OUT_NOT_SUPPORTED; @@ -434,8 +429,7 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, /* if this is a DR sensor we can assume sensor_index == drc_index */ drc = spapr_dr_connector_by_index(sensor_index); if (!drc) { - DPRINTF("rtas_set_indicator: invalid sensor/DRC index: %xh\n", - sensor_index); + trace_spapr_rtas_set_indicator_invalid(sensor_index); ret = RTAS_OUT_PARAM_ERROR; goto out; } @@ -474,8 +468,7 @@ static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr, out_unimplemented: /* currently only DR-related sensors are implemented */ - DPRINTF("rtas_set_indicator: sensor/indicator not implemented: %d\n", - sensor_type); + trace_spapr_rtas_set_indicator_not_supported(sensor_index, sensor_type); rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); } @@ -501,16 +494,15 @@ static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPRMachineState *spapr, if (sensor_type != RTAS_SENSOR_TYPE_ENTITY_SENSE) { /* currently only DR-related sensors are implemented */ - DPRINTF("rtas_get_sensor_state: sensor/indicator not implemented: %d\n", - sensor_type); + trace_spapr_rtas_get_sensor_state_not_supported(sensor_index, + sensor_type); ret = RTAS_OUT_NOT_SUPPORTED; goto out; } drc = spapr_dr_connector_by_index(sensor_index); if (!drc) { - DPRINTF("rtas_get_sensor_state: invalid sensor/DRC index: %xh\n", - sensor_index); + trace_spapr_rtas_get_sensor_state_invalid(sensor_index); ret = RTAS_OUT_PARAM_ERROR; goto out; } @@ -567,8 +559,7 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, drc_index = rtas_ld(wa_addr, 0); drc = spapr_dr_connector_by_index(drc_index); if (!drc) { - DPRINTF("rtas_ibm_configure_connector: invalid DRC index: %xh\n", - drc_index); + trace_spapr_rtas_ibm_configure_connector_invalid(drc_index); rc = RTAS_OUT_PARAM_ERROR; goto out; } @@ -576,8 +567,7 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu, drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); fdt = drck->get_fdt(drc, NULL); if (!fdt) { - DPRINTF("rtas_ibm_configure_connector: Missing FDT for DRC index: %xh\n", - drc_index); + trace_spapr_rtas_ibm_configure_connector_missing_fdt(drc_index); rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; goto out; } @@ -691,6 +681,24 @@ target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *spapr, return H_PARAMETER; } +uint64_t qtest_rtas_call(char *cmd, uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t rets) +{ + int token; + + for (token = 0; token < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; token++) { + if (strcmp(cmd, rtas_table[token].name) == 0) { + sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + + rtas_table[token].fn(cpu, spapr, token + RTAS_TOKEN_BASE, + nargs, args, nret, rets); + return H_SUCCESS; + } + } + return H_PARAMETER; +} + void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) { assert((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)); @@ -703,47 +711,9 @@ void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) rtas_table[token].fn = fn; } -int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, - hwaddr rtas_size) +void spapr_dt_rtas_tokens(void *fdt, int rtas) { - int ret; int i; - uint32_t lrdr_capacity[5]; - MachineState *machine = MACHINE(qdev_get_machine()); - sPAPRMachineState *spapr = SPAPR_MACHINE(machine); - uint64_t max_hotplug_addr = spapr->hotplug_memory.base + - memory_region_size(&spapr->hotplug_memory.mr); - - ret = fdt_add_mem_rsv(fdt, rtas_addr, rtas_size); - if 
(ret < 0) { - fprintf(stderr, "Couldn't add RTAS reserve entry: %s\n", - fdt_strerror(ret)); - return ret; - } - - ret = qemu_fdt_setprop_cell(fdt, "/rtas", "linux,rtas-base", - rtas_addr); - if (ret < 0) { - fprintf(stderr, "Couldn't add linux,rtas-base property: %s\n", - fdt_strerror(ret)); - return ret; - } - - ret = qemu_fdt_setprop_cell(fdt, "/rtas", "linux,rtas-entry", - rtas_addr); - if (ret < 0) { - fprintf(stderr, "Couldn't add linux,rtas-entry property: %s\n", - fdt_strerror(ret)); - return ret; - } - - ret = qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", - rtas_size); - if (ret < 0) { - fprintf(stderr, "Couldn't add rtas-size property: %s\n", - fdt_strerror(ret)); - return ret; - } for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { struct rtas_call *call = &rtas_table[i]; @@ -752,29 +722,49 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, continue; } - ret = qemu_fdt_setprop_cell(fdt, "/rtas", call->name, - i + RTAS_TOKEN_BASE); - if (ret < 0) { - fprintf(stderr, "Couldn't add rtas token for %s: %s\n", - call->name, fdt_strerror(ret)); - return ret; - } + _FDT(fdt_setprop_cell(fdt, rtas, call->name, i + RTAS_TOKEN_BASE)); + } +} +void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr) +{ + int rtas_node; + int ret; + + /* Copy RTAS blob into guest RAM */ + cpu_physical_memory_write(addr, spapr->rtas_blob, spapr->rtas_size); + + ret = fdt_add_mem_rsv(fdt, addr, spapr->rtas_size); + if (ret < 0) { + error_report("Couldn't add RTAS reserve entry: %s", + fdt_strerror(ret)); + exit(1); } - lrdr_capacity[0] = cpu_to_be32(max_hotplug_addr >> 32); - lrdr_capacity[1] = cpu_to_be32(max_hotplug_addr & 0xffffffff); - lrdr_capacity[2] = 0; - lrdr_capacity[3] = cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE); - lrdr_capacity[4] = cpu_to_be32(max_cpus/smp_threads); - ret = qemu_fdt_setprop(fdt, "/rtas", "ibm,lrdr-capacity", lrdr_capacity, - sizeof(lrdr_capacity)); + /* Update the device tree with the blob's location */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + assert(rtas_node >= 0); + + ret = fdt_setprop_cell(fdt, rtas_node, "linux,rtas-base", addr); if (ret < 0) { - fprintf(stderr, "Couldn't add ibm,lrdr-capacity rtas property\n"); - return ret; + error_report("Couldn't add linux,rtas-base property: %s", + fdt_strerror(ret)); + exit(1); } - return 0; + ret = fdt_setprop_cell(fdt, rtas_node, "linux,rtas-entry", addr); + if (ret < 0) { + error_report("Couldn't add linux,rtas-entry property: %s", + fdt_strerror(ret)); + exit(1); + } + + ret = fdt_setprop_cell(fdt, rtas_node, "rtas-size", spapr->rtas_size); + if (ret < 0) { + error_report("Couldn't add rtas-size property: %s", + fdt_strerror(ret)); + exit(1); + } } static void core_rtas_register_types(void) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c index f93244d7c1..cc1e09c568 100644 --- a/hw/ppc/spapr_vio.c +++ b/hw/ppc/spapr_vio.c @@ -20,6 +20,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "hw/hw.h" #include "qemu/log.h" @@ -35,19 +36,11 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "hw/ppc/xics.h" +#include "hw/ppc/fdt.h" +#include "trace.h" #include -/* #define DEBUG_SPAPR */ - -#ifdef DEBUG_SPAPR -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) 
\ - do { } while (0) -#endif - static Property spapr_vio_props[] = { DEFINE_PROP_UINT32("irq", VIOsPAPRDevice, irq, 0), \ DEFINE_PROP_END_OF_LIST(), @@ -201,9 +194,7 @@ static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr, dev->crq.qsize = queue_len; dev->crq.qnext = 0; - DPRINTF("CRQ for dev 0x" TARGET_FMT_lx " registered at 0x" - TARGET_FMT_lx "/0x" TARGET_FMT_lx "\n", - reg, queue_addr, queue_len); + trace_spapr_vio_h_reg_crq(reg, queue_addr, queue_len); return H_SUCCESS; } @@ -213,7 +204,7 @@ static target_ulong free_crq(VIOsPAPRDevice *dev) dev->crq.qsize = 0; dev->crq.qnext = 0; - DPRINTF("CRQ for dev 0x%" PRIx32 " freed\n", dev->reg); + trace_spapr_vio_free_crq(dev->reg); return H_SUCCESS; } @@ -276,7 +267,7 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq) uint8_t byte; if (!dev->crq.qsize) { - fprintf(stderr, "spapr_vio_send_creq on uninitialized queue\n"); + error_report("spapr_vio_send_creq on uninitialized queue"); return -1; } @@ -463,7 +454,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp) dev->qdev.id = id; } - dev->irq = xics_spapr_alloc(spapr->xics, 0, dev->irq, false, &local_err); + dev->irq = xics_spapr_alloc(spapr->xics, dev->irq, false, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -634,11 +625,21 @@ static int compare_reg(const void *p1, const void *p2) return 1; } -int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) +void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt) { DeviceState *qdev, **qdevs; BusChild *kid; int i, num, ret = 0; + int node; + + _FDT(node = fdt_add_subnode(fdt, 0, "vdevice")); + + _FDT(fdt_setprop_string(fdt, node, "device_type", "vdevice")); + _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,vdevice")); + _FDT(fdt_setprop_cell(fdt, node, "#address-cells", 1)); + _FDT(fdt_setprop_cell(fdt, node, "#size-cells", 0)); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); /* Count qdevs on the bus list */ num = 0; @@ -660,43 +661,32 @@ int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt) * to know that will mean they are in forward order in the tree. 
*/ for (i = num - 1; i >= 0; i--) { VIOsPAPRDevice *dev = (VIOsPAPRDevice *)(qdevs[i]); + VIOsPAPRDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(dev); ret = vio_make_devnode(dev, fdt); - if (ret < 0) { - goto out; + error_report("Couldn't create device node /vdevice/%s@%"PRIx32, + vdc->dt_name, dev->reg); + exit(1); } } - ret = 0; -out: g_free(qdevs); - - return ret; } -int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus) +gchar *spapr_vio_stdout_path(VIOsPAPRBus *bus) { VIOsPAPRDevice *dev; char *name, *path; - int ret, offset; dev = spapr_vty_get_default(bus); - if (!dev) - return 0; - - offset = fdt_path_offset(fdt, "/chosen"); - if (offset < 0) { - return offset; + if (!dev) { + return NULL; } name = spapr_vio_get_dev_name(DEVICE(dev)); path = g_strdup_printf("/vdevice/%s", name); - ret = fdt_setprop_string(fdt, offset, "linux,stdout-path", path); - g_free(name); - g_free(path); - - return ret; + return path; } diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events index dfeab93089..2297ead11e 100644 --- a/hw/ppc/trace-events +++ b/hw/ppc/trace-events @@ -35,6 +35,39 @@ spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64 spapr_iommu_ddw_remove(uint32_t liobn) "liobn=%"PRIx32 spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=%"PRIx64" addr=%"PRIx32 +# hw/ppc/spapr_drc.c +spapr_drc_set_isolation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: %"PRIx32 +spapr_drc_set_isolation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_isolation_state_deferring(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_indicator_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_set_allocation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_set_allocation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_configured_skipping(uint32_t index) "drc: 0x%"PRIx32", isolated device" +spapr_drc_entity_sense(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_detach(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_isolated(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_unusable(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_allocation(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_realize_child(uint32_t index, char *childname) "drc: 0x%"PRIx32", child name: %s" +spapr_drc_realize_complete(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_unrealize(uint32_t index) "drc: 0x%"PRIx32 + +# hw/ppc/spapr_rtas.c +spapr_rtas_set_indicator_invalid(uint32_t index) "sensor index: 0x%"PRIx32 +spapr_rtas_set_indicator_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32 +spapr_rtas_get_sensor_state_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32 +spapr_rtas_get_sensor_state_invalid(uint32_t index) "sensor index: 0x%"PRIx32 +spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx32 +spapr_rtas_ibm_configure_connector_missing_fdt(uint32_t index) "DRC index: 0x%"PRIx32 + +# hw/ppc/spapr_vio.c +spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64 +spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed" + # hw/ppc/ppc.c ppc_tb_adjust(uint64_t offs1, uint64_t offs2, 
int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" diff --git a/hw/s390x/css.c b/hw/s390x/css.c index bb8e4be339..0f2580d644 100644 --- a/hw/s390x/css.c +++ b/hw/s390x/css.c @@ -141,7 +141,8 @@ int map_indicator(AdapterInfo *adapter, IndAddr *indicator) int css_create_css_image(uint8_t cssid, bool default_image) { trace_css_new_image(cssid, default_image ? "(default)" : ""); - if (cssid > MAX_CSSID) { + /* 255 is reserved */ + if (cssid == 255) { return -EINVAL; } if (channel_subsys.css[cssid]) { @@ -774,7 +775,7 @@ int css_do_xsch(SubchDev *sch) PMCW *p = &sch->curr_status.pmcw; int ret; - if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) { ret = -ENODEV; goto out; } @@ -814,7 +815,7 @@ int css_do_csch(SubchDev *sch) PMCW *p = &sch->curr_status.pmcw; int ret; - if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) { ret = -ENODEV; goto out; } @@ -836,7 +837,7 @@ int css_do_hsch(SubchDev *sch) PMCW *p = &sch->curr_status.pmcw; int ret; - if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) { ret = -ENODEV; goto out; } @@ -912,7 +913,7 @@ int css_do_ssch(SubchDev *sch, ORB *orb) PMCW *p = &sch->curr_status.pmcw; int ret; - if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) { ret = -ENODEV; goto out; } @@ -989,7 +990,7 @@ int css_do_tsch_get_irb(SubchDev *sch, IRB *target_irb, int *irb_len) uint16_t stctl; IRB irb; - if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) { return 3; } @@ -1195,7 +1196,7 @@ int css_do_rsch(SubchDev *sch) PMCW *p = &sch->curr_status.pmcw; int ret; - if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) { + if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) { ret = -ENODEV; goto out; } @@ -1267,7 +1268,7 @@ bool css_schid_final(int m, uint8_t cssid, uint8_t ssid, uint16_t schid) uint8_t real_cssid; real_cssid = (!m && (cssid == 0)) ? 
channel_subsys.default_cssid : cssid; - if (real_cssid > MAX_CSSID || ssid > MAX_SSID || + if (ssid > MAX_SSID || !channel_subsys.css[real_cssid] || !channel_subsys.css[real_cssid]->sch_set[ssid]) { return true; @@ -1282,9 +1283,6 @@ static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type) CssImage *css; trace_css_chpid_add(cssid, chpid, type); - if (cssid > MAX_CSSID) { - return -EINVAL; - } css = channel_subsys.css[cssid]; if (!css) { return -EINVAL; diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c index 9c1c04e590..63f6248f1d 100644 --- a/hw/s390x/s390-pci-bus.c +++ b/hw/s390x/s390-pci-bus.c @@ -383,7 +383,6 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr, uint64_t pte; uint32_t flags; S390PCIBusDevice *pbdev = container_of(iommu, S390PCIBusDevice, iommu_mr); - S390pciState *s; IOMMUTLBEntry ret = { .target_as = &address_space_memory, .iova = 0, @@ -405,19 +404,6 @@ static IOMMUTLBEntry s390_translate_iommu(MemoryRegion *iommu, hwaddr addr, DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr); - s = S390_PCI_HOST_BRIDGE(pci_device_root_bus(pbdev->pdev)->qbus.parent); - /* s390 does not have an APIC mapped to main storage so we use - * a separate AddressSpace only for msix notifications - */ - if (addr == ZPCI_MSI_ADDR) { - ret.target_as = &s->msix_notify_as; - ret.iova = addr; - ret.translated_addr = addr; - ret.addr_mask = 0xfff; - ret.perm = IOMMU_RW; - return ret; - } - if (addr < pbdev->pba || addr > pbdev->pal) { return ret; } @@ -476,8 +462,7 @@ static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set) static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data, unsigned int size) { - S390PCIBusDevice *pbdev; - uint32_t io_int_word; + S390PCIBusDevice *pbdev = opaque; uint32_t idx = data >> ZPCI_MSI_VEC_BITS; uint32_t vec = data & ZPCI_MSI_VEC_MASK; uint64_t ind_bit; @@ -486,7 +471,6 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data, DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data, idx, vec); - pbdev = s390_pci_find_dev_by_idx(idx); if (!pbdev) { e |= (vec << ERR_EVENT_MVN_OFFSET); s390_pci_generate_error_event(ERR_EVENT_NOMSI, idx, 0, addr, e); @@ -504,8 +488,7 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data, 0x80 >> ((ind_bit + vec) % 8)); if (!set_ind_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8, 0x80 >> (sum_bit % 8))) { - io_int_word = (pbdev->isc << 27) | IO_INT_WORD_AI; - s390_io_interrupt(0, 0, 0, io_int_word); + css_adapter_interrupt(pbdev->isc); } } @@ -548,10 +531,6 @@ static void s390_pcihost_init_as(S390pciState *s) s->iommu[i] = iommu; } - - memory_region_init_io(&s->msix_notify_mr, OBJECT(s), - &s390_msi_ctrl_ops, s, "msix-s390", UINT64_MAX); - address_space_init(&s->msix_notify_as, &s->msix_notify_mr, "msix-pci"); } static int s390_pcihost_init(SysBusDevice *dev) @@ -581,7 +560,7 @@ static int s390_pcihost_init(SysBusDevice *dev) return 0; } -static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev) +static int s390_pci_setup_msix(S390PCIBusDevice *pbdev) { uint8_t pos; uint16_t ctrl; @@ -609,6 +588,26 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev) return 0; } +static void s390_pci_msix_init(S390PCIBusDevice *pbdev) +{ + char *name; + + name = g_strdup_printf("msix-s390-%04x", pbdev->uid); + + memory_region_init_io(&pbdev->msix_notify_mr, OBJECT(pbdev), + &s390_msi_ctrl_ops, pbdev, name, PAGE_SIZE); + memory_region_add_subregion(&pbdev->iommu->mr, ZPCI_MSI_ADDR, + 
&pbdev->msix_notify_mr); + + g_free(name); +} + +static void s390_pci_msix_free(S390PCIBusDevice *pbdev) +{ + memory_region_del_subregion(&pbdev->iommu->mr, &pbdev->msix_notify_mr); + object_unparent(OBJECT(&pbdev->msix_notify_mr)); +} + static S390PCIBusDevice *s390_pci_device_new(const char *target) { DeviceState *dev = NULL; @@ -649,6 +648,7 @@ static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev, pbdev = s390_pci_device_new(dev->id); if (!pbdev) { error_setg(errp, "create zpci device failed"); + return; } } @@ -661,7 +661,9 @@ static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev, pbdev->pdev = pdev; pbdev->iommu = s->iommu[PCI_SLOT(pdev->devfn)]; pbdev->state = ZPCI_FS_STANDBY; - s390_pcihost_setup_msix(pbdev); + + s390_pci_msix_init(pbdev); + s390_pci_setup_msix(pbdev); if (dev->hotplugged) { s390_pci_generate_plug_event(HP_EVENT_RESERVED_TO_STANDBY, @@ -717,11 +719,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev, break; } } - - if (!pbdev) { - object_unparent(OBJECT(pci_dev)); - return; - } + assert(pbdev != NULL); } else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) { pbdev = S390_PCI_DEVICE(dev); pci_dev = pbdev->pdev; @@ -752,6 +750,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler *hotplug_dev, s390_pci_generate_plug_event(HP_EVENT_STANDBY_TO_RESERVED, pbdev->fh, pbdev->fid); object_unparent(OBJECT(pci_dev)); + s390_pci_msix_free(pbdev); pbdev->pdev = NULL; pbdev->state = ZPCI_FS_RESERVED; out: @@ -808,17 +807,11 @@ static uint32_t s390_pci_generate_fid(Error **errp) { uint32_t fid = 0; - while (fid <= ZPCI_MAX_FID) { + do { if (!s390_pci_find_dev_by_fid(fid)) { return fid; } - - if (fid == ZPCI_MAX_FID) { - break; - } - - fid++; - } + } while (fid++ != ZPCI_MAX_FID); error_setg(errp, "no free fid could be found"); return 0; diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h index 4f564e02f2..7f2701301e 100644 --- a/hw/s390x/s390-pci-bus.h +++ b/hw/s390x/s390-pci-bus.h @@ -82,6 +82,7 @@ #define ZPCI_EDMA_ADDR 0x1ffffffffffffffULL #define PAGE_SHIFT 12 +#define PAGE_SIZE (1 << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) @@ -283,6 +284,7 @@ typedef struct S390PCIBusDevice { AdapterRoutes routes; S390PCIIOMMU *iommu; MemoryRegion iommu_mr; + MemoryRegion msix_notify_mr; IndAddr *summary_ind; IndAddr *indicator; QEMUTimer *release_timer; @@ -297,8 +299,6 @@ typedef struct S390pciState { S390PCIBus *bus; S390PCIBusDevice *pbdev[PCI_SLOT_MAX]; S390PCIIOMMU *iommu[PCI_SLOT_MAX]; - AddressSpace msix_notify_as; - MemoryRegion msix_notify_mr; QTAILQ_HEAD(, SeiContainer) pending_sei; } S390pciState; diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c index f069b110b4..0864d9be12 100644 --- a/hw/s390x/s390-pci-inst.c +++ b/hw/s390x/s390-pci-inst.c @@ -315,6 +315,8 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) S390PCIBusDevice *pbdev; uint64_t offset; uint64_t data; + MemoryRegion *mr; + MemTxResult result; uint8_t len; uint32_t fh; uint8_t pcias; @@ -363,9 +365,13 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) program_interrupt(env, PGM_OPERAND, 4); return 0; } - MemoryRegion *mr = pbdev->pdev->io_regions[pcias].memory; - memory_region_dispatch_read(mr, offset, &data, len, - MEMTXATTRS_UNSPECIFIED); + mr = pbdev->pdev->io_regions[pcias].memory; + result = memory_region_dispatch_read(mr, offset, &data, len, + MEMTXATTRS_UNSPECIFIED); + if (result != MEMTX_OK) { + program_interrupt(env, PGM_OPERAND, 
4); + return 0; + } } else if (pcias == 15) { if ((4 - (offset & 0x3)) < len) { program_interrupt(env, PGM_OPERAND, 4); @@ -442,6 +448,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) CPUS390XState *env = &cpu->env; uint64_t offset, data; S390PCIBusDevice *pbdev; + MemoryRegion *mr; + MemTxResult result; uint8_t len; uint32_t fh; uint8_t pcias; @@ -491,7 +499,7 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) program_interrupt(env, PGM_OPERAND, 4); return 0; } - MemoryRegion *mr; + if (trap_msix(pbdev, offset, pcias)) { offset = offset - pbdev->msix.table_offset; mr = &pbdev->pdev->msix_table_mmio; @@ -500,8 +508,12 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2) mr = pbdev->pdev->io_regions[pcias].memory; } - memory_region_dispatch_write(mr, offset, data, len, + result = memory_region_dispatch_write(mr, offset, data, len, MEMTXATTRS_UNSPECIFIED); + if (result != MEMTX_OK) { + program_interrupt(env, PGM_OPERAND, 4); + return 0; + } } else if (pcias == 15) { if ((4 - (offset & 0x3)) < len) { program_interrupt(env, PGM_OPERAND, 4); @@ -631,6 +643,7 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr, CPUS390XState *env = &cpu->env; S390PCIBusDevice *pbdev; MemoryRegion *mr; + MemTxResult result; int i; uint32_t fh; uint8_t pcias; @@ -688,7 +701,7 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr, mr = pbdev->pdev->io_regions[pcias].memory; if (!memory_region_access_valid(mr, env->regs[r3], len, true)) { - program_interrupt(env, PGM_ADDRESSING, 6); + program_interrupt(env, PGM_OPERAND, 6); return 0; } @@ -697,9 +710,13 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr, } for (i = 0; i < len / 8; i++) { - memory_region_dispatch_write(mr, env->regs[r3] + i * 8, + result = memory_region_dispatch_write(mr, env->regs[r3] + i * 8, ldq_p(buffer + i * 8), 8, MEMTXATTRS_UNSPECIFIED); + if (result != MEMTX_OK) { + program_interrupt(env, PGM_OPERAND, 6); + return 0; + } } setcc(cpu, ZPCI_PCI_LS_OK); diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 91d9cefbb5..e340eab36b 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -193,6 +193,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data) S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc); s390mc->ri_allowed = true; + s390mc->cpu_model_allowed = true; mc->init = ccw_init; mc->reset = s390_machine_reset; mc->hot_add_cpu = s390_hot_add_cpu; @@ -249,10 +250,28 @@ bool ri_allowed(void) return s390mc->ri_allowed; } + /* + * Make sure the "none" machine can have ri, otherwise it won't * be + * unlocked in KVM and therefore the host CPU model might be wrong. 
+ */ + return true; } return 0; } +bool cpu_model_allowed(void) +{ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + if (object_class_dynamic_cast(OBJECT_CLASS(mc), + TYPE_S390_CCW_MACHINE)) { + S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc); + + return s390mc->cpu_model_allowed; + } + /* allow CPU model qmp queries with the "none" machine */ + return true; +} + static inline void s390_machine_initfn(Object *obj) { object_property_add_bool(obj, "aes-key-wrap", @@ -316,7 +335,11 @@ static const TypeInfo ccw_machine_info = { } \ type_init(ccw_machine_register_##suffix) +#define CCW_COMPAT_2_7 \ + HW_COMPAT_2_7 + #define CCW_COMPAT_2_6 \ + CCW_COMPAT_2_7 \ HW_COMPAT_2_6 \ {\ .driver = TYPE_S390_IPL,\ @@ -372,14 +395,29 @@ static const TypeInfo ccw_machine_info = { .value = "0",\ }, +static void ccw_machine_2_8_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_2_8_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(2_8, "2.8", true); + static void ccw_machine_2_7_instance_options(MachineState *machine) { + ccw_machine_2_8_instance_options(machine); } static void ccw_machine_2_7_class_options(MachineClass *mc) { + S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc); + + s390mc->cpu_model_allowed = false; + ccw_machine_2_8_class_options(mc); + SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_7); } -DEFINE_CCW_MACHINE(2_7, "2.7", true); +DEFINE_CCW_MACHINE(2_7, "2.7", false); static void ccw_machine_2_6_instance_options(MachineState *machine) { diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c index 544c61643d..0a963473ad 100644 --- a/hw/s390x/s390-virtio.c +++ b/hw/s390x/s390-virtio.c @@ -101,7 +101,11 @@ void s390_init_cpus(MachineState *machine) gchar *name; if (machine->cpu_model == NULL) { - machine->cpu_model = "host"; + if (kvm_enabled()) { + machine->cpu_model = "host"; + } else { + machine->cpu_model = "qemu"; + } } cpu_states = g_new0(S390CPU *, max_cpus); diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c index fca37f511e..e741da1141 100644 --- a/hw/s390x/sclp.c +++ b/hw/s390x/sclp.c @@ -26,7 +26,25 @@ static inline SCLPDevice *get_sclp_device(void) { - return SCLP(object_resolve_path_type("", TYPE_SCLP, NULL)); + static SCLPDevice *sclp; + + if (!sclp) { + sclp = SCLP(object_resolve_path_type("", TYPE_SCLP, NULL)); + } + return sclp; +} + +static void prepare_cpu_entries(SCLPDevice *sclp, CPUEntry *entry, int count) +{ + uint8_t features[SCCB_CPU_FEATURE_LEN] = { 0 }; + int i; + + s390_get_feat_block(S390_FEAT_TYPE_SCLP_CPU, features); + for (i = 0; i < count; i++) { + entry[i].address = i; + entry[i].type = 0; + memcpy(entry[i].features, features, sizeof(entry[i].features)); + } } /* Provide information about the configuration, CPUs and storage */ @@ -37,7 +55,6 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); CPUState *cpu; int cpu_count = 0; - int i = 0; int rnsize, rnmax; int slots = MIN(machine->ram_slots, s390_get_memslot_count(kvm_state)); @@ -50,10 +67,15 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) read_info->offset_cpu = cpu_to_be16(offsetof(ReadInfo, entries)); read_info->highest_cpu = cpu_to_be16(max_cpus); - for (i = 0; i < cpu_count; i++) { - read_info->entries[i].address = i; - read_info->entries[i].type = 0; - } + read_info->ibc_val = cpu_to_be32(s390_get_ibc_val()); + + /* Configuration Characteristic (Extension) */ + s390_get_feat_block(S390_FEAT_TYPE_SCLP_CONF_CHAR, + read_info->conf_char); + 
s390_get_feat_block(S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, + read_info->conf_char_ext); + + prepare_cpu_entries(sclp, read_info->entries, cpu_count); read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO | SCLP_HAS_PCI_RECONFIG); @@ -88,6 +110,8 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb) read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR); } + read_info->mha_pow = s390_get_mha_pow(); + read_info->hmfai = cpu_to_be32(s390_get_hmfai()); rnsize = 1 << (sclp->increment_size - 20); if (rnsize <= 128) { @@ -304,7 +328,6 @@ static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) ReadCpuInfo *cpu_info = (ReadCpuInfo *) sccb; CPUState *cpu; int cpu_count = 0; - int i = 0; CPU_FOREACH(cpu) { cpu_count++; @@ -318,10 +341,7 @@ static void sclp_read_cpu_info(SCLPDevice *sclp, SCCB *sccb) cpu_info->offset_standby = cpu_to_be16(cpu_info->offset_configured + cpu_info->nr_configured*sizeof(CPUEntry)); - for (i = 0; i < cpu_count; i++) { - cpu_info->entries[i].address = i; - cpu_info->entries[i].type = 0; - } + prepare_cpu_entries(sclp, cpu_info->entries, cpu_count); sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION); } @@ -406,7 +426,7 @@ int sclp_service_call(CPUS390XState *env, uint64_t sccb, uint32_t code) goto out; } - sclp_c->execute(sclp, (SCCB *)&work_sccb, code); + sclp_c->execute(sclp, &work_sccb, code); cpu_physical_memory_write(sccb, &work_sccb, be16_to_cpu(work_sccb.h.length)); diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index a554a24d06..f5c1d98192 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -59,38 +59,11 @@ static void virtio_ccw_stop_ioeventfd(VirtioCcwDevice *dev) virtio_bus_stop_ioeventfd(&dev->bus); } -static bool virtio_ccw_ioeventfd_started(DeviceState *d) +static bool virtio_ccw_ioeventfd_enabled(DeviceState *d) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - return dev->ioeventfd_started; -} - -static void virtio_ccw_ioeventfd_set_started(DeviceState *d, bool started, - bool err) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - dev->ioeventfd_started = started; - if (err) { - /* Disable ioeventfd for this device. */ - dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; - } -} - -static bool virtio_ccw_ioeventfd_disabled(DeviceState *d) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - return dev->ioeventfd_disabled || - !(dev->flags & VIRTIO_CCW_FLAG_USE_IOEVENTFD); -} - -static void virtio_ccw_ioeventfd_set_disabled(DeviceState *d, bool disabled) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - - dev->ioeventfd_disabled = disabled; + return (dev->flags & VIRTIO_CCW_FLAG_USE_IOEVENTFD) != 0; } static int virtio_ccw_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, @@ -330,6 +303,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) if (!ccw.cda) { ret = -EFAULT; } else { + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + features.index = address_space_ldub(&address_space_memory, ccw.cda + sizeof(features.features), @@ -339,7 +314,7 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) if (dev->revision >= 1) { /* Don't offer legacy features for modern devices. 
*/ features.features = (uint32_t) - (vdev->host_features & ~VIRTIO_LEGACY_FEATURES); + (vdev->host_features & ~vdc->legacy_features); } else { features.features = (uint32_t)vdev->host_features; } @@ -455,6 +430,26 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw) } } break; + case CCW_CMD_READ_STATUS: + if (check_len) { + if (ccw.count != sizeof(status)) { + ret = -EINVAL; + break; + } + } else if (ccw.count < sizeof(status)) { + /* Can't execute command. */ + ret = -EINVAL; + break; + } + if (!ccw.cda) { + ret = -EFAULT; + } else { + address_space_stb(&address_space_memory, ccw.cda, vdev->status, + MEMTXATTRS_UNSPECIFIED, NULL); + sch->curr_status.scsw.count = ccw.count - sizeof(vdev->status);; + ret = 0; + } + break; case CCW_CMD_WRITE_STATUS: if (check_len) { if (ccw.count != sizeof(status)) { @@ -689,6 +684,10 @@ static void virtio_ccw_device_realize(VirtioCcwDevice *dev, Error **errp) sch->cssid, sch->ssid, sch->schid, sch->devno, ccw_dev->bus_id.valid ? "user-configured" : "auto-configured"); + if (!kvm_eventfds_enabled()) { + dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; + } + if (k->realize) { k->realize(dev, &err); } @@ -1261,6 +1260,16 @@ static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f) return 0; } +static void virtio_ccw_pre_plugged(DeviceState *d, Error **errp) +{ + VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); + VirtIODevice *vdev = virtio_bus_get_device(&dev->bus); + + if (dev->max_rev >= 1) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); + } +} + /* This is called by virtio-bus just after the device is plugged. */ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) { @@ -1270,6 +1279,10 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) SubchDev *sch = ccw_dev->sch; int n = virtio_get_num_queues(vdev); + if (!virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) { + dev->max_rev = 0; + } + if (virtio_get_num_queues(vdev) > VIRTIO_CCW_QUEUE_MAX) { error_setg(errp, "The number of virtqueues %d " "exceeds ccw limit %d", n, @@ -1277,31 +1290,13 @@ static void virtio_ccw_device_plugged(DeviceState *d, Error **errp) return; } - if (!kvm_eventfds_enabled()) { - dev->flags &= ~VIRTIO_CCW_FLAG_USE_IOEVENTFD; - } - sch->id.cu_model = virtio_bus_get_vdev_id(&dev->bus); - if (dev->max_rev >= 1) { - virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); - } css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, d->hotplugged, 1); } -static void virtio_ccw_post_plugged(DeviceState *d, Error **errp) -{ - VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); - VirtIODevice *vdev = virtio_bus_get_device(&dev->bus); - - if (!virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1)) { - /* A backend didn't support modern virtio. 
*/ - dev->max_rev = 0; - } -} - static void virtio_ccw_device_unplugged(DeviceState *d) { VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d); @@ -1593,13 +1588,10 @@ static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data) k->load_queue = virtio_ccw_load_queue; k->save_config = virtio_ccw_save_config; k->load_config = virtio_ccw_load_config; + k->pre_plugged = virtio_ccw_pre_plugged; k->device_plugged = virtio_ccw_device_plugged; - k->post_plugged = virtio_ccw_post_plugged; k->device_unplugged = virtio_ccw_device_unplugged; - k->ioeventfd_started = virtio_ccw_ioeventfd_started; - k->ioeventfd_set_started = virtio_ccw_ioeventfd_set_started; - k->ioeventfd_disabled = virtio_ccw_ioeventfd_disabled; - k->ioeventfd_set_disabled = virtio_ccw_ioeventfd_set_disabled; + k->ioeventfd_enabled = virtio_ccw_ioeventfd_enabled; k->ioeventfd_assign = virtio_ccw_ioeventfd_assign; } @@ -1658,6 +1650,57 @@ static const TypeInfo virtio_ccw_9p_info = { }; #endif +#ifdef CONFIG_VHOST_VSOCK + +static Property vhost_vsock_ccw_properties[] = { + DEFINE_PROP_CSS_DEV_ID("devno", VirtioCcwDevice, parent_obj.bus_id), + DEFINE_PROP_UINT32("max_revision", VirtioCcwDevice, max_rev, + VIRTIO_CCW_MAX_REV), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_vsock_ccw_realize(VirtioCcwDevice *ccw_dev, Error **errp) +{ + VHostVSockCCWState *dev = VHOST_VSOCK_CCW(ccw_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + Error *err = NULL; + + qdev_set_parent_bus(vdev, BUS(&ccw_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", &err); + if (err) { + error_propagate(errp, err); + } +} + +static void vhost_vsock_ccw_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass); + + k->realize = vhost_vsock_ccw_realize; + k->exit = virtio_ccw_exit; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = vhost_vsock_ccw_properties; + dc->reset = virtio_ccw_reset; +} + +static void vhost_vsock_ccw_instance_init(Object *obj) +{ + VHostVSockCCWState *dev = VHOST_VSOCK_CCW(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_VSOCK); +} + +static const TypeInfo vhost_vsock_ccw_info = { + .name = TYPE_VHOST_VSOCK_CCW, + .parent = TYPE_VIRTIO_CCW_DEVICE, + .instance_size = sizeof(VHostVSockCCWState), + .instance_init = vhost_vsock_ccw_instance_init, + .class_init = vhost_vsock_ccw_class_init, +}; +#endif + static void virtio_ccw_register(void) { type_register_static(&virtio_ccw_bus_info); @@ -1674,6 +1717,9 @@ static void virtio_ccw_register(void) #ifdef CONFIG_VIRTFS type_register_static(&virtio_ccw_9p_info); #endif +#ifdef CONFIG_VHOST_VSOCK + type_register_static(&vhost_vsock_ccw_info); +#endif } type_init(virtio_ccw_register) diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h index 1c6bc86316..77d10f1671 100644 --- a/hw/s390x/virtio-ccw.h +++ b/hw/s390x/virtio-ccw.h @@ -23,6 +23,9 @@ #include "hw/virtio/virtio-balloon.h" #include "hw/virtio/virtio-rng.h" #include "hw/virtio/virtio-bus.h" +#ifdef CONFIG_VHOST_VSOCK +#include "hw/virtio/vhost-vsock.h" +#endif /* CONFIG_VHOST_VSOCK */ #include "hw/s390x/s390_flic.h" #include "hw/s390x/css.h" @@ -42,6 +45,7 @@ #define CCW_CMD_SET_IND 0x43 #define CCW_CMD_SET_CONF_IND 0x53 #define CCW_CMD_READ_VQ_CONF 0x32 +#define CCW_CMD_READ_STATUS 0x72 #define CCW_CMD_SET_IND_ADAPTER 0x73 #define CCW_CMD_SET_VIRTIO_REV 0x83 @@ -82,8 +86,6 @@ struct VirtioCcwDevice { int revision; uint32_t max_rev; VirtioBusState bus; - bool ioeventfd_started; - 
bool ioeventfd_disabled; uint32_t flags; uint8_t thinint_isc; AdapterRoutes routes; @@ -95,7 +97,7 @@ struct VirtioCcwDevice { }; /* The maximum virtio revision we support. */ -#define VIRTIO_CCW_MAX_REV 1 +#define VIRTIO_CCW_MAX_REV 2 static inline int virtio_ccw_rev_max(VirtioCcwDevice *dev) { return dev->max_rev; @@ -180,7 +182,6 @@ typedef struct VirtIORNGCcw { VirtIORNG vdev; } VirtIORNGCcw; -void virtio_ccw_device_update_status(SubchDev *sch); VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch); #ifdef CONFIG_VIRTFS @@ -197,4 +198,16 @@ typedef struct V9fsCCWState { #endif /* CONFIG_VIRTFS */ +#ifdef CONFIG_VHOST_VSOCK +#define TYPE_VHOST_VSOCK_CCW "vhost-vsock-ccw" +#define VHOST_VSOCK_CCW(obj) \ + OBJECT_CHECK(VHostVSockCCWState, (obj), TYPE_VHOST_VSOCK_CCW) + +typedef struct VHostVSockCCWState { + VirtioCcwDevice parent_obj; + VHostVSock vdev; +} VHostVSockCCWState; + +#endif /* CONFIG_VHOST_VSOCK */ + #endif diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c index 1f2f2d33dd..5a5a4e946a 100644 --- a/hw/scsi/esp.c +++ b/hw/scsi/esp.c @@ -406,11 +406,9 @@ uint64_t esp_reg_read(ESPState *s, uint32_t saddr) /* Data out. */ qemu_log_mask(LOG_UNIMP, "esp: PIO data read not implemented\n"); s->rregs[ESP_FIFO] = 0; - esp_raise_irq(s); } else if (s->ti_rptr < s->ti_wptr) { s->ti_size--; s->rregs[ESP_FIFO] = s->ti_buf[s->ti_rptr++]; - esp_raise_irq(s); } if (s->ti_rptr == s->ti_wptr) { s->ti_rptr = 0; diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index df205cdafe..feb1191315 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -19,6 +19,7 @@ #include "hw/pci/pci.h" #include "hw/scsi/scsi.h" #include "sysemu/dma.h" +#include "qemu/log.h" //#define DEBUG_LSI //#define DEBUG_LSI_REG @@ -34,6 +35,21 @@ do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__); exit(1);} while do { fprintf(stderr, "lsi_scsi: error: " fmt , ## __VA_ARGS__);} while (0) #endif +static const char *names[] = { + "SCNTL0", "SCNTL1", "SCNTL2", "SCNTL3", "SCID", "SXFER", "SDID", "GPREG", + "SFBR", "SOCL", "SSID", "SBCL", "DSTAT", "SSTAT0", "SSTAT1", "SSTAT2", + "DSA0", "DSA1", "DSA2", "DSA3", "ISTAT", "0x15", "0x16", "0x17", + "CTEST0", "CTEST1", "CTEST2", "CTEST3", "TEMP0", "TEMP1", "TEMP2", "TEMP3", + "DFIFO", "CTEST4", "CTEST5", "CTEST6", "DBC0", "DBC1", "DBC2", "DCMD", + "DNAD0", "DNAD1", "DNAD2", "DNAD3", "DSP0", "DSP1", "DSP2", "DSP3", + "DSPS0", "DSPS1", "DSPS2", "DSPS3", "SCRATCHA0", "SCRATCHA1", "SCRATCHA2", "SCRATCHA3", + "DMODE", "DIEN", "SBR", "DCNTL", "ADDER0", "ADDER1", "ADDER2", "ADDER3", + "SIEN0", "SIEN1", "SIST0", "SIST1", "SLPAR", "0x45", "MACNTL", "GPCNTL", + "STIME0", "STIME1", "RESPID", "0x4b", "STEST0", "STEST1", "STEST2", "STEST3", + "SIDL", "0x51", "0x52", "0x53", "SODL", "0x55", "0x56", "0x57", + "SBDL", "0x59", "0x5a", "0x5b", "SCRATCHB0", "SCRATCHB1", "SCRATCHB2", "SCRATCHB3", +}; + #define LSI_MAX_DEVS 7 #define LSI_SCNTL0_TRG 0x01 @@ -194,6 +210,7 @@ typedef struct { MemoryRegion mmio_io; MemoryRegion ram_io; MemoryRegion io_io; + AddressSpace pci_io_as; int carry; /* ??? Should this be an a visible register somewhere? 
*/ int status; @@ -309,7 +326,7 @@ static void lsi_soft_reset(LSIState *s) s->istat0 = 0; s->istat1 = 0; s->dcmd = 0x40; - s->dstat = LSI_DSTAT_DFE; + s->dstat = 0; s->dien = 0; s->sist0 = 0; s->sist1 = 0; @@ -391,6 +408,30 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val); static void lsi_execute_script(LSIState *s); static void lsi_reselect(LSIState *s, lsi_request *p); +static inline int lsi_mem_read(LSIState *s, dma_addr_t addr, + void *buf, dma_addr_t len) +{ + if (s->dmode & LSI_DMODE_SIOM) { + address_space_read(&s->pci_io_as, addr, MEMTXATTRS_UNSPECIFIED, + buf, len); + return 0; + } else { + return pci_dma_read(PCI_DEVICE(s), addr, buf, len); + } +} + +static inline int lsi_mem_write(LSIState *s, dma_addr_t addr, + const void *buf, dma_addr_t len) +{ + if (s->dmode & LSI_DMODE_DIOM) { + address_space_write(&s->pci_io_as, addr, MEMTXATTRS_UNSPECIFIED, + buf, len); + return 0; + } else { + return pci_dma_write(PCI_DEVICE(s), addr, buf, len); + } +} + static inline uint32_t read_dword(LSIState *s, uint32_t addr) { uint32_t buf; @@ -534,7 +575,6 @@ static void lsi_bad_selection(LSIState *s, uint32_t id) /* Initiate a SCSI layer data transfer. */ static void lsi_do_dma(LSIState *s, int out) { - PCIDevice *pci_dev; uint32_t count; dma_addr_t addr; SCSIDevice *dev; @@ -546,7 +586,6 @@ static void lsi_do_dma(LSIState *s, int out) return; } - pci_dev = PCI_DEVICE(s); dev = s->current->req->dev; assert(dev); @@ -572,9 +611,9 @@ static void lsi_do_dma(LSIState *s, int out) } /* ??? Set SFBR to first data byte. */ if (out) { - pci_dma_read(pci_dev, addr, s->current->dma_buf, count); + lsi_mem_read(s, addr, s->current->dma_buf, count); } else { - pci_dma_write(pci_dev, addr, s->current->dma_buf, count); + lsi_mem_write(s, addr, s->current->dma_buf, count); } s->current->dma_len -= count; if (s->current->dma_len == 0) { @@ -1006,15 +1045,14 @@ static void lsi_do_msgout(LSIState *s) #define LSI_BUF_SIZE 4096 static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count) { - PCIDevice *d = PCI_DEVICE(s); int n; uint8_t buf[LSI_BUF_SIZE]; DPRINTF("memcpy dest 0x%08x src 0x%08x count %d\n", dest, src, count); while (count) { n = (count > LSI_BUF_SIZE) ? 
LSI_BUF_SIZE : count; - pci_dma_read(d, src, buf, n); - pci_dma_write(d, dest, buf, n); + lsi_mem_read(s, src, buf, n); + lsi_mem_write(s, dest, buf, n); src += n; dest += n; count -= n; @@ -1480,155 +1518,200 @@ static void lsi_execute_script(LSIState *s) static uint8_t lsi_reg_readb(LSIState *s, int offset) { - uint8_t tmp; + uint8_t ret; + #define CASE_GET_REG24(name, addr) \ - case addr: return s->name & 0xff; \ - case addr + 1: return (s->name >> 8) & 0xff; \ - case addr + 2: return (s->name >> 16) & 0xff; + case addr: ret = s->name & 0xff; break; \ + case addr + 1: ret = (s->name >> 8) & 0xff; break; \ + case addr + 2: ret = (s->name >> 16) & 0xff; break; #define CASE_GET_REG32(name, addr) \ - case addr: return s->name & 0xff; \ - case addr + 1: return (s->name >> 8) & 0xff; \ - case addr + 2: return (s->name >> 16) & 0xff; \ - case addr + 3: return (s->name >> 24) & 0xff; + case addr: ret = s->name & 0xff; break; \ + case addr + 1: ret = (s->name >> 8) & 0xff; break; \ + case addr + 2: ret = (s->name >> 16) & 0xff; break; \ + case addr + 3: ret = (s->name >> 24) & 0xff; break; -#ifdef DEBUG_LSI_REG - DPRINTF("Read reg %x\n", offset); -#endif switch (offset) { case 0x00: /* SCNTL0 */ - return s->scntl0; + ret = s->scntl0; + break; case 0x01: /* SCNTL1 */ - return s->scntl1; + ret = s->scntl1; + break; case 0x02: /* SCNTL2 */ - return s->scntl2; + ret = s->scntl2; + break; case 0x03: /* SCNTL3 */ - return s->scntl3; + ret = s->scntl3; + break; case 0x04: /* SCID */ - return s->scid; + ret = s->scid; + break; case 0x05: /* SXFER */ - return s->sxfer; + ret = s->sxfer; + break; case 0x06: /* SDID */ - return s->sdid; + ret = s->sdid; + break; case 0x07: /* GPREG0 */ - return 0x7f; + ret = 0x7f; + break; case 0x08: /* Revision ID */ - return 0x00; + ret = 0x00; + break; case 0x09: /* SOCL */ - return s->socl; + ret = s->socl; + break; case 0xa: /* SSID */ - return s->ssid; + ret = s->ssid; + break; case 0xb: /* SBCL */ /* ??? This is not correct. However it's (hopefully) only used for diagnostics, so should be ok. */ - return 0; + ret = 0; + break; case 0xc: /* DSTAT */ - tmp = s->dstat | LSI_DSTAT_DFE; + ret = s->dstat | LSI_DSTAT_DFE; if ((s->istat0 & LSI_ISTAT0_INTF) == 0) s->dstat = 0; lsi_update_irq(s); - return tmp; + break; case 0x0d: /* SSTAT0 */ - return s->sstat0; + ret = s->sstat0; + break; case 0x0e: /* SSTAT1 */ - return s->sstat1; + ret = s->sstat1; + break; case 0x0f: /* SSTAT2 */ - return s->scntl1 & LSI_SCNTL1_CON ? 0 : 2; + ret = s->scntl1 & LSI_SCNTL1_CON ? 
0 : 2; + break; CASE_GET_REG32(dsa, 0x10) case 0x14: /* ISTAT0 */ - return s->istat0; + ret = s->istat0; + break; case 0x15: /* ISTAT1 */ - return s->istat1; + ret = s->istat1; + break; case 0x16: /* MBOX0 */ - return s->mbox0; + ret = s->mbox0; + break; case 0x17: /* MBOX1 */ - return s->mbox1; + ret = s->mbox1; + break; case 0x18: /* CTEST0 */ - return 0xff; + ret = 0xff; + break; case 0x19: /* CTEST1 */ - return 0; + ret = 0; + break; case 0x1a: /* CTEST2 */ - tmp = s->ctest2 | LSI_CTEST2_DACK | LSI_CTEST2_CM; + ret = s->ctest2 | LSI_CTEST2_DACK | LSI_CTEST2_CM; if (s->istat0 & LSI_ISTAT0_SIGP) { s->istat0 &= ~LSI_ISTAT0_SIGP; - tmp |= LSI_CTEST2_SIGP; + ret |= LSI_CTEST2_SIGP; } - return tmp; + break; case 0x1b: /* CTEST3 */ - return s->ctest3; + ret = s->ctest3; + break; CASE_GET_REG32(temp, 0x1c) case 0x20: /* DFIFO */ - return 0; + ret = 0; + break; case 0x21: /* CTEST4 */ - return s->ctest4; + ret = s->ctest4; + break; case 0x22: /* CTEST5 */ - return s->ctest5; + ret = s->ctest5; + break; case 0x23: /* CTEST6 */ - return 0; + ret = 0; + break; CASE_GET_REG24(dbc, 0x24) case 0x27: /* DCMD */ - return s->dcmd; + ret = s->dcmd; + break; CASE_GET_REG32(dnad, 0x28) CASE_GET_REG32(dsp, 0x2c) CASE_GET_REG32(dsps, 0x30) CASE_GET_REG32(scratch[0], 0x34) case 0x38: /* DMODE */ - return s->dmode; + ret = s->dmode; + break; case 0x39: /* DIEN */ - return s->dien; + ret = s->dien; + break; case 0x3a: /* SBR */ - return s->sbr; + ret = s->sbr; + break; case 0x3b: /* DCNTL */ - return s->dcntl; + ret = s->dcntl; + break; /* ADDER Output (Debug of relative jump address) */ CASE_GET_REG32(adder, 0x3c) case 0x40: /* SIEN0 */ - return s->sien0; + ret = s->sien0; + break; case 0x41: /* SIEN1 */ - return s->sien1; + ret = s->sien1; + break; case 0x42: /* SIST0 */ - tmp = s->sist0; + ret = s->sist0; s->sist0 = 0; lsi_update_irq(s); - return tmp; + break; case 0x43: /* SIST1 */ - tmp = s->sist1; + ret = s->sist1; s->sist1 = 0; lsi_update_irq(s); - return tmp; + break; case 0x46: /* MACNTL */ - return 0x0f; + ret = 0x0f; + break; case 0x47: /* GPCNTL0 */ - return 0x0f; + ret = 0x0f; + break; case 0x48: /* STIME0 */ - return s->stime0; + ret = s->stime0; + break; case 0x4a: /* RESPID0 */ - return s->respid0; + ret = s->respid0; + break; case 0x4b: /* RESPID1 */ - return s->respid1; + ret = s->respid1; + break; case 0x4d: /* STEST1 */ - return s->stest1; + ret = s->stest1; + break; case 0x4e: /* STEST2 */ - return s->stest2; + ret = s->stest2; + break; case 0x4f: /* STEST3 */ - return s->stest3; + ret = s->stest3; + break; case 0x50: /* SIDL */ /* This is needed by the linux drivers. We currently only update it during the MSG IN phase. */ - return s->sidl; + ret = s->sidl; + break; case 0x52: /* STEST4 */ - return 0xe0; + ret = 0xe0; + break; case 0x56: /* CCNTL0 */ - return s->ccntl0; + ret = s->ccntl0; + break; case 0x57: /* CCNTL1 */ - return s->ccntl1; + ret = s->ccntl1; + break; case 0x58: /* SBDL */ /* Some drivers peek at the data bus during the MSG IN phase. */ if ((s->sstat1 & PHASE_MASK) == PHASE_MI) return s->msg[0]; - return 0; + ret = 0; + break; case 0x59: /* SBDL high */ - return 0; + ret = 0; + break; CASE_GET_REG32(mmrs, 0xa0) CASE_GET_REG32(mmws, 0xa4) CASE_GET_REG32(sfs, 0xa8) @@ -1643,18 +1726,34 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset) CASE_GET_REG32(ia, 0xd4) CASE_GET_REG32(sbc, 0xd8) CASE_GET_REG32(csbc, 0xdc) - } - if (offset >= 0x5c && offset < 0xa0) { + case 0x5c ... 
0x9f: + { int n; int shift; n = (offset - 0x58) >> 2; shift = (offset & 3) * 8; - return (s->scratch[n] >> shift) & 0xff; + ret = (s->scratch[n] >> shift) & 0xff; + break; + } + default: + { + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: invalid read from reg %s %x\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", + offset); + ret = 0xff; + break; + } } - BADF("readb 0x%x\n", offset); - exit(1); #undef CASE_GET_REG24 #undef CASE_GET_REG32 + +#ifdef DEBUG_LSI_REG + DPRINTF("Read reg %s %x = %02x\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", offset, ret); +#endif + + return ret; } static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) @@ -1671,7 +1770,8 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) case addr + 3: s->name &= 0x00ffffff; s->name |= val << 24; break; #ifdef DEBUG_LSI_REG - DPRINTF("Write reg %x = %02x\n", offset, val); + DPRINTF("Write reg %s %x = %02x\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", offset, val); #endif switch (offset) { case 0x00: /* SCNTL0 */ @@ -1799,9 +1899,6 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) CASE_SET_REG32(dsps, 0x30) CASE_SET_REG32(scratch[0], 0x34) case 0x38: /* DMODE */ - if (val & (LSI_DMODE_SIOM | LSI_DMODE_DIOM)) { - BADF("IO mappings not implemented\n"); - } s->dmode = val; break; case 0x39: /* DIEN */ @@ -1886,7 +1983,10 @@ static void lsi_reg_writeb(LSIState *s, int offset, uint8_t val) shift = (offset & 3) * 8; s->scratch[n] = deposit32(s->scratch[n], shift, 8, val); } else { - BADF("Unhandled writeb 0x%x = 0x%x\n", offset, val); + qemu_log_mask(LOG_GUEST_ERROR, + "lsi_scsi: invalid write to reg %s %x (0x%02x)\n", + offset < ARRAY_SIZE(names) ? names[offset] : "???", + offset, val); } } #undef CASE_SET_REG24 @@ -2108,6 +2208,8 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, "lsi-io", 256); + address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_io); pci_register_bar(dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio_io); pci_register_bar(dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->ram_io); @@ -2119,6 +2221,13 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) } } +static void lsi_scsi_unrealize(DeviceState *dev, Error **errp) +{ + LSIState *s = LSI53C895A(dev); + + address_space_destroy(&s->pci_io_as); +} + static void lsi_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2129,6 +2238,7 @@ static void lsi_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_LSI_53C895A; k->class_id = PCI_CLASS_STORAGE_SCSI; k->subsystem_id = 0x1000; + dc->unrealize = lsi_scsi_unrealize; dc->reset = lsi_scsi_reset; dc->vmsd = &vmstate_lsi_scsi; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index e968302fdc..67fc1e7893 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -300,12 +300,6 @@ static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl) return iov_count - i; } -static void megasas_unmap_sgl(MegasasCmd *cmd) -{ - qemu_sglist_destroy(&cmd->qsg); - cmd->iov_offset = 0; -} - /* * passthrough sense and io sense are at the same offset */ @@ -461,9 +455,12 @@ static void megasas_unmap_frame(MegasasState *s, MegasasCmd *cmd) { PCIDevice *p = PCI_DEVICE(s); - pci_dma_unmap(p, cmd->frame, cmd->pa_size, 0, 0); + if (cmd->pa_size) { + pci_dma_unmap(p, cmd->frame, cmd->pa_size, 
0, 0); + } cmd->frame = NULL; cmd->pa = 0; + cmd->pa_size = 0; clear_bit(cmd->index, s->frame_map); } @@ -577,6 +574,20 @@ static void megasas_complete_frame(MegasasState *s, uint64_t context) } } +static void megasas_complete_command(MegasasCmd *cmd) +{ + qemu_sglist_destroy(&cmd->qsg); + cmd->iov_size = 0; + cmd->iov_offset = 0; + + cmd->req->hba_private = NULL; + scsi_req_unref(cmd->req); + cmd->req = NULL; + + megasas_unmap_frame(cmd->state, cmd); + megasas_complete_frame(cmd->state, cmd->context); +} + static void megasas_reset_frames(MegasasState *s) { int i; @@ -593,9 +604,9 @@ static void megasas_reset_frames(MegasasState *s) static void megasas_abort_command(MegasasCmd *cmd) { - if (cmd->req) { + /* Never abort internal commands. */ + if (cmd->req != NULL) { scsi_req_cancel(cmd->req); - cmd->req = NULL; } } @@ -686,9 +697,6 @@ static void megasas_finish_dcmd(MegasasCmd *cmd, uint32_t iov_size) { trace_megasas_finish_dcmd(cmd->index, iov_size); - if (cmd->frame->header.sge_count) { - qemu_sglist_destroy(&cmd->qsg); - } if (iov_size > cmd->iov_size) { if (megasas_frame_is_ieee_sgl(cmd)) { cmd->frame->dcmd.sgl.sg_skinny->len = cpu_to_le32(iov_size); @@ -698,7 +706,6 @@ static void megasas_finish_dcmd(MegasasCmd *cmd, uint32_t iov_size) cmd->frame->dcmd.sgl.sg32->len = cpu_to_le32(iov_size); } } - cmd->iov_size = 0; } static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd) @@ -1586,7 +1593,6 @@ static int megasas_finish_internal_dcmd(MegasasCmd *cmd, int lun = req->lun; opcode = le32_to_cpu(cmd->frame->dcmd.opcode); - scsi_req_unref(req); trace_megasas_dcmd_internal_finish(cmd->index, opcode, lun); switch (opcode) { case MFI_DCMD_PD_GET_INFO: @@ -1857,7 +1863,11 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status, trace_megasas_command_complete(cmd->index, status, resid); - if (cmd->req != req) { + if (req->io_canceled) { + return; + } + + if (cmd->req == NULL) { /* * Internal command complete */ @@ -1876,25 +1886,21 @@ static void megasas_command_complete(SCSIRequest *req, uint32_t status, megasas_copy_sense(cmd); } - megasas_unmap_sgl(cmd); cmd->frame->header.scsi_status = req->status; - scsi_req_unref(cmd->req); - cmd->req = NULL; } cmd->frame->header.cmd_status = cmd_status; - megasas_unmap_frame(cmd->state, cmd); - megasas_complete_frame(cmd->state, cmd->context); + megasas_complete_command(cmd); } -static void megasas_command_cancel(SCSIRequest *req) +static void megasas_command_cancelled(SCSIRequest *req) { MegasasCmd *cmd = req->hba_private; - if (cmd) { - megasas_abort_command(cmd); - } else { - scsi_req_unref(req); + if (!cmd) { + return; } + cmd->frame->header.cmd_status = MFI_STAT_SCSI_IO_FAILED; + megasas_complete_command(cmd); } static int megasas_handle_abort(MegasasState *s, MegasasCmd *cmd) @@ -1981,7 +1987,11 @@ static void megasas_handle_frame(MegasasState *s, uint64_t frame_addr, break; } if (frame_status != MFI_STAT_INVALID_STATUS) { - cmd->frame->header.cmd_status = frame_status; + if (cmd->frame) { + cmd->frame->header.cmd_status = frame_status; + } else { + megasas_frame_set_cmd_status(s, frame_addr, frame_status); + } megasas_unmap_frame(s, cmd); megasas_complete_frame(s, cmd->context); } @@ -2309,7 +2319,7 @@ static const struct SCSIBusInfo megasas_scsi_info = { .transfer_data = megasas_xfer_complete, .get_sg_list = megasas_get_sg_list, .complete = megasas_command_complete, - .cancel = megasas_command_cancel, + .cancel = megasas_command_cancelled, }; static void megasas_scsi_realize(PCIDevice *dev, Error **errp) diff --git 
a/hw/scsi/mptconfig.c b/hw/scsi/mptconfig.c index 707185469e..87a416a5cb 100644 --- a/hw/scsi/mptconfig.c +++ b/hw/scsi/mptconfig.c @@ -158,7 +158,7 @@ static size_t mptsas_config_pack(uint8_t **data, const char *fmt, ...) va_end(ap); if (data) { - assert(ret < 256 && (ret % 4) == 0); + assert(ret / 4 < 256 && (ret % 4) == 0); stb_p(*data + 1, ret / 4); } return ret; @@ -203,7 +203,7 @@ size_t mptsas_config_manufacturing_1(MPTSASState *s, uint8_t **data, int address { /* VPD - all zeros */ return MPTSAS_CONFIG_PACK(1, MPI_CONFIG_PAGETYPE_MANUFACTURING, 0x00, - "s256"); + "*s256"); } static @@ -328,7 +328,7 @@ size_t mptsas_config_ioc_0(MPTSASState *s, uint8_t **data, int address) return MPTSAS_CONFIG_PACK(0, MPI_CONFIG_PAGETYPE_IOC, 0x01, "*l*lwwb*b*b*blww", pcic->vendor_id, pcic->device_id, pcic->revision, - pcic->subsystem_vendor_id, + pcic->class_id, pcic->subsystem_vendor_id, pcic->subsystem_id); } diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index 0e0a22f696..ad87e78fe2 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -304,7 +304,7 @@ static int mptsas_process_scsi_io_request(MPTSASState *s, goto bad; } - req = g_new(MPTSASRequest, 1); + req = g_new0(MPTSASRequest, 1); QTAILQ_INSERT_TAIL(&s->pending, req, next); req->scsi_io = *scsi_io; req->dev = s; @@ -1269,7 +1269,7 @@ static const struct SCSIBusInfo mptsas_scsi_info = { .load_request = mptsas_load_request, }; -static void mptsas_scsi_init(PCIDevice *dev, Error **errp) +static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) { DeviceState *d = DEVICE(dev); MPTSASState *s = MPT_SAS(dev); @@ -1426,7 +1426,7 @@ static void mptsas1068_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); - pc->realize = mptsas_scsi_init; + pc->realize = mptsas_scsi_realize; pc->exit = mptsas_scsi_uninit; pc->romfile = 0; pc->vendor_id = PCI_VENDOR_ID_LSI_LOGIC; diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 836a1553ed..a96319138a 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -341,6 +341,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) r->req.resid -= r->req.sg->size; r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk), r->req.sg, r->sector << BDRV_SECTOR_BITS, + BDRV_SECTOR_SIZE, sdc->dma_readv, r, scsi_dma_complete, r, DMA_DIRECTION_FROM_DEVICE); } else { @@ -396,7 +397,7 @@ static void scsi_read_data(SCSIRequest *req) return; } - if (s->tray_open) { + if (!blk_is_available(req->dev->conf.blk)) { scsi_read_complete(r, -ENOMEDIUM); return; } @@ -519,7 +520,7 @@ static void scsi_write_data(SCSIRequest *req) scsi_write_complete_noio(r, 0); return; } - if (s->tray_open) { + if (!blk_is_available(req->dev->conf.blk)) { scsi_write_complete_noio(r, -ENOMEDIUM); return; } @@ -539,6 +540,7 @@ static void scsi_write_data(SCSIRequest *req) r->req.resid -= r->req.sg->size; r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk), r->req.sg, r->sector << BDRV_SECTOR_BITS, + BDRV_SECTOR_SIZE, sdc->dma_writev, r, scsi_dma_complete, r, DMA_DIRECTION_TO_DEVICE); } else { @@ -599,8 +601,8 @@ static int scsi_disk_emulate_inquiry(SCSIRequest *req, uint8_t *outbuf) } l = strlen(s->serial); - if (l > 20) { - l = 20; + if (l > 36) { + l = 36; } DPRINTF("Inquiry EVPD[Serial number] " @@ -792,10 +794,7 @@ static inline bool media_is_dvd(SCSIDiskState *s) if (s->qdev.type != TYPE_ROM) { return false; } - if (!blk_is_inserted(s->qdev.conf.blk)) { - return false; - } - if (s->tray_open) { + if (!blk_is_available(s->qdev.conf.blk)) { return 
false; } blk_get_geometry(s->qdev.conf.blk, &nb_sectors); @@ -808,10 +807,7 @@ static inline bool media_is_cd(SCSIDiskState *s) if (s->qdev.type != TYPE_ROM) { return false; } - if (!blk_is_inserted(s->qdev.conf.blk)) { - return false; - } - if (s->tray_open) { + if (!blk_is_available(s->qdev.conf.blk)) { return false; } blk_get_geometry(s->qdev.conf.blk, &nb_sectors); @@ -875,7 +871,7 @@ static int scsi_read_dvd_structure(SCSIDiskState *s, SCSIDiskReq *r, } if (format != 0xff) { - if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) { + if (!blk_is_available(s->qdev.conf.blk)) { scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); return -1; } @@ -1857,7 +1853,7 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) break; default: - if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) { + if (!blk_is_available(s->qdev.conf.blk)) { scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); return 0; } @@ -1886,7 +1882,7 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) memset(outbuf, 0, r->buflen); switch (req->cmd.buf[0]) { case TEST_UNIT_READY: - assert(!s->tray_open && blk_is_inserted(s->qdev.conf.blk)); + assert(blk_is_available(s->qdev.conf.blk)); break; case INQUIRY: buflen = scsi_disk_emulate_inquiry(req, outbuf); @@ -2126,7 +2122,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) command = buf[0]; - if (s->tray_open || !blk_is_inserted(s->qdev.conf.blk)) { + if (!blk_is_available(s->qdev.conf.blk)) { scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); return 0; } @@ -2359,6 +2355,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) static void scsi_cd_realize(SCSIDevice *dev, Error **errp) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); + + if (!dev->conf.blk) { + dev->conf.blk = blk_new(); + } + s->qdev.blocksize = 2048; s->qdev.type = TYPE_ROM; s->features |= 1 << SCSI_DISK_F_REMOVABLE; diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c index 8fbd50f660..6090a204a0 100644 --- a/hw/scsi/spapr_vscsi.c +++ b/hw/scsi/spapr_vscsi.c @@ -42,19 +42,10 @@ #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" #include "viosrp.h" +#include "trace.h" #include -/*#define DEBUG_VSCSI*/ - -#ifdef DEBUG_VSCSI -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) 
\ - do { } while (0) -#endif - /* * Virtual SCSI device */ @@ -237,8 +228,7 @@ static int vscsi_send_rsp(VSCSIState *s, vscsi_req *req, int total_len = sizeof(iu->srp.rsp); uint8_t sol_not = iu->srp.cmd.sol_not; - DPRINTF("VSCSI: Sending resp status: 0x%x, " - "res_in: %d, res_out: %d\n", status, res_in, res_out); + trace_spapr_vscsi_send_rsp(status, res_in, res_out); memset(iu, 0, sizeof(struct srp_rsp)); iu->srp.rsp.opcode = SRP_RSP; @@ -298,13 +288,13 @@ static int vscsi_fetch_desc(VSCSIState *s, struct vscsi_req *req, switch (req->dma_fmt) { case SRP_NO_DATA_DESC: { - DPRINTF("VSCSI: no data descriptor\n"); + trace_spapr_vscsi_fetch_desc_no_data(); return 0; } case SRP_DATA_DESC_DIRECT: { memcpy(ret, cmd->add_data + req->cdb_offset, sizeof(*ret)); assert(req->cur_desc_num == 0); - DPRINTF("VSCSI: direct segment\n"); + trace_spapr_vscsi_fetch_desc_direct(); break; } case SRP_DATA_DESC_INDIRECT: { @@ -312,30 +302,29 @@ static int vscsi_fetch_desc(VSCSIState *s, struct vscsi_req *req, (cmd->add_data + req->cdb_offset); if (n < req->local_desc) { *ret = tmp->desc_list[n]; - DPRINTF("VSCSI: indirect segment local tag=0x%x desc#%d/%d\n", - req->qtag, n, req->local_desc); - + trace_spapr_vscsi_fetch_desc_indirect(req->qtag, n, + req->local_desc); } else if (n < req->total_desc) { int rc; struct srp_direct_buf tbl_desc = vscsi_swap_desc(tmp->table_desc); unsigned desc_offset = n * sizeof(struct srp_direct_buf); if (desc_offset >= tbl_desc.len) { - DPRINTF("VSCSI: #%d is ouf of range (%d bytes)\n", - n, desc_offset); + trace_spapr_vscsi_fetch_desc_out_of_range(n, desc_offset); return -1; } rc = spapr_vio_dma_read(&s->vdev, tbl_desc.va + desc_offset, ret, sizeof(struct srp_direct_buf)); if (rc) { - DPRINTF("VSCSI: spapr_vio_dma_read -> %d reading ext_desc\n", - rc); + trace_spapr_vscsi_fetch_desc_dma_read_error(rc); return -1; } - DPRINTF("VSCSI: indirect segment ext. tag=0x%x desc#%d/%d { va=%"PRIx64" len=%x }\n", - req->qtag, n, req->total_desc, tbl_desc.va, tbl_desc.len); + trace_spapr_vscsi_fetch_desc_indirect_seg_ext(req->qtag, n, + req->total_desc, + tbl_desc.va, + tbl_desc.len); } else { - DPRINTF("VSCSI: Out of descriptors !\n"); + trace_spapr_vscsi_fetch_desc_out_of_desc(); return 0; } break; @@ -347,15 +336,16 @@ static int vscsi_fetch_desc(VSCSIState *s, struct vscsi_req *req, *ret = vscsi_swap_desc(*ret); if (buf_offset > ret->len) { - DPRINTF(" offset=%x is out of a descriptor #%d boundary=%x\n", - buf_offset, req->cur_desc_num, ret->len); + trace_spapr_vscsi_fetch_desc_out_of_desc_boundary(buf_offset, + req->cur_desc_num, + ret->len); return -1; } ret->va += buf_offset; ret->len -= buf_offset; - DPRINTF(" cur=%d offs=%x ret { va=%"PRIx64" len=%x }\n", - req->cur_desc_num, req->cur_desc_offset, ret->va, ret->len); + trace_spapr_vscsi_fetch_desc_done(req->cur_desc_num, req->cur_desc_offset, + ret->va, ret->len); return ret->len ? 1 : 0; } @@ -398,7 +388,7 @@ static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req, int rc = 0; uint32_t llen, total = 0; - DPRINTF("VSCSI: indirect segment 0x%x bytes\n", len); + trace_spapr_vscsi_srp_indirect_data(len); /* While we have data ... 
*/ while (len) { @@ -417,11 +407,10 @@ static int vscsi_srp_indirect_data(VSCSIState *s, vscsi_req *req, rc = spapr_vio_dma_write(&s->vdev, md.va, buf, llen); } if (rc) { - DPRINTF("VSCSI: spapr_vio_dma_r/w(%d) -> %d\n", req->writing, rc); + trace_spapr_vscsi_srp_indirect_data_rw(req->writing, rc); break; } - DPRINTF("VSCSI: data: %02x %02x %02x %02x...\n", - buf[0], buf[1], buf[2], buf[3]); + trace_spapr_vscsi_srp_indirect_data_buf(buf[0], buf[1], buf[2], buf[3]); len -= llen; buf += llen; @@ -447,7 +436,7 @@ static int vscsi_srp_transfer_data(VSCSIState *s, vscsi_req *req, switch (req->dma_fmt) { case SRP_NO_DATA_DESC: - DPRINTF("VSCSI: no data desc transfer, skipping 0x%x bytes\n", len); + trace_spapr_vscsi_srp_transfer_data(len); break; case SRP_DATA_DESC_DIRECT: err = vscsi_srp_direct_data(s, req, buf, len); @@ -527,8 +516,7 @@ static void vscsi_transfer_data(SCSIRequest *sreq, uint32_t len) uint8_t *buf; int rc = 0; - DPRINTF("VSCSI: SCSI xfer complete tag=0x%x len=0x%x, req=%p\n", - sreq->tag, len, req); + trace_spapr_vscsi_transfer_data(sreq->tag, len, req); if (req == NULL) { fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", sreq->tag); return; @@ -557,8 +545,7 @@ static void vscsi_command_complete(SCSIRequest *sreq, uint32_t status, size_t re vscsi_req *req = sreq->hba_private; int32_t res_in = 0, res_out = 0; - DPRINTF("VSCSI: SCSI cmd complete, tag=0x%x status=0x%x, req=%p\n", - sreq->tag, status, req); + trace_spapr_vscsi_command_complete(sreq->tag, status, req); if (req == NULL) { fprintf(stderr, "VSCSI: Can't find request for tag 0x%x\n", sreq->tag); return; @@ -567,16 +554,15 @@ static void vscsi_command_complete(SCSIRequest *sreq, uint32_t status, size_t re if (status == CHECK_CONDITION) { req->senselen = scsi_req_get_sense(req->sreq, req->sense, sizeof(req->sense)); - DPRINTF("VSCSI: Sense data, %d bytes:\n", req->senselen); - DPRINTF(" %02x %02x %02x %02x %02x %02x %02x %02x\n", + trace_spapr_vscsi_command_complete_sense_data1(req->senselen, req->sense[0], req->sense[1], req->sense[2], req->sense[3], req->sense[4], req->sense[5], req->sense[6], req->sense[7]); - DPRINTF(" %02x %02x %02x %02x %02x %02x %02x %02x\n", + trace_spapr_vscsi_command_complete_sense_data2( req->sense[8], req->sense[9], req->sense[10], req->sense[11], req->sense[12], req->sense[13], req->sense[14], req->sense[15]); } - DPRINTF("VSCSI: Command complete err=%d\n", status); + trace_spapr_vscsi_command_complete_status(status); if (status == 0) { /* We handle overflows, not underflows for normal commands, * but hopefully nobody cares @@ -635,8 +621,8 @@ static void vscsi_save_request(QEMUFile *f, SCSIRequest *sreq) vmstate_save_state(f, &vmstate_spapr_vscsi_req, req, NULL); - DPRINTF("VSCSI: saving tag=%u, current desc#%d, offset=%x\n", - req->qtag, req->cur_desc_num, req->cur_desc_offset); + trace_spapr_vscsi_save_request(req->qtag, req->cur_desc_num, + req->cur_desc_offset); } static void *vscsi_load_request(QEMUFile *f, SCSIRequest *sreq) @@ -660,8 +646,8 @@ static void *vscsi_load_request(QEMUFile *f, SCSIRequest *sreq) req->sreq = scsi_req_ref(sreq); - DPRINTF("VSCSI: restoring tag=%u, current desc#%d, offset=%x\n", - req->qtag, req->cur_desc_num, req->cur_desc_offset); + trace_spapr_vscsi_load_request(req->qtag, req->cur_desc_num, + req->cur_desc_offset); return req; } @@ -672,7 +658,7 @@ static void vscsi_process_login(VSCSIState *s, vscsi_req *req) struct srp_login_rsp *rsp = &iu->srp.login_rsp; uint64_t tag = iu->srp.rsp.tag; - DPRINTF("VSCSI: Got login, sendin response !\n"); + 
trace_spapr_vscsi_process_login(); /* TODO handle case that requested size is wrong and * buffer format is wrong @@ -795,8 +781,7 @@ static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req) sdev = vscsi_device_find(&s->bus, be64_to_cpu(srp->cmd.lun), &lun); if (!sdev) { - DPRINTF("VSCSI: Command for lun %08" PRIx64 " with no drive\n", - be64_to_cpu(srp->cmd.lun)); + trace_spapr_vscsi_queue_cmd_no_drive(be64_to_cpu(srp->cmd.lun)); if (srp->cmd.cdb[0] == INQUIRY) { vscsi_inquiry_no_target(s, req); } else { @@ -808,9 +793,8 @@ static int vscsi_queue_cmd(VSCSIState *s, vscsi_req *req) req->sreq = scsi_req_new(sdev, req->qtag, lun, srp->cmd.cdb, req); n = scsi_req_enqueue(req->sreq); - DPRINTF("VSCSI: Queued command tag 0x%x CMD 0x%x=%s LUN %d ret: %d\n", - req->qtag, srp->cmd.cdb[0], scsi_command_name(srp->cmd.cdb[0]), - lun, n); + trace_spapr_vscsi_queue_cmd(req->qtag, srp->cmd.cdb[0], + scsi_command_name(srp->cmd.cdb[0]), lun, n); if (n) { /* Transfer direction must be set before preprocessing the @@ -1141,7 +1125,7 @@ static int vscsi_do_crq(struct VIOsPAPRDevice *dev, uint8_t *crq_data) crq.s.IU_length = be16_to_cpu(crq.s.IU_length); crq.s.IU_data_ptr = be64_to_cpu(crq.s.IU_data_ptr); - DPRINTF("VSCSI: do_crq %02x %02x ...\n", crq.raw[0], crq.raw[1]); + trace_spapr_vscsi_do_crq(crq.raw[0], crq.raw[1]); switch (crq.s.valid) { case 0xc0: /* Init command/response */ diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events index ed64858fe3..4a2e5d66df 100644 --- a/hw/scsi/trace-events +++ b/hw/scsi/trace-events @@ -202,3 +202,30 @@ esp_pci_dma_abort(uint32_t val) "ABORT (%.8x)" esp_pci_dma_start(uint32_t val) "START (%.8x)" esp_pci_sbac_read(uint32_t reg) "sbac: 0x%8.8x" esp_pci_sbac_write(uint32_t reg, uint32_t val) "sbac: 0x%8.8x -> 0x%8.8x" + +# hw/scsi/spapr_vscsi.c +spapr_vscsi_send_rsp(uint8_t status, int32_t res_in, int32_t res_out) "status: 0x%x, res_in: %"PRId32", res_out: %"PRId32 +spapr_vscsi_fetch_desc_no_data(void) "no data descriptor" +spapr_vscsi_fetch_desc_direct(void) "direct segment" +spapr_vscsi_fetch_desc_indirect(uint32_t qtag, unsigned desc, unsigned local_desc) "indirect segment local tag=0x%"PRIx32" desc#%u/%u" +spapr_vscsi_fetch_desc_out_of_range(unsigned desc, unsigned desc_offset) "#%u is ouf of range (%u bytes)" +spapr_vscsi_fetch_desc_dma_read_error(int rc) "spapr_vio_dma_read -> %d reading ext_desc" +spapr_vscsi_fetch_desc_indirect_seg_ext(uint32_t qtag, unsigned n, unsigned desc, uint64_t va, uint32_t len) "indirect segment ext. tag=0x%"PRIx32" desc#%u/%u { va=0x%"PRIx64" len=0x%"PRIx32" }" +spapr_vscsi_fetch_desc_out_of_desc(void) "Out of descriptors !" +spapr_vscsi_fetch_desc_out_of_desc_boundary(unsigned offset, unsigned desc, uint32_t len) " offset=0x%x is out of a descriptor #%u boundary=%"PRIx32 +spapr_vscsi_fetch_desc_done(unsigned desc_num, unsigned desc_offset, uint64_t va, uint32_t len) " cur=%u offs=0x%x ret { va=0x%"PRIx64" len=0x%"PRIx32" }" +spapr_vscsi_srp_indirect_data(uint32_t len) "indirect segment 0x%"PRIx32" bytes" +spapr_vscsi_srp_indirect_data_rw(int writing, int rc) "spapr_vio_dma_r/w(%d) -> %d" +spapr_vscsi_srp_indirect_data_buf(unsigned a, unsigned b, unsigned c, unsigned d) " data: %02x %02x %02x %02x..." 
+spapr_vscsi_srp_transfer_data(uint32_t len) "no data desc transfer, skipping 0x%"PRIx32" bytes" +spapr_vscsi_transfer_data(uint32_t tag, uint32_t len, void *req) "SCSI xfer complete tag=0x%"PRIx32" len=0x%"PRIx32", req=%p" +spapr_vscsi_command_complete(uint32_t tag, uint32_t status, void *req) "SCSI cmd complete, tag=0x%"PRIx32" status=0x%"PRIx32", req=%p" +spapr_vscsi_command_complete_sense_data1(uint32_t len, unsigned s0, unsigned s1, unsigned s2, unsigned s3, unsigned s4, unsigned s5, unsigned s6, unsigned s7) "Sense data, %d bytes: %02x %02x %02x %02x %02x %02x %02x %02x" +spapr_vscsi_command_complete_sense_data2(unsigned s8, unsigned s9, unsigned s10, unsigned s11, unsigned s12, unsigned s13, unsigned s14, unsigned s15) " %02x %02x %02x %02x %02x %02x %02x %02x" +spapr_vscsi_command_complete_status(uint32_t status) "Command complete err=%"PRIu32 +spapr_vscsi_save_request(uint32_t qtag, unsigned desc, unsigned offset) "saving tag=%"PRIu32", current desc#%u, offset=0x%x" +spapr_vscsi_load_request(uint32_t qtag, unsigned desc, unsigned offset) "restoring tag=%"PRIu32", current desc#%u, offset=0x%x" +spapr_vscsi_process_login(void) "Got login, sending response !" +spapr_vscsi_queue_cmd_no_drive(uint64_t lun) "Command for lun %08" PRIx64 " with no drive" +spapr_vscsi_queue_cmd(uint32_t qtag, unsigned cdb, const char *cmd, int lun, int ret) "Queued command tag 0x%"PRIx32" CMD 0x%x=%s LUN %d ret: %d" +spapr_vscsi_do_crq(unsigned c0, unsigned c1) "crq: %02x %02x ..." diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index b173b94949..6b8d0f0024 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -12,6 +12,7 @@ */ #include "qemu/osdep.h" +#include "qapi/error.h" #include "hw/virtio/virtio-scsi.h" #include "qemu/error-report.h" #include "sysemu/block-backend.h" @@ -21,20 +22,30 @@ #include "hw/virtio/virtio-access.h" /* Context: QEMU global mutex held */ -void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread) +void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIODevice *vdev = VIRTIO_DEVICE(s); + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - assert(!s->ctx); - s->ctx = iothread_get_aio_context(vs->conf.iothread); - - /* Don't try if transport does not support notifiers. 
*/ - if (!k->set_guest_notifiers || !k->ioeventfd_started) { - fprintf(stderr, "virtio-scsi: Failed to set iothread " - "(transport does not support notifiers)"); - exit(1); + if (vs->conf.iothread) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " + "(transport does not support notifiers)"); + return; + } + if (!virtio_device_ioeventfd_enabled(vdev)) { + error_setg(errp, "ioeventfd is required for iothread"); + return; + } + s->ctx = iothread_get_aio_context(vs->conf.iothread); + } else { + if (!virtio_device_ioeventfd_enabled(vdev)) { + return; + } + s->ctx = qemu_get_aio_context(); } } @@ -84,13 +95,6 @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n, return 0; } -void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req) -{ - if (virtio_should_notify(vdev, req->vq)) { - event_notifier_set(virtio_queue_get_guest_notifier(req->vq)); - } -} - /* assumes s->ctx held */ static void virtio_scsi_clear_aio(VirtIOSCSI *s) { @@ -105,19 +109,19 @@ static void virtio_scsi_clear_aio(VirtIOSCSI *s) } /* Context: QEMU global mutex held */ -void virtio_scsi_dataplane_start(VirtIOSCSI *s) +int virtio_scsi_dataplane_start(VirtIODevice *vdev) { int i; int rc; - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); if (s->dataplane_started || s->dataplane_starting || - s->dataplane_fenced || - s->ctx != iothread_get_aio_context(vs->conf.iothread)) { - return; + s->dataplane_fenced) { + return 0; } s->dataplane_starting = true; @@ -152,7 +156,7 @@ void virtio_scsi_dataplane_start(VirtIOSCSI *s) s->dataplane_starting = false; s->dataplane_started = true; aio_context_release(s->ctx); - return; + return 0; fail_vrings: virtio_scsi_clear_aio(s); @@ -165,14 +169,16 @@ void virtio_scsi_dataplane_start(VirtIOSCSI *s) s->dataplane_fenced = true; s->dataplane_starting = false; s->dataplane_started = true; + return -ENOSYS; } /* Context: QEMU global mutex held */ -void virtio_scsi_dataplane_stop(VirtIOSCSI *s) +void virtio_scsi_dataplane_stop(VirtIODevice *vdev) { - BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); - VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); + VirtIOSCSI *s = VIRTIO_SCSI(vdev); int i; if (!s->dataplane_started || s->dataplane_stopping) { @@ -186,16 +192,13 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) return; } s->dataplane_stopping = true; - assert(s->ctx == iothread_get_aio_context(vs->conf.iothread)); aio_context_acquire(s->ctx); - virtio_scsi_clear_aio(s); + aio_context_release(s->ctx); blk_drain_all(); /* ensure there are no in-flight requests */ - aio_context_release(s->ctx); - for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); } diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index ce57ef6248..10fd687193 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -69,7 +69,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size); virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size); if (s->dataplane_started && !s->dataplane_fenced) { - 
virtio_scsi_dataplane_notify(vdev, req); + virtio_notify_irqfd(vdev, vq); } else { virtio_notify(vdev, vq); } @@ -81,10 +81,11 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) virtio_scsi_free_req(req); } -static void virtio_scsi_bad_req(void) +static void virtio_scsi_bad_req(VirtIOSCSIReq *req) { - error_report("wrong size for virtio-scsi headers"); - exit(1); + virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers"); + virtqueue_detach_element(req->vq, &req->elem, 0); + virtio_scsi_free_req(req); } static size_t qemu_sgl_concat(VirtIOSCSIReq *req, struct iovec *iov, @@ -236,6 +237,13 @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data) g_free(n); } +static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d) +{ + if (s->dataplane_started && d && blk_is_available(d->conf.blk)) { + assert(blk_get_aio_context(d->conf.blk) == s->ctx); + } +} + /* Return 0 if the request is ready to be completed and return to guest; * -EINPROGRESS if the request is submitted and will be completed later, in the * case of async cancellation. */ @@ -247,9 +255,7 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) int target; int ret = 0; - if (s->dataplane_started && d) { - assert(blk_get_aio_context(d->conf.blk) == s->ctx); - } + virtio_scsi_ctx_check(s, d); /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */ req->resp.tmf.response = VIRTIO_SCSI_S_OK; @@ -382,7 +388,7 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0, &type, sizeof(type)) < sizeof(type)) { - virtio_scsi_bad_req(); + virtio_scsi_bad_req(req); return; } @@ -390,7 +396,8 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) if (type == VIRTIO_SCSI_T_TMF) { if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq), sizeof(VirtIOSCSICtrlTMFResp)) < 0) { - virtio_scsi_bad_req(); + virtio_scsi_bad_req(req); + return; } else { r = virtio_scsi_do_tmf(s, req); } @@ -399,7 +406,8 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) type == VIRTIO_SCSI_T_AN_SUBSCRIBE) { if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq), sizeof(VirtIOSCSICtrlANResp)) < 0) { - virtio_scsi_bad_req(); + virtio_scsi_bad_req(req); + return; } else { req->resp.an.event_actual = 0; req->resp.an.response = VIRTIO_SCSI_S_OK; @@ -426,7 +434,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSI *s = (VirtIOSCSI *)vdev; if (s->ctx) { - virtio_scsi_dataplane_start(s); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; } @@ -516,7 +524,7 @@ static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req) virtio_scsi_complete_cmd_req(req); } -static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) +static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) { VirtIOSCSICommon *vs = &s->parent_obj; SCSIDevice *d; @@ -527,21 +535,20 @@ static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req if (rc < 0) { if (rc == -ENOTSUP) { virtio_scsi_fail_cmd_req(req); + return -ENOTSUP; } else { - virtio_scsi_bad_req(); + virtio_scsi_bad_req(req); + return -EINVAL; } - return false; } d = virtio_scsi_device_find(s, req->req.cmd.lun); if (!d) { req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; virtio_scsi_complete_cmd_req(req); - return false; - } - if (s->dataplane_started) { - assert(blk_get_aio_context(d->conf.blk) == s->ctx); + return -ENOENT; } + 
virtio_scsi_ctx_check(s, d); req->sreq = scsi_req_new(d, req->req.cmd.tag, virtio_scsi_get_lun(req->req.cmd.lun), req->req.cmd.cdb, req); @@ -551,11 +558,11 @@ static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req req->sreq->cmd.xfer > req->qsgl.size)) { req->resp.cmd.response = VIRTIO_SCSI_S_OVERRUN; virtio_scsi_complete_cmd_req(req); - return false; + return -ENOBUFS; } scsi_req_ref(req->sreq); blk_io_plug(d->conf.blk); - return true; + return 0; } static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) @@ -571,11 +578,24 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) { VirtIOSCSIReq *req, *next; + int ret; + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); while ((req = virtio_scsi_pop_req(s, vq))) { - if (virtio_scsi_handle_cmd_req_prepare(s, req)) { + ret = virtio_scsi_handle_cmd_req_prepare(s, req); + if (!ret) { QTAILQ_INSERT_TAIL(&reqs, req, next); + } else if (ret == -EINVAL) { + /* The device is broken and shouldn't process any request */ + while (!QTAILQ_EMPTY(&reqs)) { + req = QTAILQ_FIRST(&reqs); + QTAILQ_REMOVE(&reqs, req, next); + blk_io_unplug(req->sreq->dev->conf.blk); + scsi_req_unref(req->sreq); + virtqueue_detach_element(req->vq, &req->elem, 0); + virtio_scsi_free_req(req); + } } } @@ -590,7 +610,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSI *s = (VirtIOSCSI *)vdev; if (s->ctx) { - virtio_scsi_dataplane_start(s); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; } @@ -624,8 +644,9 @@ static void virtio_scsi_set_config(VirtIODevice *vdev, if ((uint32_t) virtio_ldl_p(vdev, &scsiconf->sense_size) >= 65536 || (uint32_t) virtio_ldl_p(vdev, &scsiconf->cdb_size) >= 256) { - error_report("bad data written to virtio-scsi configuration space"); - exit(1); + virtio_error(vdev, + "bad data written to virtio-scsi configuration space"); + return; } vs->sense_size = virtio_ldl_p(vdev, &scsiconf->sense_size); @@ -648,9 +669,7 @@ static void virtio_scsi_reset(VirtIODevice *vdev) VirtIOSCSI *s = VIRTIO_SCSI(vdev); VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev); - if (s->ctx) { - virtio_scsi_dataplane_stop(s); - } + assert(!s->dataplane_started); s->resetting++; qbus_reset_all(&s->bus.qbus); s->resetting--; @@ -660,22 +679,6 @@ static void virtio_scsi_reset(VirtIODevice *vdev) s->events_dropped = false; } -/* The device does not have anything to save beyond the virtio data. - * Request data is saved with callbacks from SCSI devices. 
- */ -static void virtio_scsi_save(QEMUFile *f, void *opaque, size_t size) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - virtio_save(vdev, f); -} - -static int virtio_scsi_load(QEMUFile *f, void *opaque, size_t size) -{ - VirtIODevice *vdev = VIRTIO_DEVICE(opaque); - - return virtio_load(vdev, f, 1); -} - void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, uint32_t event, uint32_t reason) { @@ -705,7 +708,8 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, } if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) { - virtio_scsi_bad_req(); + virtio_scsi_bad_req(req); + goto out; } evt = &req->resp.event; @@ -743,7 +747,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) VirtIOSCSI *s = VIRTIO_SCSI(vdev); if (s->ctx) { - virtio_scsi_dataplane_start(s); + virtio_device_start_ioeventfd(vdev); if (!s->dataplane_fenced) { return; } @@ -842,14 +846,10 @@ void virtio_scsi_common_realize(DeviceState *dev, Error **errp, s->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE; s->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE; - s->ctrl_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl); - s->event_vq = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, evt); + s->ctrl_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, ctrl); + s->event_vq = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, evt); for (i = 0; i < s->conf.num_queues; i++) { - s->cmd_vqs[i] = virtio_add_queue_aio(vdev, VIRTIO_SCSI_VQ_SIZE, cmd); - } - - if (s->conf.iothread) { - virtio_scsi_set_iothread(VIRTIO_SCSI(s), s->conf.iothread); + s->cmd_vqs[i] = virtio_add_queue(vdev, VIRTIO_SCSI_VQ_SIZE, cmd); } } @@ -879,6 +879,8 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) return; } } + + virtio_scsi_dataplane_setup(s, errp); } static void virtio_scsi_instance_init(Object *obj) @@ -918,7 +920,15 @@ static Property virtio_scsi_properties[] = { DEFINE_PROP_END_OF_LIST(), }; -VMSTATE_VIRTIO_DEVICE(scsi, 1, virtio_scsi_load, virtio_scsi_save); +static const VMStateDescription vmstate_virtio_scsi = { + .name = "virtio-scsi", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; static void virtio_scsi_common_class_init(ObjectClass *klass, void *data) { @@ -943,6 +953,8 @@ static void virtio_scsi_class_init(ObjectClass *klass, void *data) vdc->set_config = virtio_scsi_set_config; vdc->get_features = virtio_scsi_get_features; vdc->reset = virtio_scsi_reset; + vdc->start_ioeventfd = virtio_scsi_dataplane_start; + vdc->stop_ioeventfd = virtio_scsi_dataplane_stop; hc->plug = virtio_scsi_hotplug; hc->unplug = virtio_scsi_hotunplug; } diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index 5116f4ad68..a5ce7dea8e 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -40,6 +40,8 @@ #define PVSCSI_MAX_DEVS (64) #define PVSCSI_MSIX_NUM_VECTORS (1) +#define PVSCSI_MAX_SG_ELEM 2048 + #define PVSCSI_MAX_CMD_DATA_WORDS \ (sizeof(PVSCSICmdDescSetupRings)/sizeof(uint32_t)) @@ -152,7 +154,7 @@ pvscsi_log2(uint32_t input) return log; } -static int +static void pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) { int i; @@ -160,10 +162,6 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) uint32_t req_ring_size, cmp_ring_size; m->rs_pa = ri->ringsStatePPN << VMW_PAGE_SHIFT; - if ((ri->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) - || (ri->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)) { - return -1; - } req_ring_size = ri->reqRingNumPages * 
PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; cmp_ring_size = ri->cmpRingNumPages * PVSCSI_MAX_NUM_CMP_ENTRIES_PER_PAGE; txr_len_log2 = pvscsi_log2(req_ring_size - 1); @@ -195,8 +193,6 @@ pvscsi_ring_init_data(PVSCSIRingInfo *m, PVSCSICmdDescSetupRings *ri) /* Flush ring state page changes */ smp_wmb(); - - return 0; } static int @@ -251,8 +247,11 @@ static hwaddr pvscsi_ring_pop_req_descr(PVSCSIRingInfo *mgr) { uint32_t ready_ptr = RS_GET_FIELD(mgr, reqProdIdx); + uint32_t ring_size = PVSCSI_MAX_NUM_PAGES_REQ_RING + * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; - if (ready_ptr != mgr->consumed_ptr) { + if (ready_ptr != mgr->consumed_ptr + && ready_ptr - mgr->consumed_ptr < ring_size) { uint32_t next_ready_ptr = mgr->consumed_ptr++ & mgr->txr_len_mask; uint32_t next_ready_page = @@ -634,17 +633,16 @@ pvscsi_queue_pending_descriptor(PVSCSIState *s, SCSIDevice **d, static void pvscsi_convert_sglist(PVSCSIRequest *r) { - int chunk_size; + uint32_t chunk_size, elmcnt = 0; uint64_t data_length = r->req.dataLen; PVSCSISGState sg = r->sg; - while (data_length) { - while (!sg.resid) { + while (data_length && elmcnt < PVSCSI_MAX_SG_ELEM) { + while (!sg.resid && elmcnt++ < PVSCSI_MAX_SG_ELEM) { pvscsi_get_next_sg_elem(&sg); trace_pvscsi_convert_sglist(r->req.context, r->sg.dataAddr, r->sg.resid); } - assert(data_length > 0); - chunk_size = MIN((unsigned) data_length, sg.resid); + chunk_size = MIN(data_length, sg.resid); if (chunk_size) { qemu_sglist_add(&r->sgl, sg.dataAddr, chunk_size); } @@ -746,7 +744,7 @@ pvscsi_dbg_dump_tx_rings_config(PVSCSICmdDescSetupRings *rc) trace_pvscsi_tx_rings_num_pages("Confirm Ring", rc->cmpRingNumPages); for (i = 0; i < rc->cmpRingNumPages; i++) { - trace_pvscsi_tx_rings_ppn("Confirm Ring", rc->reqRingPPNs[i]); + trace_pvscsi_tx_rings_ppn("Confirm Ring", rc->cmpRingPPNs[i]); } } @@ -779,11 +777,16 @@ pvscsi_on_cmd_setup_rings(PVSCSIState *s) trace_pvscsi_on_cmd_arrived("PVSCSI_CMD_SETUP_RINGS"); - pvscsi_dbg_dump_tx_rings_config(rc); - if (pvscsi_ring_init_data(&s->rings, rc) < 0) { + if (!rc->reqRingNumPages + || rc->reqRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES + || !rc->cmpRingNumPages + || rc->cmpRingNumPages > PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) { return PVSCSI_COMMAND_PROCESSING_FAILED; } + pvscsi_dbg_dump_tx_rings_config(rc); + pvscsi_ring_init_data(&s->rings, rc); + s->rings_info_valid = TRUE; return PVSCSI_COMMAND_PROCESSING_SUCCEEDED; } diff --git a/hw/sd/sd.c b/hw/sd/sd.c index 87c6dc108d..8e88e8311a 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -1876,6 +1876,14 @@ static void sd_instance_init(Object *obj) sd->ocr_power_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sd_ocr_powerup, sd); } +static void sd_instance_finalize(Object *obj) +{ + SDState *sd = SD_CARD(obj); + + timer_del(sd->ocr_power_timer); + timer_free(sd->ocr_power_timer); +} + static void sd_realize(DeviceState *dev, Error **errp) { SDState *sd = SD_CARD(dev); @@ -1927,6 +1935,7 @@ static const TypeInfo sd_info = { .class_size = sizeof(SDCardClass), .class_init = sd_class_init, .instance_init = sd_instance_init, + .instance_finalize = sd_instance_finalize, }; static void sd_register_types(void) diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c index 3ff0886dd5..24001dc3e6 100644 --- a/hw/sd/ssi-sd.c +++ b/hw/sd/ssi-sd.c @@ -31,7 +31,7 @@ do { fprintf(stderr, "ssi_sd: error: " fmt , ## __VA_ARGS__);} while (0) #endif typedef enum { - SSI_SD_CMD, + SSI_SD_CMD = 0, SSI_SD_CMDARG, SSI_SD_RESPONSE, SSI_SD_DATA_START, @@ -40,13 +40,13 @@ typedef enum { typedef struct { SSISlave ssidev; - ssi_sd_mode mode; + uint32_t mode; 
int cmd; uint8_t cmdarg[4]; uint8_t response[5]; - int arglen; - int response_pos; - int stopping; + int32_t arglen; + int32_t response_pos; + int32_t stopping; SDState *sd; } ssi_sd_state; @@ -198,61 +198,46 @@ static uint32_t ssi_sd_transfer(SSISlave *dev, uint32_t val) return 0xff; } -static void ssi_sd_save(QEMUFile *f, void *opaque) +static int ssi_sd_post_load(void *opaque, int version_id) { - SSISlave *ss = SSI_SLAVE(opaque); ssi_sd_state *s = (ssi_sd_state *)opaque; - int i; - qemu_put_be32(f, s->mode); - qemu_put_be32(f, s->cmd); - for (i = 0; i < 4; i++) - qemu_put_be32(f, s->cmdarg[i]); - for (i = 0; i < 5; i++) - qemu_put_be32(f, s->response[i]); - qemu_put_be32(f, s->arglen); - qemu_put_be32(f, s->response_pos); - qemu_put_be32(f, s->stopping); - - qemu_put_be32(f, ss->cs); -} - -static int ssi_sd_load(QEMUFile *f, void *opaque, int version_id) -{ - SSISlave *ss = SSI_SLAVE(opaque); - ssi_sd_state *s = (ssi_sd_state *)opaque; - int i; - - if (version_id != 1) + if (s->mode > SSI_SD_DATA_READ) { return -EINVAL; - - s->mode = qemu_get_be32(f); - s->cmd = qemu_get_be32(f); - for (i = 0; i < 4; i++) - s->cmdarg[i] = qemu_get_be32(f); - for (i = 0; i < 5; i++) - s->response[i] = qemu_get_be32(f); - s->arglen = qemu_get_be32(f); + } if (s->mode == SSI_SD_CMDARG && (s->arglen < 0 || s->arglen >= ARRAY_SIZE(s->cmdarg))) { return -EINVAL; } - s->response_pos = qemu_get_be32(f); - s->stopping = qemu_get_be32(f); if (s->mode == SSI_SD_RESPONSE && (s->response_pos < 0 || s->response_pos >= ARRAY_SIZE(s->response) || (!s->stopping && s->arglen > ARRAY_SIZE(s->response)))) { return -EINVAL; } - ss->cs = qemu_get_be32(f); - return 0; } +static const VMStateDescription vmstate_ssi_sd = { + .name = "ssi_sd", + .version_id = 2, + .minimum_version_id = 2, + .post_load = ssi_sd_post_load, + .fields = (VMStateField []) { + VMSTATE_UINT32(mode, ssi_sd_state), + VMSTATE_INT32(cmd, ssi_sd_state), + VMSTATE_UINT8_ARRAY(cmdarg, ssi_sd_state, 4), + VMSTATE_UINT8_ARRAY(response, ssi_sd_state, 5), + VMSTATE_INT32(arglen, ssi_sd_state), + VMSTATE_INT32(response_pos, ssi_sd_state), + VMSTATE_INT32(stopping, ssi_sd_state), + VMSTATE_SSI_SLAVE(ssidev, ssi_sd_state), + VMSTATE_END_OF_LIST() + } +}; + static void ssi_sd_realize(SSISlave *d, Error **errp) { - DeviceState *dev = DEVICE(d); ssi_sd_state *s = FROM_SSI_SLAVE(ssi_sd_state, d); DriveInfo *dinfo; @@ -264,16 +249,17 @@ static void ssi_sd_realize(SSISlave *d, Error **errp) error_setg(errp, "Device initialization failed."); return; } - register_savevm(dev, "ssi_sd", -1, 1, ssi_sd_save, ssi_sd_load, s); } static void ssi_sd_class_init(ObjectClass *klass, void *data) { + DeviceClass *dc = DEVICE_CLASS(klass); SSISlaveClass *k = SSI_SLAVE_CLASS(klass); k->realize = ssi_sd_realize; k->transfer = ssi_sd_transfer; k->cs_polarity = SSI_CS_LOW; + dc->vmsd = &vmstate_ssi_sd; } static const TypeInfo ssi_sd_info = { diff --git a/hw/sh4/shix.c b/hw/sh4/shix.c index ccc9e75894..14d4007c1c 100644 --- a/hw/sh4/shix.c +++ b/hw/sh4/shix.c @@ -23,7 +23,7 @@ */ /* Shix 2.0 board by Alexis Polti, described at - http://perso.enst.fr/~polti/realisations/shix20/ + https://web.archive.org/web/20070917001736/perso.enst.fr/~polti/realisations/shix20 More information in target-sh4/README.sh4 */ diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 74c7102929..3a96cededd 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -20,6 +20,7 @@ #include "qemu/config-file.h" #include "qemu/error-report.h" #include "sysemu/sysemu.h" +#include "qemu/uuid.h" #include 
"sysemu/cpus.h" #include "hw/smbios/smbios.h" #include "hw/loader.h" @@ -79,7 +80,7 @@ static struct { static struct { const char *manufacturer, *product, *version, *serial, *sku, *family; - /* uuid is in qemu_uuid[] */ + /* uuid is in qemu_uuid */ } type1; static struct { @@ -408,7 +409,7 @@ static void smbios_build_type_1_fields(void) * BIOS. */ smbios_add_field(1, offsetof(struct smbios_type_1, uuid), - qemu_uuid, 16); + &qemu_uuid, 16); } } @@ -483,9 +484,9 @@ static void smbios_build_type_0_table(void) /* Encode UUID from the big endian encoding described on RFC4122 to the wire * format specified by SMBIOS version 2.6. */ -static void smbios_encode_uuid(struct smbios_uuid *uuid, const uint8_t *buf) +static void smbios_encode_uuid(struct smbios_uuid *uuid, QemuUUID *in) { - memcpy(uuid, buf, 16); + memcpy(uuid, in, 16); if (smbios_uuid_encoded) { uuid->time_low = bswap32(uuid->time_low); uuid->time_mid = bswap16(uuid->time_mid); @@ -502,7 +503,7 @@ static void smbios_build_type_1_table(void) SMBIOS_TABLE_SET_STR(1, version_str, type1.version); SMBIOS_TABLE_SET_STR(1, serial_number_str, type1.serial); if (qemu_uuid_set) { - smbios_encode_uuid(&t->uuid, qemu_uuid); + smbios_encode_uuid(&t->uuid, &qemu_uuid); } else { memset(&t->uuid, 0, 16); } @@ -1001,7 +1002,7 @@ void smbios_entry_add(QemuOpts *opts) val = qemu_opt_get(opts, "uuid"); if (val) { - if (qemu_uuid_parse(val, qemu_uuid) != 0) { + if (qemu_uuid_parse(val, &qemu_uuid) != 0) { error_report("Invalid UUID"); exit(1); } diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c index 478fda8209..f5b6efddf8 100644 --- a/hw/sparc/sun4m.c +++ b/hw/sparc/sun4m.c @@ -35,10 +35,11 @@ #include "sysemu/sysemu.h" #include "net/net.h" #include "hw/boards.h" -#include "hw/nvram/openbios_firmware_abi.h" #include "hw/scsi/esp.h" #include "hw/i386/pc.h" #include "hw/isa/isa.h" +#include "hw/nvram/sun_nvram.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/nvram/fw_cfg.h" #include "hw/char/escc.h" #include "hw/empty_slot.h" @@ -117,39 +118,17 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, int nvram_machine_id, const char *arch) { unsigned int i; - uint32_t start, end; + int sysp_end; uint8_t image[0x1ff0]; - struct OpenBIOS_nvpart_v1 *part_header; NvramClass *k = NVRAM_GET_CLASS(nvram); memset(image, '\0', sizeof(image)); - start = 0; + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(image, 0); - // OpenBIOS nvram variables - // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = start + sizeof(struct OpenBIOS_nvpart_v1); - for (i = 0; i < nb_prom_envs; i++) - end = OpenBIOS_set_var(image, end, prom_envs[i]); - - // End marker - image[end++] = '\0'; - - end = start + ((end - start + 15) & ~15); - OpenBIOS_finish_partition(part_header, end - start); - - // free partition - start = end; - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_FREE; - pstrcpy(part_header->name, sizeof(part_header->name), "free"); - - end = 0x1fd0; - OpenBIOS_finish_partition(part_header, end - start); + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); Sun_init_header((struct Sun_nvram *)&image[0x1fd8], macaddr, nvram_machine_id); @@ -159,20 +138,6 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, } } -static DeviceState *slavio_intctl; - -void sun4m_hmp_info_pic(Monitor 
*mon, const QDict *qdict) -{ - if (slavio_intctl) - slavio_pic_info(mon, slavio_intctl); -} - -void sun4m_hmp_info_irq(Monitor *mon, const QDict *qdict) -{ - if (slavio_intctl) - slavio_irq_info(mon, slavio_intctl); -} - void cpu_check_irqs(CPUSPARCState *env) { CPUState *cs; @@ -873,6 +838,7 @@ static void dummy_fdc_tc(void *opaque, int irq, int level) static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, MachineState *machine) { + DeviceState *slavio_intctl; const char *cpu_model = machine->cpu_model; unsigned int i; void *iommu, *espdma, *ledma, *nvram; @@ -1067,6 +1033,7 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, hwdef->ecc_version); fw_cfg = fw_cfg_init_mem(CFG_ADDR, CFG_ADDR + 2); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 3165e18eb7..466331535b 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -36,7 +36,8 @@ #include "qemu/timer.h" #include "sysemu/sysemu.h" #include "hw/boards.h" -#include "hw/nvram/openbios_firmware_abi.h" +#include "hw/nvram/sun_nvram.h" +#include "hw/nvram/chrp_nvram.h" #include "hw/nvram/fw_cfg.h" #include "hw/sysbus.h" #include "hw/ide.h" @@ -124,39 +125,17 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, const uint8_t *macaddr) { unsigned int i; - uint32_t start, end; + int sysp_end; uint8_t image[0x1ff0]; - struct OpenBIOS_nvpart_v1 *part_header; NvramClass *k = NVRAM_GET_CLASS(nvram); memset(image, '\0', sizeof(image)); - start = 0; + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(image, 0); - // OpenBIOS nvram variables - // Variable partition - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_SYSTEM; - pstrcpy(part_header->name, sizeof(part_header->name), "system"); - - end = start + sizeof(struct OpenBIOS_nvpart_v1); - for (i = 0; i < nb_prom_envs; i++) - end = OpenBIOS_set_var(image, end, prom_envs[i]); - - // End marker - image[end++] = '\0'; - - end = start + ((end - start + 15) & ~15); - OpenBIOS_finish_partition(part_header, end - start); - - // free partition - start = end; - part_header = (struct OpenBIOS_nvpart_v1 *)&image[start]; - part_header->signature = OPENBIOS_PART_FREE; - pstrcpy(part_header->name, sizeof(part_header->name), "free"); - - end = 0x1fd0; - OpenBIOS_finish_partition(part_header, end - start); + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); Sun_init_header((struct Sun_nvram *)&image[0x1fd8], macaddr, 0x80); @@ -824,7 +803,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, i++; } - serial_hds_isa_init(isa_bus, MAX_SERIAL_PORTS); + serial_hds_isa_init(isa_bus, i, MAX_SERIAL_PORTS); parallel_hds_isa_init(isa_bus, MAX_PARALLEL_PORTS); for(i = 0; i < nb_nics; i++) @@ -876,6 +855,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, (uint8_t *)&nd_table[0].macaddr); fw_cfg = fw_cfg_init_io(BIOS_CFG_IOPORT); + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id); diff --git a/hw/ssi/Makefile.objs b/hw/ssi/Makefile.objs index c79a8dcd86..487add2879 100644 --- 
a/hw/ssi/Makefile.objs +++ b/hw/ssi/Makefile.objs @@ -3,6 +3,7 @@ common-obj-$(CONFIG_SSI) += ssi.o common-obj-$(CONFIG_XILINX_SPI) += xilinx_spi.o common-obj-$(CONFIG_XILINX_SPIPS) += xilinx_spips.o common-obj-$(CONFIG_ASPEED_SOC) += aspeed_smc.o +common-obj-$(CONFIG_STM32F2XX_SPI) += stm32f2xx_spi.o obj-$(CONFIG_OMAP) += omap_spi.o obj-$(CONFIG_IMX) += imx_spi.o diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c index d319e04a27..6e8403ebc2 100644 --- a/hw/ssi/aspeed_smc.c +++ b/hw/ssi/aspeed_smc.c @@ -79,10 +79,10 @@ /* CEx Segment Address Register */ #define R_SEG_ADDR0 (0x30 / 4) -#define SEG_SIZE_SHIFT 24 /* 8MB units */ -#define SEG_SIZE_MASK 0x7f +#define SEG_END_SHIFT 24 /* 8MB units */ +#define SEG_END_MASK 0xff #define SEG_START_SHIFT 16 /* address bit [A29-A23] */ -#define SEG_START_MASK 0x7f +#define SEG_START_MASK 0xff #define R_SEG_ADDR1 (0x34 / 4) #define R_SEG_ADDR2 (0x38 / 4) #define R_SEG_ADDR3 (0x3C / 4) @@ -127,18 +127,22 @@ #define R_SPI_MISC_CTRL (0x10 / 4) #define R_SPI_TIMINGS (0x14 / 4) +#define ASPEED_SOC_SMC_FLASH_BASE 0x10000000 +#define ASPEED_SOC_FMC_FLASH_BASE 0x20000000 +#define ASPEED_SOC_SPI_FLASH_BASE 0x30000000 +#define ASPEED_SOC_SPI2_FLASH_BASE 0x38000000 + /* * Default segments mapping addresses and size for each slave per * controller. These can be changed when board is initialized with the - * Segment Address Registers but they don't seem do be used on the - * field. + * Segment Address Registers. */ static const AspeedSegments aspeed_segments_legacy[] = { { 0x10000000, 32 * 1024 * 1024 }, }; static const AspeedSegments aspeed_segments_fmc[] = { - { 0x20000000, 64 * 1024 * 1024 }, + { 0x20000000, 64 * 1024 * 1024 }, /* start address is readonly */ { 0x24000000, 32 * 1024 * 1024 }, { 0x26000000, 32 * 1024 * 1024 }, { 0x28000000, 32 * 1024 * 1024 }, @@ -149,15 +153,155 @@ static const AspeedSegments aspeed_segments_spi[] = { { 0x30000000, 64 * 1024 * 1024 }, }; +static const AspeedSegments aspeed_segments_ast2500_fmc[] = { + { 0x20000000, 128 * 1024 * 1024 }, /* start address is readonly */ + { 0x28000000, 32 * 1024 * 1024 }, + { 0x2A000000, 32 * 1024 * 1024 }, +}; + +static const AspeedSegments aspeed_segments_ast2500_spi1[] = { + { 0x30000000, 32 * 1024 * 1024 }, /* start address is readonly */ + { 0x32000000, 96 * 1024 * 1024 }, /* end address is readonly */ +}; + +static const AspeedSegments aspeed_segments_ast2500_spi2[] = { + { 0x38000000, 32 * 1024 * 1024 }, /* start address is readonly */ + { 0x3A000000, 96 * 1024 * 1024 }, /* end address is readonly */ +}; + static const AspeedSMCController controllers[] = { { "aspeed.smc.smc", R_CONF, R_CE_CTRL, R_CTRL0, R_TIMINGS, - CONF_ENABLE_W0, 5, aspeed_segments_legacy, 0x6000000 }, + CONF_ENABLE_W0, 5, aspeed_segments_legacy, + ASPEED_SOC_SMC_FLASH_BASE, 0x6000000 }, { "aspeed.smc.fmc", R_CONF, R_CE_CTRL, R_CTRL0, R_TIMINGS, - CONF_ENABLE_W0, 5, aspeed_segments_fmc, 0x10000000 }, + CONF_ENABLE_W0, 5, aspeed_segments_fmc, + ASPEED_SOC_FMC_FLASH_BASE, 0x10000000 }, { "aspeed.smc.spi", R_SPI_CONF, 0xff, R_SPI_CTRL0, R_SPI_TIMINGS, - SPI_CONF_ENABLE_W0, 1, aspeed_segments_spi, 0x10000000 }, + SPI_CONF_ENABLE_W0, 1, aspeed_segments_spi, + ASPEED_SOC_SPI_FLASH_BASE, 0x10000000 }, + { "aspeed.smc.ast2500-fmc", R_CONF, R_CE_CTRL, R_CTRL0, R_TIMINGS, + CONF_ENABLE_W0, 3, aspeed_segments_ast2500_fmc, + ASPEED_SOC_FMC_FLASH_BASE, 0x10000000 }, + { "aspeed.smc.ast2500-spi1", R_CONF, R_CE_CTRL, R_CTRL0, R_TIMINGS, + CONF_ENABLE_W0, 2, aspeed_segments_ast2500_spi1, + ASPEED_SOC_SPI_FLASH_BASE, 0x8000000 }, + { 
"aspeed.smc.ast2500-spi2", R_CONF, R_CE_CTRL, R_CTRL0, R_TIMINGS, + CONF_ENABLE_W0, 2, aspeed_segments_ast2500_spi2, + ASPEED_SOC_SPI2_FLASH_BASE, 0x8000000 }, }; +/* + * The Segment Register uses a 8MB unit to encode the start address + * and the end address of the mapping window of a flash SPI slave : + * + * | byte 1 | byte 2 | byte 3 | byte 4 | + * +--------+--------+--------+--------+ + * | end | start | 0 | 0 | + * + */ +static inline uint32_t aspeed_smc_segment_to_reg(const AspeedSegments *seg) +{ + uint32_t reg = 0; + reg |= ((seg->addr >> 23) & SEG_START_MASK) << SEG_START_SHIFT; + reg |= (((seg->addr + seg->size) >> 23) & SEG_END_MASK) << SEG_END_SHIFT; + return reg; +} + +static inline void aspeed_smc_reg_to_segment(uint32_t reg, AspeedSegments *seg) +{ + seg->addr = ((reg >> SEG_START_SHIFT) & SEG_START_MASK) << 23; + seg->size = (((reg >> SEG_END_SHIFT) & SEG_END_MASK) << 23) - seg->addr; +} + +static bool aspeed_smc_flash_overlap(const AspeedSMCState *s, + const AspeedSegments *new, + int cs) +{ + AspeedSegments seg; + int i; + + for (i = 0; i < s->ctrl->max_slaves; i++) { + if (i == cs) { + continue; + } + + aspeed_smc_reg_to_segment(s->regs[R_SEG_ADDR0 + i], &seg); + + if (new->addr + new->size > seg.addr && + new->addr < seg.addr + seg.size) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment CS%d [ 0x%" + HWADDR_PRIx" - 0x%"HWADDR_PRIx" ] overlaps with " + "CS%d [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", + s->ctrl->name, cs, new->addr, new->addr + new->size, + i, seg.addr, seg.addr + seg.size); + return true; + } + } + return false; +} + +static void aspeed_smc_flash_set_segment(AspeedSMCState *s, int cs, + uint64_t new) +{ + AspeedSMCFlash *fl = &s->flashes[cs]; + AspeedSegments seg; + + aspeed_smc_reg_to_segment(new, &seg); + + /* The start address of CS0 is read-only */ + if (cs == 0 && seg.addr != s->ctrl->flash_window_base) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Tried to change CS0 start address to 0x%" + HWADDR_PRIx "\n", s->ctrl->name, seg.addr); + return; + } + + /* + * The end address of the AST2500 spi controllers is also + * read-only. + */ + if ((s->ctrl->segments == aspeed_segments_ast2500_spi1 || + s->ctrl->segments == aspeed_segments_ast2500_spi2) && + cs == s->ctrl->max_slaves && + seg.addr + seg.size != s->ctrl->segments[cs].addr + + s->ctrl->segments[cs].size) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Tried to change CS%d end address to 0x%" + HWADDR_PRIx "\n", s->ctrl->name, cs, seg.addr); + return; + } + + /* Keep the segment in the overall flash window */ + if (seg.addr + seg.size <= s->ctrl->flash_window_base || + seg.addr > s->ctrl->flash_window_base + s->ctrl->flash_window_size) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment for CS%d is invalid : " + "[ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", + s->ctrl->name, cs, seg.addr, seg.addr + seg.size); + return; + } + + /* Check start address vs. 
alignment */ + if (seg.addr % seg.size) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: new segment for CS%d is not " + "aligned : [ 0x%"HWADDR_PRIx" - 0x%"HWADDR_PRIx" ]\n", + s->ctrl->name, cs, seg.addr, seg.addr + seg.size); + } + + /* And segments should not overlap */ + if (aspeed_smc_flash_overlap(s, &seg, cs)) { + return; + } + + /* All should be fine now to move the region */ + memory_region_transaction_begin(); + memory_region_set_size(&fl->mmio, seg.size); + memory_region_set_address(&fl->mmio, seg.addr - s->ctrl->flash_window_base); + memory_region_set_enabled(&fl->mmio, true); + memory_region_transaction_commit(); + + s->regs[R_SEG_ADDR0 + cs] = new; +} + static uint64_t aspeed_smc_flash_default_read(void *opaque, hwaddr addr, unsigned size) { @@ -281,6 +425,12 @@ static void aspeed_smc_reset(DeviceState *d) s->regs[s->r_ctrl0 + i] |= CTRL_CE_STOP_ACTIVE; } + /* setup default segment register values for all */ + for (i = 0; i < s->ctrl->max_slaves; ++i) { + s->regs[R_SEG_ADDR0 + i] = + aspeed_smc_segment_to_reg(&s->ctrl->segments[i]); + } + aspeed_smc_update_cs(s); } @@ -301,6 +451,7 @@ static uint64_t aspeed_smc_read(void *opaque, hwaddr addr, unsigned int size) addr == s->r_timings || addr == s->r_ce_ctrl || addr == R_INTR_CTRL || + (addr >= R_SEG_ADDR0 && addr < R_SEG_ADDR0 + s->ctrl->max_slaves) || (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + s->num_cs)) { return s->regs[addr]; } else { @@ -332,6 +483,13 @@ static void aspeed_smc_write(void *opaque, hwaddr addr, uint64_t data, } else if (addr >= s->r_ctrl0 && addr < s->r_ctrl0 + s->num_cs) { s->regs[addr] = value; aspeed_smc_update_cs(s); + } else if (addr >= R_SEG_ADDR0 && + addr < R_SEG_ADDR0 + s->ctrl->max_slaves) { + int cs = addr - R_SEG_ADDR0; + + if (value != s->regs[R_SEG_ADDR0 + cs]) { + aspeed_smc_flash_set_segment(s, cs, value); + } } else { qemu_log_mask(LOG_UNIMP, "%s: not implemented: 0x%" HWADDR_PRIx "\n", __func__, addr); @@ -384,23 +542,33 @@ static void aspeed_smc_realize(DeviceState *dev, Error **errp) aspeed_smc_reset(dev); + /* The memory region for the controller registers */ memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_ops, s, s->ctrl->name, ASPEED_SMC_R_MAX * 4); sysbus_init_mmio(sbd, &s->mmio); /* - * Memory region where flash modules are remapped + * The container memory region representing the address space + * window in which the flash modules are mapped. The size and + * address depends on the SoC model and controller type. */ snprintf(name, sizeof(name), "%s.flash", s->ctrl->name); memory_region_init_io(&s->mmio_flash, OBJECT(s), &aspeed_smc_flash_default_ops, s, name, - s->ctrl->mapping_window_size); + s->ctrl->flash_window_size); sysbus_init_mmio(sbd, &s->mmio_flash); - s->flashes = g_new0(AspeedSMCFlash, s->num_cs); + s->flashes = g_new0(AspeedSMCFlash, s->ctrl->max_slaves); - for (i = 0; i < s->num_cs; ++i) { + /* + * Let's create a sub memory region for each possible slave. All + * have a configurable memory segment in the overall flash mapping + * window of the controller but, there is not necessarily a flash + * module behind to handle the memory accesses. This depends on + * the board configuration. 
+ */ + for (i = 0; i < s->ctrl->max_slaves; ++i) { AspeedSMCFlash *fl = &s->flashes[i]; snprintf(name, sizeof(name), "%s.%d", s->ctrl->name, i); diff --git a/hw/ssi/imx_spi.c b/hw/ssi/imx_spi.c index 4226199811..e4e395fa67 100644 --- a/hw/ssi/imx_spi.c +++ b/hw/ssi/imx_spi.c @@ -25,7 +25,7 @@ } \ } while (0) -static char const *imx_spi_reg_name(uint32_t reg) +static const char *imx_spi_reg_name(uint32_t reg) { static char unknown[20]; diff --git a/hw/ssi/stm32f2xx_spi.c b/hw/ssi/stm32f2xx_spi.c new file mode 100644 index 0000000000..26a1b4ddf5 --- /dev/null +++ b/hw/ssi/stm32f2xx_spi.c @@ -0,0 +1,225 @@ +/* + * STM32F405 SPI + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "hw/ssi/stm32f2xx_spi.h" + +#ifndef STM_SPI_ERR_DEBUG +#define STM_SPI_ERR_DEBUG 0 +#endif + +#define DB_PRINT_L(lvl, fmt, args...) do { \ + if (STM_SPI_ERR_DEBUG >= lvl) { \ + qemu_log("%s: " fmt, __func__, ## args); \ + } \ +} while (0); + +#define DB_PRINT(fmt, args...) 
DB_PRINT_L(1, fmt, ## args) + +static void stm32f2xx_spi_reset(DeviceState *dev) +{ + STM32F2XXSPIState *s = STM32F2XX_SPI(dev); + + s->spi_cr1 = 0x00000000; + s->spi_cr2 = 0x00000000; + s->spi_sr = 0x0000000A; + s->spi_dr = 0x0000000C; + s->spi_crcpr = 0x00000007; + s->spi_rxcrcr = 0x00000000; + s->spi_txcrcr = 0x00000000; + s->spi_i2scfgr = 0x00000000; + s->spi_i2spr = 0x00000002; +} + +static void stm32f2xx_spi_transfer(STM32F2XXSPIState *s) +{ + DB_PRINT("Data to send: 0x%x\n", s->spi_dr); + + s->spi_dr = ssi_transfer(s->ssi, s->spi_dr); + s->spi_sr |= STM_SPI_SR_RXNE; + + DB_PRINT("Data received: 0x%x\n", s->spi_dr); +} + +static uint64_t stm32f2xx_spi_read(void *opaque, hwaddr addr, + unsigned int size) +{ + STM32F2XXSPIState *s = opaque; + + DB_PRINT("Address: 0x%" HWADDR_PRIx "\n", addr); + + switch (addr) { + case STM_SPI_CR1: + return s->spi_cr1; + case STM_SPI_CR2: + qemu_log_mask(LOG_UNIMP, "%s: Interrupts and DMA are not implemented\n", + __func__); + return s->spi_cr2; + case STM_SPI_SR: + return s->spi_sr; + case STM_SPI_DR: + stm32f2xx_spi_transfer(s); + s->spi_sr &= ~STM_SPI_SR_RXNE; + return s->spi_dr; + case STM_SPI_CRCPR: + qemu_log_mask(LOG_UNIMP, "%s: CRC is not implemented, the registers " \ + "are included for compatibility\n", __func__); + return s->spi_crcpr; + case STM_SPI_RXCRCR: + qemu_log_mask(LOG_UNIMP, "%s: CRC is not implemented, the registers " \ + "are included for compatibility\n", __func__); + return s->spi_rxcrcr; + case STM_SPI_TXCRCR: + qemu_log_mask(LOG_UNIMP, "%s: CRC is not implemented, the registers " \ + "are included for compatibility\n", __func__); + return s->spi_txcrcr; + case STM_SPI_I2SCFGR: + qemu_log_mask(LOG_UNIMP, "%s: I2S is not implemented, the registers " \ + "are included for compatibility\n", __func__); + return s->spi_i2scfgr; + case STM_SPI_I2SPR: + qemu_log_mask(LOG_UNIMP, "%s: I2S is not implemented, the registers " \ + "are included for compatibility\n", __func__); + return s->spi_i2spr; + default: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n", + __func__, addr); + } + + return 0; +} + +static void stm32f2xx_spi_write(void *opaque, hwaddr addr, + uint64_t val64, unsigned int size) +{ + STM32F2XXSPIState *s = opaque; + uint32_t value = val64; + + DB_PRINT("Address: 0x%" HWADDR_PRIx ", Value: 0x%x\n", addr, value); + + switch (addr) { + case STM_SPI_CR1: + s->spi_cr1 = value; + return; + case STM_SPI_CR2: + qemu_log_mask(LOG_UNIMP, "%s: " \ + "Interrupts and DMA are not implemented\n", __func__); + s->spi_cr2 = value; + return; + case STM_SPI_SR: + /* Read only register, except for clearing the CRCERR bit, which + * is not supported + */ + return; + case STM_SPI_DR: + s->spi_dr = value; + stm32f2xx_spi_transfer(s); + return; + case STM_SPI_CRCPR: + qemu_log_mask(LOG_UNIMP, "%s: CRC is not implemented\n", __func__); + return; + case STM_SPI_RXCRCR: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Read only register: " \ + "0x%" HWADDR_PRIx "\n", __func__, addr); + return; + case STM_SPI_TXCRCR: + qemu_log_mask(LOG_GUEST_ERROR, "%s: Read only register: " \ + "0x%" HWADDR_PRIx "\n", __func__, addr); + return; + case STM_SPI_I2SCFGR: + qemu_log_mask(LOG_UNIMP, "%s: " \ + "I2S is not implemented\n", __func__); + return; + case STM_SPI_I2SPR: + qemu_log_mask(LOG_UNIMP, "%s: " \ + "I2S is not implemented\n", __func__); + return; + default: + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Bad offset 0x%" HWADDR_PRIx "\n", __func__, addr); + } +} + +static const MemoryRegionOps stm32f2xx_spi_ops = { + .read = stm32f2xx_spi_read, + 
.write = stm32f2xx_spi_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static const VMStateDescription vmstate_stm32f2xx_spi = { + .name = TYPE_STM32F2XX_SPI, + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT32(spi_cr1, STM32F2XXSPIState), + VMSTATE_UINT32(spi_cr2, STM32F2XXSPIState), + VMSTATE_UINT32(spi_sr, STM32F2XXSPIState), + VMSTATE_UINT32(spi_dr, STM32F2XXSPIState), + VMSTATE_UINT32(spi_crcpr, STM32F2XXSPIState), + VMSTATE_UINT32(spi_rxcrcr, STM32F2XXSPIState), + VMSTATE_UINT32(spi_txcrcr, STM32F2XXSPIState), + VMSTATE_UINT32(spi_i2scfgr, STM32F2XXSPIState), + VMSTATE_UINT32(spi_i2spr, STM32F2XXSPIState), + VMSTATE_END_OF_LIST() + } +}; + +static void stm32f2xx_spi_init(Object *obj) +{ + STM32F2XXSPIState *s = STM32F2XX_SPI(obj); + DeviceState *dev = DEVICE(obj); + + memory_region_init_io(&s->mmio, obj, &stm32f2xx_spi_ops, s, + TYPE_STM32F2XX_SPI, 0x400); + sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio); + + sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq); + + s->ssi = ssi_create_bus(dev, "ssi"); +} + +static void stm32f2xx_spi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = stm32f2xx_spi_reset; + dc->vmsd = &vmstate_stm32f2xx_spi; +} + +static const TypeInfo stm32f2xx_spi_info = { + .name = TYPE_STM32F2XX_SPI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(STM32F2XXSPIState), + .instance_init = stm32f2xx_spi_init, + .class_init = stm32f2xx_spi_class_init, +}; + +static void stm32f2xx_spi_register_types(void) +{ + type_register_static(&stm32f2xx_spi_info); +} + +type_init(stm32f2xx_spi_register_types) diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index e2b77dc3de..da8adfa443 100644 --- a/hw/ssi/xilinx_spips.c +++ b/hw/ssi/xilinx_spips.c @@ -607,6 +607,7 @@ static void xilinx_spips_realize(DeviceState *dev, Error **errp) XilinxSPIPS *s = XILINX_SPIPS(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); XilinxSPIPSClass *xsc = XILINX_SPIPS_GET_CLASS(s); + qemu_irq *cs; int i; DB_PRINT_L(0, "realized spips\n"); @@ -619,8 +620,10 @@ static void xilinx_spips_realize(DeviceState *dev, Error **errp) } s->cs_lines = g_new0(qemu_irq, s->num_cs * s->num_busses); - ssi_auto_connect_slaves(DEVICE(s), s->cs_lines, s->spi[0]); - ssi_auto_connect_slaves(DEVICE(s), s->cs_lines, s->spi[1]); + for (i = 0, cs = s->cs_lines; i < s->num_busses; ++i, cs += s->num_cs) { + ssi_auto_connect_slaves(DEVICE(s), cs, s->spi[i]); + } + sysbus_init_irq(sbd, &s->irq); for (i = 0; i < s->num_cs * s->num_busses; ++i) { sysbus_init_irq(sbd, &s->cs_lines[i]); diff --git a/hw/timer/a9gtimer.c b/hw/timer/a9gtimer.c index 772f85f5fd..ce1dc63911 100644 --- a/hw/timer/a9gtimer.c +++ b/hw/timer/a9gtimer.c @@ -82,15 +82,15 @@ static void a9_gtimer_update(A9GTimerState *s, bool sync) if ((s->control & R_CONTROL_TIMER_ENABLE) && (gtb->control & R_CONTROL_COMP_ENABLE)) { /* R2p0+, where the compare function is >= */ - while (gtb->compare < update.new) { + if (gtb->compare < update.new) { DB_PRINT("Compare event happened for CPU %d\n", i); gtb->status = 1; - if (gtb->control & R_CONTROL_AUTO_INCREMENT) { - DB_PRINT("Auto incrementing timer compare by %" PRId32 "\n", - gtb->inc); - gtb->compare += gtb->inc; - } else { - break; + if (gtb->control & R_CONTROL_AUTO_INCREMENT && gtb->inc) { + uint64_t inc = + QEMU_ALIGN_UP(update.new - gtb->compare, gtb->inc); + DB_PRINT("Auto incrementing timer compare by %" + PRId64 "\n", inc); + gtb->compare += inc; } } cdiff = (int64_t)gtb->compare - (int64_t)update.new + 1; diff 
--git a/hw/timer/allwinner-a10-pit.c b/hw/timer/allwinner-a10-pit.c index 3385e5dc35..22ceabe1d4 100644 --- a/hw/timer/allwinner-a10-pit.c +++ b/hw/timer/allwinner-a10-pit.c @@ -267,7 +267,7 @@ static void a10_pit_init(Object *obj) tc->container = s; tc->index = i; bh[i] = qemu_bh_new(a10_pit_timer_cb, tc); - s->timer[i] = ptimer_init(bh[i]); + s->timer[i] = ptimer_init(bh[i], PTIMER_POLICY_DEFAULT); } } diff --git a/hw/timer/arm_mptimer.c b/hw/timer/arm_mptimer.c index d66bbf01b4..daf6c48797 100644 --- a/hw/timer/arm_mptimer.c +++ b/hw/timer/arm_mptimer.c @@ -20,22 +20,33 @@ */ #include "qemu/osdep.h" +#include "hw/ptimer.h" #include "hw/timer/arm_mptimer.h" #include "qapi/error.h" -#include "qemu/timer.h" +#include "qemu/main-loop.h" #include "qom/cpu.h" +#define PTIMER_POLICY \ + (PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD | \ + PTIMER_POLICY_CONTINUOUS_TRIGGER | \ + PTIMER_POLICY_NO_IMMEDIATE_TRIGGER | \ + PTIMER_POLICY_NO_IMMEDIATE_RELOAD | \ + PTIMER_POLICY_NO_COUNTER_ROUND_DOWN) + /* This device implements the per-cpu private timer and watchdog block * which is used in both the ARM11MPCore and Cortex-A9MP. */ static inline int get_current_cpu(ARMMPTimerState *s) { - if (current_cpu->cpu_index >= s->num_cpu) { + int cpu_id = current_cpu ? current_cpu->cpu_index : 0; + + if (cpu_id >= s->num_cpu) { hw_error("arm_mptimer: num-cpu %d but this cpu is %d!\n", - s->num_cpu, current_cpu->cpu_index); + s->num_cpu, cpu_id); } - return current_cpu->cpu_index; + + return cpu_id; } static inline void timerblock_update_irq(TimerBlock *tb) @@ -44,33 +55,42 @@ static inline void timerblock_update_irq(TimerBlock *tb) } /* Return conversion factor from mpcore timer ticks to qemu timer ticks. */ -static inline uint32_t timerblock_scale(TimerBlock *tb) +static inline uint32_t timerblock_scale(uint32_t control) { - return (((tb->control >> 8) & 0xff) + 1) * 10; + return (((control >> 8) & 0xff) + 1) * 10; } -static void timerblock_reload(TimerBlock *tb, int restart) +static inline void timerblock_set_count(struct ptimer_state *timer, + uint32_t control, uint64_t *count) { - if (tb->count == 0) { - return; + /* PTimer would trigger interrupt for periodic timer when counter set + * to 0, MPtimer under certain condition only. + */ + if ((control & 3) == 3 && (control & 0xff00) == 0 && *count == 0) { + *count = ptimer_get_limit(timer); } - if (restart) { - tb->tick = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + ptimer_set_count(timer, *count); +} + +static inline void timerblock_run(struct ptimer_state *timer, + uint32_t control, uint32_t load) +{ + if ((control & 1) && ((control & 0xff00) || load != 0)) { + ptimer_run(timer, !(control & 2)); } - tb->tick += (int64_t)tb->count * timerblock_scale(tb); - timer_mod(tb->timer, tb->tick); } static void timerblock_tick(void *opaque) { TimerBlock *tb = (TimerBlock *)opaque; - tb->status = 1; - if (tb->control & 2) { - tb->count = tb->load; - timerblock_reload(tb, 0); - } else { - tb->count = 0; + /* Periodic timer with load = 0 and prescaler != 0 would re-trigger + * IRQ after one period, otherwise it either stops or wraps around. + */ + if ((tb->control & 2) && (tb->control & 0xff00) == 0 && + ptimer_get_limit(tb->timer) == 0) { + ptimer_stop(tb->timer); } + tb->status = 1; timerblock_update_irq(tb); } @@ -78,21 +98,11 @@ static uint64_t timerblock_read(void *opaque, hwaddr addr, unsigned size) { TimerBlock *tb = (TimerBlock *)opaque; - int64_t val; switch (addr) { case 0: /* Load */ - return tb->load; + return ptimer_get_limit(tb->timer); case 4: /* Counter. 
*/ - if (((tb->control & 1) == 0) || (tb->count == 0)) { - return 0; - } - /* Slow and ugly, but hopefully won't happen too often. */ - val = tb->tick - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - val /= timerblock_scale(tb); - if (val < 0) { - val = 0; - } - return val; + return ptimer_get_count(tb->timer); case 8: /* Control. */ return tb->control; case 12: /* Interrupt status. */ @@ -106,37 +116,45 @@ static void timerblock_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { TimerBlock *tb = (TimerBlock *)opaque; - int64_t old; + uint32_t control = tb->control; switch (addr) { case 0: /* Load */ - tb->load = value; - /* Fall through. */ - case 4: /* Counter. */ - if ((tb->control & 1) && tb->count) { - /* Cancel the previous timer. */ - timer_del(tb->timer); + /* Setting load to 0 stops the timer without doing the tick if + * prescaler = 0. + */ + if ((control & 1) && (control & 0xff00) == 0 && value == 0) { + ptimer_stop(tb->timer); } - tb->count = value; - if (tb->control & 1) { - timerblock_reload(tb, 1); + ptimer_set_limit(tb->timer, value, 1); + timerblock_run(tb->timer, control, value); + break; + case 4: /* Counter. */ + /* Setting counter to 0 stops the one-shot timer, or periodic with + * load = 0, without doing the tick if prescaler = 0. + */ + if ((control & 1) && (control & 0xff00) == 0 && value == 0 && + (!(control & 2) || ptimer_get_limit(tb->timer) == 0)) { + ptimer_stop(tb->timer); } + timerblock_set_count(tb->timer, control, &value); + timerblock_run(tb->timer, control, value); break; case 8: /* Control. */ - old = tb->control; - tb->control = value; + if ((control & 3) != (value & 3)) { + ptimer_stop(tb->timer); + } + if ((control & 0xff00) != (value & 0xff00)) { + ptimer_set_period(tb->timer, timerblock_scale(value)); + } if (value & 1) { - if ((old & 1) && (tb->count != 0)) { - /* Do nothing if timer is ticking right now. */ - break; + uint64_t count = ptimer_get_count(tb->timer); + /* Re-load periodic timer counter if needed. */ + if ((value & 2) && count == 0) { + timerblock_set_count(tb->timer, value, &count); } - if (tb->control & 2) { - tb->count = tb->load; - } - timerblock_reload(tb, 1); - } else if (old & 1) { - /* Shutdown the timer. */ - timer_del(tb->timer); + timerblock_run(tb->timer, value, count); } + tb->control = value; break; case 12: /* Interrupt status. 
*/ tb->status &= ~value; @@ -186,13 +204,12 @@ static const MemoryRegionOps timerblock_ops = { static void timerblock_reset(TimerBlock *tb) { - tb->count = 0; - tb->load = 0; tb->control = 0; tb->status = 0; - tb->tick = 0; if (tb->timer) { - timer_del(tb->timer); + ptimer_stop(tb->timer); + ptimer_set_limit(tb->timer, 0, 1); + ptimer_set_period(tb->timer, timerblock_scale(0)); } } @@ -238,7 +255,8 @@ static void arm_mptimer_realize(DeviceState *dev, Error **errp) */ for (i = 0; i < s->num_cpu; i++) { TimerBlock *tb = &s->timerblock[i]; - tb->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, timerblock_tick, tb); + QEMUBH *bh = qemu_bh_new(timerblock_tick, tb); + tb->timer = ptimer_init(bh, PTIMER_POLICY); sysbus_init_irq(sbd, &tb->irq); memory_region_init_io(&tb->iomem, OBJECT(s), &timerblock_ops, tb, "arm_mptimer_timerblock", 0x20); @@ -248,26 +266,23 @@ static void arm_mptimer_realize(DeviceState *dev, Error **errp) static const VMStateDescription vmstate_timerblock = { .name = "arm_mptimer_timerblock", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, .fields = (VMStateField[]) { - VMSTATE_UINT32(count, TimerBlock), - VMSTATE_UINT32(load, TimerBlock), VMSTATE_UINT32(control, TimerBlock), VMSTATE_UINT32(status, TimerBlock), - VMSTATE_INT64(tick, TimerBlock), - VMSTATE_TIMER_PTR(timer, TimerBlock), + VMSTATE_PTIMER(timer, TimerBlock), VMSTATE_END_OF_LIST() } }; static const VMStateDescription vmstate_arm_mptimer = { .name = "arm_mptimer", - .version_id = 2, - .minimum_version_id = 2, + .version_id = 3, + .minimum_version_id = 3, .fields = (VMStateField[]) { VMSTATE_STRUCT_VARRAY_UINT32(timerblock, ARMMPTimerState, num_cpu, - 2, vmstate_timerblock, TimerBlock), + 3, vmstate_timerblock, TimerBlock), VMSTATE_END_OF_LIST() } }; diff --git a/hw/timer/arm_timer.c b/hw/timer/arm_timer.c index 111a16db37..98fddd7ac1 100644 --- a/hw/timer/arm_timer.c +++ b/hw/timer/arm_timer.c @@ -171,7 +171,7 @@ static arm_timer_state *arm_timer_init(uint32_t freq) s->control = TIMER_CTRL_IE; bh = qemu_bh_new(arm_timer_tick, s); - s->timer = ptimer_init(bh); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); vmstate_register(NULL, -1, &vmstate_arm_timer, s); return s; } diff --git a/hw/timer/digic-timer.c b/hw/timer/digic-timer.c index 0f21faf876..e1fcf73c3e 100644 --- a/hw/timer/digic-timer.c +++ b/hw/timer/digic-timer.c @@ -127,7 +127,7 @@ static void digic_timer_init(Object *obj) { DigicTimerState *s = DIGIC_TIMER(obj); - s->ptimer = ptimer_init(NULL); + s->ptimer = ptimer_init(NULL, PTIMER_POLICY_DEFAULT); /* * FIXME: there is no documentation on Digic timer diff --git a/hw/timer/etraxfs_timer.c b/hw/timer/etraxfs_timer.c index 36d8f462c4..8e18236c5a 100644 --- a/hw/timer/etraxfs_timer.c +++ b/hw/timer/etraxfs_timer.c @@ -322,9 +322,9 @@ static int etraxfs_timer_init(SysBusDevice *dev) t->bh_t0 = qemu_bh_new(timer0_hit, t); t->bh_t1 = qemu_bh_new(timer1_hit, t); t->bh_wd = qemu_bh_new(watchdog_hit, t); - t->ptimer_t0 = ptimer_init(t->bh_t0); - t->ptimer_t1 = ptimer_init(t->bh_t1); - t->ptimer_wd = ptimer_init(t->bh_wd); + t->ptimer_t0 = ptimer_init(t->bh_t0, PTIMER_POLICY_DEFAULT); + t->ptimer_t1 = ptimer_init(t->bh_t1, PTIMER_POLICY_DEFAULT); + t->ptimer_wd = ptimer_init(t->bh_wd, PTIMER_POLICY_DEFAULT); sysbus_init_irq(dev, &t->irq); sysbus_init_irq(dev, &t->nmi); diff --git a/hw/timer/exynos4210_mct.c b/hw/timer/exynos4210_mct.c index ae69345f0d..0c189348ae 100644 --- a/hw/timer/exynos4210_mct.c +++ b/hw/timer/exynos4210_mct.c @@ -1431,15 +1431,16 @@ static void 
exynos4210_mct_init(Object *obj) /* Global timer */ bh[0] = qemu_bh_new(exynos4210_gfrc_event, s); - s->g_timer.ptimer_frc = ptimer_init(bh[0]); + s->g_timer.ptimer_frc = ptimer_init(bh[0], PTIMER_POLICY_DEFAULT); memset(&s->g_timer.reg, 0, sizeof(struct gregs)); /* Local timers */ for (i = 0; i < 2; i++) { bh[0] = qemu_bh_new(exynos4210_ltick_event, &s->l_timer[i]); bh[1] = qemu_bh_new(exynos4210_lfrc_event, &s->l_timer[i]); - s->l_timer[i].tick_timer.ptimer_tick = ptimer_init(bh[0]); - s->l_timer[i].ptimer_frc = ptimer_init(bh[1]); + s->l_timer[i].tick_timer.ptimer_tick = + ptimer_init(bh[0], PTIMER_POLICY_DEFAULT); + s->l_timer[i].ptimer_frc = ptimer_init(bh[1], PTIMER_POLICY_DEFAULT); s->l_timer[i].id = i; } diff --git a/hw/timer/exynos4210_pwm.c b/hw/timer/exynos4210_pwm.c index 0e9e2e9bf5..f5765075c7 100644 --- a/hw/timer/exynos4210_pwm.c +++ b/hw/timer/exynos4210_pwm.c @@ -390,7 +390,7 @@ static void exynos4210_pwm_init(Object *obj) for (i = 0; i < EXYNOS4210_PWM_TIMERS_NUM; i++) { bh = qemu_bh_new(exynos4210_pwm_tick, &s->timer[i]); sysbus_init_irq(dev, &s->timer[i].irq); - s->timer[i].ptimer = ptimer_init(bh); + s->timer[i].ptimer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); s->timer[i].id = i; s->timer[i].parent = s; } diff --git a/hw/timer/exynos4210_rtc.c b/hw/timer/exynos4210_rtc.c index da4dd451b9..1a648c5d9e 100644 --- a/hw/timer/exynos4210_rtc.c +++ b/hw/timer/exynos4210_rtc.c @@ -555,12 +555,12 @@ static void exynos4210_rtc_init(Object *obj) QEMUBH *bh; bh = qemu_bh_new(exynos4210_rtc_tick, s); - s->ptimer = ptimer_init(bh); + s->ptimer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(s->ptimer, RTC_BASE_FREQ); exynos4210_rtc_update_freq(s, 0); bh = qemu_bh_new(exynos4210_rtc_1Hz_tick, s); - s->ptimer_1Hz = ptimer_init(bh); + s->ptimer_1Hz = ptimer_init(bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(s->ptimer_1Hz, RTC_BASE_FREQ); sysbus_init_irq(dev, &s->alm_irq); diff --git a/hw/timer/grlib_gptimer.c b/hw/timer/grlib_gptimer.c index dd000f5afa..4ed96e970a 100644 --- a/hw/timer/grlib_gptimer.c +++ b/hw/timer/grlib_gptimer.c @@ -26,7 +26,6 @@ #include "hw/sysbus.h" #include "qemu/timer.h" #include "hw/ptimer.h" -#include "qemu/timer.h" #include "qemu/main-loop.h" #include "trace.h" @@ -363,7 +362,7 @@ static int grlib_gptimer_init(SysBusDevice *dev) timer->unit = unit; timer->bh = qemu_bh_new(grlib_gptimer_hit, timer); - timer->ptimer = ptimer_init(timer->bh); + timer->ptimer = ptimer_init(timer->bh, PTIMER_POLICY_DEFAULT); timer->id = i; /* One IRQ line for each timer */ diff --git a/hw/timer/imx_epit.c b/hw/timer/imx_epit.c index eddf3481e8..8677b753b1 100644 --- a/hw/timer/imx_epit.c +++ b/hw/timer/imx_epit.c @@ -30,7 +30,7 @@ } \ } while (0) -static char const *imx_epit_reg_name(uint32_t reg) +static const char *imx_epit_reg_name(uint32_t reg) { switch (reg) { case 0: @@ -314,10 +314,10 @@ static void imx_epit_realize(DeviceState *dev, Error **errp) 0x00001000); sysbus_init_mmio(sbd, &s->iomem); - s->timer_reload = ptimer_init(NULL); + s->timer_reload = ptimer_init(NULL, PTIMER_POLICY_DEFAULT); bh = qemu_bh_new(imx_epit_cmp, s); - s->timer_cmp = ptimer_init(bh); + s->timer_cmp = ptimer_init(bh, PTIMER_POLICY_DEFAULT); } static void imx_epit_class_init(ObjectClass *klass, void *data) diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c index 82bc73cb86..010ccbf207 100644 --- a/hw/timer/imx_gpt.c +++ b/hw/timer/imx_gpt.c @@ -29,7 +29,7 @@ } \ } while (0) -static char const *imx_gpt_reg_name(uint32_t reg) +static const char *imx_gpt_reg_name(uint32_t reg) { switch (reg) 
{ case 0: @@ -461,7 +461,7 @@ static void imx_gpt_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(sbd, &s->iomem); bh = qemu_bh_new(imx_gpt_timeout, s); - s->timer = ptimer_init(bh); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); } static void imx_gpt_class_init(ObjectClass *klass, void *data) diff --git a/hw/timer/lm32_timer.c b/hw/timer/lm32_timer.c index e45a65bb9d..2a07b59524 100644 --- a/hw/timer/lm32_timer.c +++ b/hw/timer/lm32_timer.c @@ -184,7 +184,7 @@ static void lm32_timer_init(Object *obj) sysbus_init_irq(dev, &s->irq); s->bh = qemu_bh_new(timer_hit, s); - s->ptimer = ptimer_init(s->bh); + s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); memory_region_init_io(&s->iomem, obj, &timer_ops, s, "timer", R_MAX * 4); diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c index ea625f25ce..da209d02f0 100644 --- a/hw/timer/mc146818rtc.c +++ b/hw/timer/mc146818rtc.c @@ -717,11 +717,18 @@ static void rtc_set_date_from_host(ISADevice *dev) rtc_set_cmos(s, &tm); } +static void rtc_pre_save(void *opaque) +{ + RTCState *s = opaque; + + rtc_update_time(s); +} + static int rtc_post_load(void *opaque, int version_id) { RTCState *s = opaque; - if (version_id <= 2) { + if (version_id <= 2 || rtc_clock == QEMU_CLOCK_REALTIME) { rtc_set_time(s); s->offset = 0; check_update_timer(s); @@ -764,6 +771,7 @@ static const VMStateDescription vmstate_rtc = { .name = "mc146818rtc", .version_id = 3, .minimum_version_id = 1, + .pre_save = rtc_pre_save, .post_load = rtc_post_load, .fields = (VMStateField[]) { VMSTATE_BUFFER(cmos_data, RTCState), diff --git a/hw/timer/milkymist-sysctl.c b/hw/timer/milkymist-sysctl.c index 21948328ce..44885907c9 100644 --- a/hw/timer/milkymist-sysctl.c +++ b/hw/timer/milkymist-sysctl.c @@ -281,8 +281,8 @@ static void milkymist_sysctl_init(Object *obj) s->bh0 = qemu_bh_new(timer0_hit, s); s->bh1 = qemu_bh_new(timer1_hit, s); - s->ptimer0 = ptimer_init(s->bh0); - s->ptimer1 = ptimer_init(s->bh1); + s->ptimer0 = ptimer_init(s->bh0, PTIMER_POLICY_DEFAULT); + s->ptimer1 = ptimer_init(s->bh1, PTIMER_POLICY_DEFAULT); memory_region_init_io(&s->regs_region, obj, &sysctl_mmio_ops, s, "milkymist-sysctl", R_MAX * 4); diff --git a/hw/timer/puv3_ost.c b/hw/timer/puv3_ost.c index 93650b7990..0b3d717e60 100644 --- a/hw/timer/puv3_ost.c +++ b/hw/timer/puv3_ost.c @@ -125,7 +125,7 @@ static int puv3_ost_init(SysBusDevice *dev) sysbus_init_irq(dev, &s->irq); s->bh = qemu_bh_new(puv3_ost_tick, s); - s->ptimer = ptimer_init(s->bh); + s->ptimer = ptimer_init(s->bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(s->ptimer, 50 * 1000 * 1000); memory_region_init_io(&s->iomem, OBJECT(s), &puv3_ost_ops, s, "puv3_ost", diff --git a/hw/timer/sh_timer.c b/hw/timer/sh_timer.c index 255b2fc910..9afb2d048c 100644 --- a/hw/timer/sh_timer.c +++ b/hw/timer/sh_timer.c @@ -203,7 +203,7 @@ static void *sh_timer_init(uint32_t freq, int feat, qemu_irq irq) s->irq = irq; bh = qemu_bh_new(sh_timer_tick, s); - s->timer = ptimer_init(bh); + s->timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); sh_timer_write(s, OFFSET_TCOR >> 2, s->tcor); sh_timer_write(s, OFFSET_TCNT >> 2, s->tcnt); diff --git a/hw/timer/slavio_timer.c b/hw/timer/slavio_timer.c index fb3e08bedc..bfee1f3027 100644 --- a/hw/timer/slavio_timer.c +++ b/hw/timer/slavio_timer.c @@ -389,7 +389,7 @@ static int slavio_timer_init1(SysBusDevice *dev) tc->timer_index = i; bh = qemu_bh_new(slavio_timer_irq, tc); - s->cputimer[i].timer = ptimer_init(bh); + s->cputimer[i].timer = ptimer_init(bh, PTIMER_POLICY_DEFAULT); 
ptimer_set_period(s->cputimer[i].timer, TIMER_PERIOD); size = i == 0 ? SYS_TIMER_SIZE : CPU_TIMER_SIZE; diff --git a/hw/timer/stm32f2xx_timer.c b/hw/timer/stm32f2xx_timer.c index bf0fb288c4..e5f5e14a90 100644 --- a/hw/timer/stm32f2xx_timer.c +++ b/hw/timer/stm32f2xx_timer.c @@ -51,6 +51,15 @@ static void stm32f2xx_timer_interrupt(void *opaque) qemu_irq_pulse(s->irq); stm32f2xx_timer_set_alarm(s, s->hit_time); } + + if (s->tim_ccmr1 & (TIM_CCMR1_OC2M2 | TIM_CCMR1_OC2M1) && + !(s->tim_ccmr1 & TIM_CCMR1_OC2M0) && + s->tim_ccmr1 & TIM_CCMR1_OC2PE && + s->tim_ccer & TIM_CCER_CC2E) { + /* PWM 2 - Mode 1 */ + DB_PRINT("PWM2 Duty Cycle: %d%%\n", + s->tim_ccr2 / (100 * (s->tim_psc + 1))); + } } static inline int64_t stm32f2xx_ns_to_ticks(STM32F2XXTimerState *s, int64_t t) @@ -208,7 +217,7 @@ static void stm32f2xx_timer_write(void *opaque, hwaddr offset, return; case TIM_PSC: timer_val = stm32f2xx_ns_to_ticks(s, now) - s->tick_offset; - s->tim_psc = value; + s->tim_psc = value & 0xFFFF; value = timer_val; break; case TIM_CNT: diff --git a/hw/timer/xilinx_timer.c b/hw/timer/xilinx_timer.c index 2ea970dc9d..59439c05be 100644 --- a/hw/timer/xilinx_timer.c +++ b/hw/timer/xilinx_timer.c @@ -218,7 +218,7 @@ static void xilinx_timer_realize(DeviceState *dev, Error **errp) xt->parent = t; xt->nr = i; xt->bh = qemu_bh_new(timer_hit, xt); - xt->ptimer = ptimer_init(xt->bh); + xt->ptimer = ptimer_init(xt->bh, PTIMER_POLICY_DEFAULT); ptimer_set_freq(xt->ptimer, t->freq_hz); } diff --git a/hw/tpm/tpm_passthrough.c b/hw/tpm/tpm_passthrough.c index e88c0d20bc..9234eb3459 100644 --- a/hw/tpm/tpm_passthrough.c +++ b/hw/tpm/tpm_passthrough.c @@ -165,8 +165,7 @@ static int tpm_passthrough_unix_tx_bufs(TPMPassthruState *tpm_pt, ret = tpm_passthrough_unix_write(tpm_pt->tpm_fd, in, in_len); if (ret != in_len) { - if (!tpm_pt->tpm_op_canceled || - (tpm_pt->tpm_op_canceled && errno != ECANCELED)) { + if (!tpm_pt->tpm_op_canceled || errno != ECANCELED) { error_report("tpm_passthrough: error while transmitting data " "to TPM: %s (%i)", strerror(errno), errno); @@ -178,8 +177,7 @@ static int tpm_passthrough_unix_tx_bufs(TPMPassthruState *tpm_pt, ret = tpm_passthrough_unix_read(tpm_pt->tpm_fd, out, out_len); if (ret < 0) { - if (!tpm_pt->tpm_op_canceled || - (tpm_pt->tpm_op_canceled && errno != ECANCELED)) { + if (!tpm_pt->tpm_op_canceled || errno != ECANCELED) { error_report("tpm_passthrough: error while reading data from " "TPM: %s (%i)", strerror(errno), errno); diff --git a/hw/tpm/tpm_tis.c b/hw/tpm/tpm_tis.c index 381e7266ea..a6440fef91 100644 --- a/hw/tpm/tpm_tis.c +++ b/hw/tpm/tpm_tis.c @@ -34,7 +34,6 @@ #include "qapi/error.h" #include "qemu-common.h" #include "qemu/main-loop.h" -#include "sysemu/tpm_backend.h" #define DEBUG_TIS 0 diff --git a/hw/tricore/tricore_testboard.c b/hw/tricore/tricore_testboard.c index 8d3520f5be..19dd587207 100644 --- a/hw/tricore/tricore_testboard.c +++ b/hw/tricore/tricore_testboard.c @@ -46,7 +46,7 @@ static void tricore_load_kernel(CPUTriCoreState *env) long kernel_size; kernel_size = load_elf(tricoretb_binfo.kernel_filename, NULL, - NULL, (uint64_t *)&entry, NULL, + NULL, &entry, NULL, NULL, 0, EM_TRICORE, 1, 0); if (kernel_size <= 0) { diff --git a/hw/unicore32/puv3.c b/hw/unicore32/puv3.c index 31cd171016..032078fd3e 100644 --- a/hw/unicore32/puv3.c +++ b/hw/unicore32/puv3.c @@ -13,7 +13,6 @@ #include "qapi/error.h" #include "qemu-common.h" #include "cpu.h" -#include "qemu-common.h" #include "ui/console.h" #include "elf.h" #include "exec/address-spaces.h" diff --git 
a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c index 3213f9f8af..eceb5f3ee2 100644 --- a/hw/usb/ccid-card-emulated.c +++ b/hw/usb/ccid-card-emulated.c @@ -547,7 +547,7 @@ static int emulated_initfn(CCIDCardState *base) return 0; } -static int emulated_exitfn(CCIDCardState *base) +static void emulated_exitfn(CCIDCardState *base) { EmulatedState *card = EMULATED_CCID_CARD(base); VEvent *vevent = vevent_new(VEVENT_LAST, NULL, NULL); @@ -564,7 +564,6 @@ static int emulated_exitfn(CCIDCardState *base) qemu_mutex_destroy(&card->handle_apdu_mutex); qemu_mutex_destroy(&card->vreader_mutex); qemu_mutex_destroy(&card->event_list_mutex); - return 0; } static Property emulated_card_properties[] = { diff --git a/hw/usb/ccid-card-passthru.c b/hw/usb/ccid-card-passthru.c index c0e90e501c..88cb6d8978 100644 --- a/hw/usb/ccid-card-passthru.c +++ b/hw/usb/ccid-card-passthru.c @@ -48,7 +48,7 @@ typedef struct PassthruState PassthruState; struct PassthruState { CCIDCardState base; - CharDriverState *cs; + CharBackend cs; uint8_t vscard_in_data[VSCARD_IN_SIZE]; uint32_t vscard_in_pos; uint32_t vscard_in_hdr; @@ -75,8 +75,11 @@ static void ccid_card_vscard_send_msg(PassthruState *s, scr_msg_header.type = htonl(type); scr_msg_header.reader_id = htonl(reader_id); scr_msg_header.length = htonl(length); - qemu_chr_fe_write(s->cs, (uint8_t *)&scr_msg_header, sizeof(VSCMsgHeader)); - qemu_chr_fe_write(s->cs, payload, length); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->cs, (uint8_t *)&scr_msg_header, + sizeof(VSCMsgHeader)); + qemu_chr_fe_write_all(&s->cs, payload, length); } static void ccid_card_vscard_send_apdu(PassthruState *s, @@ -261,7 +264,10 @@ static void ccid_card_vscard_handle_message(PassthruState *card, static void ccid_card_vscard_drop_connection(PassthruState *card) { - qemu_chr_delete(card->cs); + CharDriverState *chr = qemu_chr_fe_get_driver(&card->cs); + + qemu_chr_fe_deinit(&card->cs); + qemu_chr_delete(chr); card->vscard_in_pos = card->vscard_in_hdr = 0; } @@ -306,8 +312,6 @@ static void ccid_card_vscard_event(void *opaque, int event) case CHR_EVENT_BREAK: card->vscard_in_pos = card->vscard_in_hdr = 0; break; - case CHR_EVENT_FOCUS: - break; case CHR_EVENT_OPENED: DPRINTF(card, D_INFO, "%s: CHR_EVENT_OPENED\n", __func__); break; @@ -321,7 +325,7 @@ static void passthru_apdu_from_guest( { PassthruState *card = PASSTHRU_CCID_CARD(base); - if (!card->cs) { + if (!qemu_chr_fe_get_driver(&card->cs)) { printf("ccid-passthru: no chardev, discarding apdu length %d\n", len); return; } @@ -342,12 +346,12 @@ static int passthru_initfn(CCIDCardState *base) card->vscard_in_pos = 0; card->vscard_in_hdr = 0; - if (card->cs) { + if (qemu_chr_fe_get_driver(&card->cs)) { DPRINTF(card, D_INFO, "initing chardev\n"); - qemu_chr_add_handlers(card->cs, + qemu_chr_fe_set_handlers(&card->cs, ccid_card_vscard_can_read, ccid_card_vscard_read, - ccid_card_vscard_event, card); + ccid_card_vscard_event, card, NULL, true); ccid_card_vscard_send_init(card); } else { error_report("missing chardev"); @@ -361,11 +365,6 @@ static int passthru_initfn(CCIDCardState *base) return 0; } -static int passthru_exitfn(CCIDCardState *base) -{ - return 0; -} - static VMStateDescription passthru_vmstate = { .name = "ccid-card-passthru", .version_id = 1, @@ -392,7 +391,6 @@ static void passthru_class_initfn(ObjectClass *klass, void *data) CCIDCardClass *cc = CCID_CARD_CLASS(klass); cc->initfn = passthru_initfn; - cc->exitfn = passthru_exitfn; 
cc->get_atr = passthru_get_atr; cc->apdu_from_guest = passthru_apdu_from_guest; set_bit(DEVICE_CATEGORY_INPUT, dc->categories); diff --git a/hw/usb/ccid.h b/hw/usb/ccid.h index 9334da8acd..1f070116d6 100644 --- a/hw/usb/ccid.h +++ b/hw/usb/ccid.h @@ -33,7 +33,7 @@ typedef struct CCIDCardClass { void (*apdu_from_guest)(CCIDCardState *card, const uint8_t *apdu, uint32_t len); - int (*exitfn)(CCIDCardState *card); + void (*exitfn)(CCIDCardState *card); int (*initfn)(CCIDCardState *card); } CCIDCardClass; diff --git a/hw/usb/desc.c b/hw/usb/desc.c index 5e0e1d157e..7828e52c6f 100644 --- a/hw/usb/desc.c +++ b/hw/usb/desc.c @@ -556,9 +556,7 @@ void usb_desc_create_serial(USBDevice *dev) DeviceState *hcd = dev->qdev.parent_bus->parent; const USBDesc *desc = usb_device_get_usb_desc(dev); int index = desc->id.iSerialNumber; - char serial[64]; - char *path; - int dst; + char *path, *serial; if (dev->serial) { /* 'serial' usb bus property has priority if present */ @@ -567,14 +565,16 @@ void usb_desc_create_serial(USBDevice *dev) } assert(index != 0 && desc->str[index] != NULL); - dst = snprintf(serial, sizeof(serial), "%s", desc->str[index]); path = qdev_get_dev_path(hcd); if (path) { - dst += snprintf(serial+dst, sizeof(serial)-dst, "-%s", path); + serial = g_strdup_printf("%s-%s-%s", desc->str[index], + path, dev->port->path); + } else { + serial = g_strdup_printf("%s-%s", desc->str[index], dev->port->path); } - dst += snprintf(serial+dst, sizeof(serial)-dst, "-%s", dev->port->path); usb_desc_set_string(dev, index, serial); g_free(path); + g_free(serial); } const char *usb_desc_get_string(USBDevice *dev, uint8_t index) diff --git a/hw/usb/dev-mtp.c b/hw/usb/dev-mtp.c index 1be85ae75a..9cb0f50750 100644 --- a/hw/usb/dev-mtp.c +++ b/hw/usb/dev-mtp.c @@ -17,7 +17,6 @@ #include #ifdef CONFIG_INOTIFY1 #include -#include "qapi/error.h" #include "qemu/main-loop.h" #endif @@ -48,6 +47,9 @@ enum mtp_code { CMD_GET_OBJECT_INFO = 0x1008, CMD_GET_OBJECT = 0x1009, CMD_GET_PARTIAL_OBJECT = 0x101b, + CMD_GET_OBJECT_PROPS_SUPPORTED = 0x9801, + CMD_GET_OBJECT_PROP_DESC = 0x9802, + CMD_GET_OBJECT_PROP_VALUE = 0x9803, /* response codes */ RES_OK = 0x2001, @@ -59,10 +61,12 @@ enum mtp_code { RES_INCOMPLETE_TRANSFER = 0x2007, RES_INVALID_STORAGE_ID = 0x2008, RES_INVALID_OBJECT_HANDLE = 0x2009, + RES_INVALID_OBJECT_FORMAT_CODE = 0x200b, RES_SPEC_BY_FORMAT_UNSUPPORTED = 0x2014, RES_INVALID_PARENT_OBJECT = 0x201a, RES_INVALID_PARAMETER = 0x201d, RES_SESSION_ALREADY_OPEN = 0x201e, + RES_INVALID_OBJECT_PROP_CODE = 0xA801, /* format codes */ FMT_UNDEFINED_OBJECT = 0x3000, @@ -72,6 +76,22 @@ enum mtp_code { EVT_OBJ_ADDED = 0x4002, EVT_OBJ_REMOVED = 0x4003, EVT_OBJ_INFO_CHANGED = 0x4007, + + /* object properties */ + PROP_STORAGE_ID = 0xDC01, + PROP_OBJECT_FORMAT = 0xDC02, + PROP_OBJECT_COMPRESSED_SIZE = 0xDC04, + PROP_PARENT_OBJECT = 0xDC0B, + PROP_PERSISTENT_UNIQUE_OBJECT_IDENTIFIER = 0xDC41, + PROP_NAME = 0xDC44, +}; + +enum mtp_data_type { + DATA_TYPE_UINT16 = 0x0004, + DATA_TYPE_UINT32 = 0x0006, + DATA_TYPE_UINT64 = 0x0008, + DATA_TYPE_UINT128 = 0x000a, + DATA_TYPE_STRING = 0xffff, }; typedef struct { @@ -115,8 +135,8 @@ struct MTPControl { struct MTPData { uint16_t code; uint32_t trans; - uint32_t offset; - uint32_t length; + uint64_t offset; + uint64_t length; uint32_t alloc; uint8_t *data; bool first; @@ -778,6 +798,9 @@ static MTPData *usb_mtp_get_device_info(MTPState *s, MTPControl *c) CMD_GET_OBJECT_INFO, CMD_GET_OBJECT, CMD_GET_PARTIAL_OBJECT, + CMD_GET_OBJECT_PROPS_SUPPORTED, + CMD_GET_OBJECT_PROP_DESC, + 
CMD_GET_OBJECT_PROP_VALUE, }; static const uint16_t fmt[] = { FMT_UNDEFINED_OBJECT, @@ -883,7 +906,12 @@ static MTPData *usb_mtp_get_object_info(MTPState *s, MTPControl *c, usb_mtp_add_u32(d, QEMU_STORAGE_ID); usb_mtp_add_u16(d, o->format); usb_mtp_add_u16(d, 0); - usb_mtp_add_u32(d, o->stat.st_size); + + if (o->stat.st_size > 0xFFFFFFFF) { + usb_mtp_add_u32(d, 0xFFFFFFFF); + } else { + usb_mtp_add_u32(d, o->stat.st_size); + } usb_mtp_add_u16(d, 0); usb_mtp_add_u32(d, 0); @@ -966,6 +994,122 @@ static MTPData *usb_mtp_get_partial_object(MTPState *s, MTPControl *c, return d; } +static MTPData *usb_mtp_get_object_props_supported(MTPState *s, MTPControl *c) +{ + static const uint16_t props[] = { + PROP_STORAGE_ID, + PROP_OBJECT_FORMAT, + PROP_OBJECT_COMPRESSED_SIZE, + PROP_PARENT_OBJECT, + PROP_PERSISTENT_UNIQUE_OBJECT_IDENTIFIER, + PROP_NAME, + }; + MTPData *d = usb_mtp_data_alloc(c); + usb_mtp_add_u16_array(d, ARRAY_SIZE(props), props); + + return d; +} + +static MTPData *usb_mtp_get_object_prop_desc(MTPState *s, MTPControl *c) +{ + MTPData *d = usb_mtp_data_alloc(c); + switch (c->argv[0]) { + case PROP_STORAGE_ID: + usb_mtp_add_u16(d, PROP_STORAGE_ID); + usb_mtp_add_u16(d, DATA_TYPE_UINT32); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u8(d, 0x00); + break; + case PROP_OBJECT_FORMAT: + usb_mtp_add_u16(d, PROP_OBJECT_FORMAT); + usb_mtp_add_u16(d, DATA_TYPE_UINT16); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u16(d, 0x0000); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u8(d, 0x00); + break; + case PROP_OBJECT_COMPRESSED_SIZE: + usb_mtp_add_u16(d, PROP_OBJECT_COMPRESSED_SIZE); + usb_mtp_add_u16(d, DATA_TYPE_UINT64); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u64(d, 0x0000000000000000); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u8(d, 0x00); + break; + case PROP_PARENT_OBJECT: + usb_mtp_add_u16(d, PROP_PARENT_OBJECT); + usb_mtp_add_u16(d, DATA_TYPE_UINT32); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u8(d, 0x00); + break; + case PROP_PERSISTENT_UNIQUE_OBJECT_IDENTIFIER: + usb_mtp_add_u16(d, PROP_PERSISTENT_UNIQUE_OBJECT_IDENTIFIER); + usb_mtp_add_u16(d, DATA_TYPE_UINT128); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u64(d, 0x0000000000000000); + usb_mtp_add_u64(d, 0x0000000000000000); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u8(d, 0x00); + break; + case PROP_NAME: + usb_mtp_add_u16(d, PROP_NAME); + usb_mtp_add_u16(d, DATA_TYPE_STRING); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u8(d, 0x00); + usb_mtp_add_u32(d, 0x00000000); + usb_mtp_add_u8(d, 0x00); + break; + default: + usb_mtp_data_free(d); + return NULL; + } + + return d; +} + +static MTPData *usb_mtp_get_object_prop_value(MTPState *s, MTPControl *c, + MTPObject *o) +{ + MTPData *d = usb_mtp_data_alloc(c); + switch (c->argv[1]) { + case PROP_STORAGE_ID: + usb_mtp_add_u32(d, QEMU_STORAGE_ID); + break; + case PROP_OBJECT_FORMAT: + usb_mtp_add_u16(d, o->format); + break; + case PROP_OBJECT_COMPRESSED_SIZE: + usb_mtp_add_u64(d, o->stat.st_size); + break; + case PROP_PARENT_OBJECT: + if (o->parent == NULL) { + usb_mtp_add_u32(d, 0x00000000); + } else { + usb_mtp_add_u32(d, o->parent->handle); + } + break; + case PROP_PERSISTENT_UNIQUE_OBJECT_IDENTIFIER: + /* Should be persistent between sessions, + * but using our object ID is "good enough" + * for now */ + usb_mtp_add_u64(d, 0x0000000000000000); + usb_mtp_add_u64(d, o->handle); + break; + case PROP_NAME: + usb_mtp_add_str(d, o->name); + break; + default: 
usb_mtp_data_free(d); + return NULL; + } + + return d; +} + static void usb_mtp_command(MTPState *s, MTPControl *c) { MTPData *data_in = NULL; @@ -1113,6 +1257,43 @@ static void usb_mtp_command(MTPState *s, MTPControl *c) nres = 1; res0 = data_in->length; break; + case CMD_GET_OBJECT_PROPS_SUPPORTED: + if (c->argv[0] != FMT_UNDEFINED_OBJECT && + c->argv[0] != FMT_ASSOCIATION) { + usb_mtp_queue_result(s, RES_INVALID_OBJECT_FORMAT_CODE, + c->trans, 0, 0, 0); + return; + } + data_in = usb_mtp_get_object_props_supported(s, c); + break; + case CMD_GET_OBJECT_PROP_DESC: + if (c->argv[1] != FMT_UNDEFINED_OBJECT && + c->argv[1] != FMT_ASSOCIATION) { + usb_mtp_queue_result(s, RES_INVALID_OBJECT_FORMAT_CODE, + c->trans, 0, 0, 0); + return; + } + data_in = usb_mtp_get_object_prop_desc(s, c); + if (data_in == NULL) { + usb_mtp_queue_result(s, RES_INVALID_OBJECT_PROP_CODE, + c->trans, 0, 0, 0); + return; + } + break; + case CMD_GET_OBJECT_PROP_VALUE: + o = usb_mtp_object_lookup(s, c->argv[0]); + if (o == NULL) { + usb_mtp_queue_result(s, RES_INVALID_OBJECT_HANDLE, + c->trans, 0, 0, 0); + return; + } + data_in = usb_mtp_get_object_prop_value(s, c, o); + if (data_in == NULL) { + usb_mtp_queue_result(s, RES_INVALID_OBJECT_PROP_CODE, + c->trans, 0, 0, 0); + return; + } + break; default: trace_usb_mtp_op_unknown(s->dev.addr, c->code); usb_mtp_queue_result(s, RES_OPERATION_NOT_SUPPORTED, @@ -1193,10 +1374,15 @@ static void usb_mtp_handle_data(USBDevice *dev, USBPacket *p) } if (s->data_in != NULL) { MTPData *d = s->data_in; - int dlen = d->length - d->offset; + uint64_t dlen = d->length - d->offset; if (d->first) { trace_usb_mtp_data_in(s->dev.addr, d->trans, d->length); - container.length = cpu_to_le32(d->length + sizeof(container)); + if (d->length + sizeof(container) > 0xFFFFFFFF) { + container.length = cpu_to_le32(0xFFFFFFFF); + } else { + container.length = + cpu_to_le32(d->length + sizeof(container)); + } container.type = cpu_to_le16(TYPE_DATA); container.code = cpu_to_le16(d->code); container.trans = cpu_to_le32(d->trans); diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c index ba8538e60e..6066d9b0f7 100644 --- a/hw/usb/dev-serial.c +++ b/hw/usb/dev-serial.c @@ -103,7 +103,7 @@ typedef struct { uint8_t event_trigger; QEMUSerialSetParams params; int latency; /* ms */ - CharDriverState *cs; + CharBackend cs; } USBSerialState; #define TYPE_USB_SERIAL "usb-serial-dev" @@ -209,8 +209,10 @@ static uint8_t usb_get_modem_lines(USBSerialState *s) int flags; uint8_t ret; - if (qemu_chr_fe_ioctl(s->cs, CHR_IOCTL_SERIAL_GET_TIOCM, &flags) == -ENOTSUP) + if (qemu_chr_fe_ioctl(&s->cs, + CHR_IOCTL_SERIAL_GET_TIOCM, &flags) == -ENOTSUP) { return FTDI_CTS|FTDI_DSR|FTDI_RLSD; + } ret = 0; if (flags & CHR_TIOCM_CTS) @@ -260,7 +262,7 @@ static void usb_serial_handle_control(USBDevice *dev, USBPacket *p, case DeviceOutVendor | FTDI_SET_MDM_CTRL: { static int flags; - qemu_chr_fe_ioctl(s->cs,CHR_IOCTL_SERIAL_GET_TIOCM, &flags); + qemu_chr_fe_ioctl(&s->cs, CHR_IOCTL_SERIAL_GET_TIOCM, &flags); if (value & FTDI_SET_RTS) { if (value & FTDI_RTS) flags |= CHR_TIOCM_RTS; @@ -273,7 +275,7 @@ static void usb_serial_handle_control(USBDevice *dev, USBPacket *p, else flags &= ~CHR_TIOCM_DTR; } - qemu_chr_fe_ioctl(s->cs,CHR_IOCTL_SERIAL_SET_TIOCM, &flags); + qemu_chr_fe_ioctl(&s->cs, CHR_IOCTL_SERIAL_SET_TIOCM, &flags); break; } case DeviceOutVendor | FTDI_SET_FLOW_CTRL: @@ -292,7 +294,7 @@ static void usb_serial_handle_control(USBDevice *dev, USBPacket *p, divisor = 1; s->params.speed = (48000000 / 2) / (8 * divisor + 
subdivisor8); - qemu_chr_fe_ioctl(s->cs, CHR_IOCTL_SERIAL_SET_PARAMS, &s->params); + qemu_chr_fe_ioctl(&s->cs, CHR_IOCTL_SERIAL_SET_PARAMS, &s->params); break; } case DeviceOutVendor | FTDI_SET_DATA: @@ -321,7 +323,7 @@ static void usb_serial_handle_control(USBDevice *dev, USBPacket *p, DPRINTF("unsupported stop bits %d\n", value & FTDI_STOP); goto fail; } - qemu_chr_fe_ioctl(s->cs, CHR_IOCTL_SERIAL_SET_PARAMS, &s->params); + qemu_chr_fe_ioctl(&s->cs, CHR_IOCTL_SERIAL_SET_PARAMS, &s->params); /* TODO: TX ON/OFF */ break; case DeviceInVendor | FTDI_GET_MDM_ST: @@ -366,7 +368,9 @@ static void usb_serial_handle_data(USBDevice *dev, USBPacket *p) goto fail; for (i = 0; i < p->iov.niov; i++) { iov = p->iov.iov + i; - qemu_chr_fe_write(s->cs, iov->iov_base, iov->iov_len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&s->cs, iov->iov_base, iov->iov_len); } p->actual_length = p->iov.size; break; @@ -462,8 +466,6 @@ static void usb_serial_event(void *opaque, int event) case CHR_EVENT_BREAK: s->event_trigger |= FTDI_BI; break; - case CHR_EVENT_FOCUS: - break; case CHR_EVENT_OPENED: if (!s->dev.attached) { usb_device_attach(&s->dev, &error_abort); @@ -481,12 +483,13 @@ static void usb_serial_realize(USBDevice *dev, Error **errp) { USBSerialState *s = USB_SERIAL_DEV(dev); Error *local_err = NULL; + CharDriverState *chr = qemu_chr_fe_get_driver(&s->cs); usb_desc_create_serial(dev); usb_desc_init(dev); dev->auto_attach = 0; - if (!s->cs) { + if (!chr) { error_setg(errp, "Property chardev is required"); return; } @@ -497,11 +500,11 @@ static void usb_serial_realize(USBDevice *dev, Error **errp) return; } - qemu_chr_add_handlers(s->cs, usb_serial_can_read, usb_serial_read, - usb_serial_event, s); + qemu_chr_fe_set_handlers(&s->cs, usb_serial_can_read, usb_serial_read, + usb_serial_event, s, NULL, true); usb_serial_handle_reset(dev); - if (s->cs->be_open && !dev->attached) { + if (chr->be_open && !dev->attached) { usb_device_attach(dev, &error_abort); } } @@ -545,7 +548,7 @@ static USBDevice *usb_serial_init(USBBus *bus, const char *filename) filename++; snprintf(label, sizeof(label), "usbserial%d", index++); - cdrv = qemu_chr_new(label, filename, NULL); + cdrv = qemu_chr_new(label, filename); if (!cdrv) return NULL; @@ -563,7 +566,7 @@ static USBDevice *usb_braille_init(USBBus *bus, const char *unused) USBDevice *dev; CharDriverState *cdrv; - cdrv = qemu_chr_new("braille", "braille", NULL); + cdrv = qemu_chr_new("braille", "braille"); if (!cdrv) return NULL; diff --git a/hw/usb/dev-smartcard-reader.c b/hw/usb/dev-smartcard-reader.c index af4b851356..89e11b68c4 100644 --- a/hw/usb/dev-smartcard-reader.c +++ b/hw/usb/dev-smartcard-reader.c @@ -508,14 +508,14 @@ static void ccid_card_apdu_from_guest(CCIDCardState *card, } } -static int ccid_card_exitfn(CCIDCardState *card) +static void ccid_card_exitfn(CCIDCardState *card) { CCIDCardClass *cc = CCID_CARD_GET_CLASS(card); if (cc->exitfn) { - return cc->exitfn(card); + cc->exitfn(card); } - return 0; + } static int ccid_card_initfn(CCIDCardState *card) @@ -1279,7 +1279,6 @@ void ccid_card_card_inserted(CCIDCardState *card) static int ccid_card_exit(DeviceState *qdev) { - int ret = 0; CCIDCardState *card = CCID_CARD(qdev); USBDevice *dev = USB_DEVICE(qdev->parent_bus->parent); USBCCIDState *s = USB_CCID_DEV(dev); @@ -1287,9 +1286,9 @@ static int ccid_card_exit(DeviceState *qdev) if (ccid_card_inserted(s)) { ccid_card_card_removed(card); } - ret = ccid_card_exitfn(card); + 
ccid_card_exitfn(card); s->card = NULL; - return ret; + return 0; } static int ccid_card_init(DeviceState *qdev) diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 7bdac64253..983169b4cb 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -1190,6 +1190,7 @@ static int ehci_init_transfer(EHCIPacket *p) while (bytes > 0) { if (cpage > 4) { fprintf(stderr, "cpage out of range (%d)\n", cpage); + qemu_sglist_destroy(&p->sgl); return -1; } @@ -1426,6 +1427,7 @@ static int ehci_process_itd(EHCIState *ehci, if (off + len > 4096) { /* transfer crosses page border */ if (pg == 6) { + qemu_sglist_destroy(&ehci->isgl); return -1; /* avoid page pg + 1 */ } ptr2 = (itd->bufptr[pg + 1] & ITD_BUFPTR_MASK); diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index fa5703832c..c82a92fff7 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -2139,7 +2139,7 @@ static const TypeInfo ohci_pci_info = { static Property ohci_sysbus_properties[] = { DEFINE_PROP_UINT32("num-ports", OHCISysBusState, num_ports, 3), - DEFINE_PROP_DMAADDR("dma-offset", OHCISysBusState, dma_offset, 3), + DEFINE_PROP_DMAADDR("dma-offset", OHCISysBusState, dma_offset, 0), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 188f95416a..4acf0c6dd8 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "hw/hw.h" #include "qemu/timer.h" +#include "qemu/queue.h" #include "hw/usb.h" #include "hw/pci/pci.h" #include "hw/pci/msi.h" @@ -46,14 +47,14 @@ #define MAXSLOTS 64 #define MAXINTRS 16 -#define TD_QUEUE 24 - /* Very pessimistic, let's hope it's enough for all cases */ -#define EV_QUEUE (((3*TD_QUEUE)+16)*MAXSLOTS) +#define EV_QUEUE (((3 * 24) + 16) * MAXSLOTS) /* Do not deliver ER Full events. NEC's driver does some things not bound * to the specs when it gets them */ #define ER_FULL_HACK +#define TRB_LINK_LIMIT 4 + #define LEN_CAP 0x40 #define LEN_OPER (0x400 + 0x10 * MAXPORTS) #define LEN_RUNTIME ((MAXINTRS + 1) * 0x20) @@ -343,7 +344,7 @@ typedef struct XHCIPort { } XHCIPort; typedef struct XHCITransfer { - XHCIState *xhci; + XHCIEPContext *epctx; USBPacket packet; QEMUSGList sgl; bool running_async; @@ -351,15 +352,12 @@ typedef struct XHCITransfer { bool complete; bool int_req; unsigned int iso_pkts; - unsigned int slotid; - unsigned int epid; unsigned int streamid; bool in_xfer; bool iso_xfer; bool timed_xfer; unsigned int trb_count; - unsigned int trb_alloced; XHCITRB *trbs; TRBCCode status; @@ -369,6 +367,8 @@ typedef struct XHCITransfer { unsigned int cur_pkt; uint64_t mfindex_kick; + + QTAILQ_ENTRY(XHCITransfer) next; } XHCITransfer; struct XHCIStreamContext { @@ -383,9 +383,8 @@ struct XHCIEPContext { unsigned int epid; XHCIRing ring; - unsigned int next_xfer; - unsigned int comp_xfer; - XHCITransfer transfers[TD_QUEUE]; + uint32_t xfer_count; + QTAILQ_HEAD(, XHCITransfer) transfers; XHCITransfer *retry; EPType type; dma_addr_t pctx; @@ -508,13 +507,13 @@ enum xhci_flags { static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid, unsigned int streamid); +static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid); static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid); static void xhci_xfer_report(XHCITransfer *xfer); static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v); static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v); -static USBEndpoint *xhci_epid_to_usbep(XHCIState *xhci, - unsigned int slotid, unsigned int epid); +static 
USBEndpoint *xhci_epid_to_usbep(XHCIEPContext *epctx); static const char *TRBType_names[] = { [TRB_RESERVED] = "TRB_RESERVED", @@ -1000,6 +999,7 @@ static TRBType xhci_ring_fetch(XHCIState *xhci, XHCIRing *ring, XHCITRB *trb, dma_addr_t *addr) { PCIDevice *pci_dev = PCI_DEVICE(xhci); + uint32_t link_cnt = 0; while (1) { TRBType type; @@ -1026,6 +1026,9 @@ static TRBType xhci_ring_fetch(XHCIState *xhci, XHCIRing *ring, XHCITRB *trb, ring->dequeue += TRB_SIZE; return type; } else { + if (++link_cnt > TRB_LINK_LIMIT) { + return 0; + } ring->dequeue = xhci_mask64(trb->parameter); if (trb->control & TRB_LK_TC) { ring->ccs = !ring->ccs; @@ -1043,6 +1046,7 @@ static int xhci_ring_chain_length(XHCIState *xhci, const XHCIRing *ring) bool ccs = ring->ccs; /* hack to bundle together the two/three TDs that make a setup transfer */ bool control_td_set = 0; + uint32_t link_cnt = 0; while (1) { TRBType type; @@ -1058,6 +1062,9 @@ static int xhci_ring_chain_length(XHCIState *xhci, const XHCIRing *ring) type = TRB_TYPE(trb); if (type == TR_LINK) { + if (++link_cnt > TRB_LINK_LIMIT) { + return -length; + } dequeue = xhci_mask64(trb.parameter); if (trb.control & TRB_LK_TC) { ccs = !ccs; @@ -1192,7 +1199,7 @@ static int xhci_epmask_to_eps_with_streams(XHCIState *xhci, } epctx = slot->eps[i - 1]; - ep = xhci_epid_to_usbep(xhci, slotid, i); + ep = xhci_epid_to_usbep(epctx); if (!epctx || !epctx->nr_pstreams || !ep) { continue; } @@ -1353,7 +1360,7 @@ static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx, static void xhci_ep_kick_timer(void *opaque) { XHCIEPContext *epctx = opaque; - xhci_kick_ep(epctx->xhci, epctx->slotid, epctx->epid, 0); + xhci_kick_epctx(epctx, 0); } static XHCIEPContext *xhci_alloc_epctx(XHCIState *xhci, @@ -1361,19 +1368,13 @@ static XHCIEPContext *xhci_alloc_epctx(XHCIState *xhci, unsigned int epid) { XHCIEPContext *epctx; - int i; epctx = g_new0(XHCIEPContext, 1); epctx->xhci = xhci; epctx->slotid = slotid; epctx->epid = epid; - for (i = 0; i < ARRAY_SIZE(epctx->transfers); i++) { - epctx->transfers[i].xhci = xhci; - epctx->transfers[i].slotid = slotid; - epctx->transfers[i].epid = epid; - usb_packet_init(&epctx->transfers[i].packet); - } + QTAILQ_INIT(&epctx->transfers); epctx->kick_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, xhci_ep_kick_timer, epctx); return epctx; @@ -1434,6 +1435,38 @@ static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid, return CC_SUCCESS; } +static XHCITransfer *xhci_ep_alloc_xfer(XHCIEPContext *epctx, + uint32_t length) +{ + uint32_t limit = epctx->nr_pstreams + 16; + XHCITransfer *xfer; + + if (epctx->xfer_count >= limit) { + return NULL; + } + + xfer = g_new0(XHCITransfer, 1); + xfer->epctx = epctx; + xfer->trbs = g_new(XHCITRB, length); + xfer->trb_count = length; + usb_packet_init(&xfer->packet); + + QTAILQ_INSERT_TAIL(&epctx->transfers, xfer, next); + epctx->xfer_count++; + + return xfer; +} + +static void xhci_ep_free_xfer(XHCITransfer *xfer) +{ + QTAILQ_REMOVE(&xfer->epctx->transfers, xfer, next); + xfer->epctx->xfer_count--; + + usb_packet_cleanup(&xfer->packet); + g_free(xfer->trbs); + g_free(xfer); +} + static int xhci_ep_nuke_one_xfer(XHCITransfer *t, TRBCCode report) { int killed = 0; @@ -1449,10 +1482,9 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t, TRBCCode report) killed = 1; } if (t->running_retry) { - XHCIEPContext *epctx = t->xhci->slots[t->slotid-1].eps[t->epid-1]; - if (epctx) { - epctx->retry = NULL; - timer_del(epctx->kick_timer); + if (t->epctx) { + t->epctx->retry = NULL; + timer_del(t->epctx->kick_timer); } 
t->running_retry = 0; killed = 1; @@ -1460,7 +1492,7 @@ static int xhci_ep_nuke_one_xfer(XHCITransfer *t, TRBCCode report) g_free(t->trbs); t->trbs = NULL; - t->trb_count = t->trb_alloced = 0; + t->trb_count = 0; return killed; } @@ -1470,7 +1502,8 @@ static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid, { XHCISlot *slot; XHCIEPContext *epctx; - int i, xferi, killed = 0; + XHCITransfer *xfer; + int killed = 0; USBEndpoint *ep = NULL; assert(slotid >= 1 && slotid <= xhci->numslots); assert(epid >= 1 && epid <= 31); @@ -1485,17 +1518,19 @@ static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid, epctx = slot->eps[epid-1]; - xferi = epctx->next_xfer; - for (i = 0; i < TD_QUEUE; i++) { - killed += xhci_ep_nuke_one_xfer(&epctx->transfers[xferi], report); + for (;;) { + xfer = QTAILQ_FIRST(&epctx->transfers); + if (xfer == NULL) { + break; + } + killed += xhci_ep_nuke_one_xfer(xfer, report); if (killed) { report = 0; /* Only report once */ } - epctx->transfers[xferi].packet.ep = NULL; - xferi = (xferi + 1) % TD_QUEUE; + xhci_ep_free_xfer(xfer); } - ep = xhci_epid_to_usbep(xhci, slotid, epid); + ep = xhci_epid_to_usbep(epctx); if (ep) { usb_device_ep_stopped(ep->dev, ep); } @@ -1507,7 +1542,6 @@ static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid, { XHCISlot *slot; XHCIEPContext *epctx; - int i; trace_usb_xhci_ep_disable(slotid, epid); assert(slotid >= 1 && slotid <= xhci->numslots); @@ -1528,10 +1562,6 @@ static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid, xhci_free_streams(epctx); } - for (i = 0; i < ARRAY_SIZE(epctx->transfers); i++) { - usb_packet_cleanup(&epctx->transfers[i].packet); - } - /* only touch guest RAM if we're not resetting the HC */ if (xhci->dcbaap_low || xhci->dcbaap_high) { xhci_set_ep_state(xhci, epctx, NULL, EP_DISABLED); @@ -1684,7 +1714,7 @@ static TRBCCode xhci_set_ep_dequeue(XHCIState *xhci, unsigned int slotid, static int xhci_xfer_create_sgl(XHCITransfer *xfer, int in_xfer) { - XHCIState *xhci = xfer->xhci; + XHCIState *xhci = xfer->epctx->xhci; int i; xfer->int_req = false; @@ -1743,7 +1773,7 @@ static void xhci_xfer_report(XHCITransfer *xfer) bool reported = 0; bool shortpkt = 0; XHCIEvent event = {ER_TRANSFER, CC_SUCCESS}; - XHCIState *xhci = xfer->xhci; + XHCIState *xhci = xfer->epctx->xhci; int i; left = xfer->packet.actual_length; @@ -1753,6 +1783,12 @@ static void xhci_xfer_report(XHCITransfer *xfer) unsigned int chunk = 0; switch (TRB_TYPE(*trb)) { + case TR_SETUP: + chunk = trb->status & 0x1ffff; + if (chunk > 8) { + chunk = 8; + } + break; case TR_DATA: case TR_NORMAL: case TR_ISOCH: @@ -1775,8 +1811,8 @@ static void xhci_xfer_report(XHCITransfer *xfer) if (!reported && ((trb->control & TRB_TR_IOC) || (shortpkt && (trb->control & TRB_TR_ISP)) || (xfer->status != CC_SUCCESS && left == 0))) { - event.slotid = xfer->slotid; - event.epid = xfer->epid; + event.slotid = xfer->epctx->slotid; + event.epid = xfer->epctx->epid; event.length = (trb->status & 0x1ffff) - chunk; event.flags = 0; event.ptr = trb->addr; @@ -1811,9 +1847,8 @@ static void xhci_xfer_report(XHCITransfer *xfer) static void xhci_stall_ep(XHCITransfer *xfer) { - XHCIState *xhci = xfer->xhci; - XHCISlot *slot = &xhci->slots[xfer->slotid-1]; - XHCIEPContext *epctx = slot->eps[xfer->epid-1]; + XHCIEPContext *epctx = xfer->epctx; + XHCIState *xhci = epctx->xhci; uint32_t err; XHCIStreamContext *sctx; @@ -1837,7 +1872,6 @@ static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, static int xhci_setup_packet(XHCITransfer *xfer) { - XHCIState 
*xhci = xfer->xhci; USBEndpoint *ep; int dir; @@ -1846,7 +1880,7 @@ static int xhci_setup_packet(XHCITransfer *xfer) if (xfer->packet.ep) { ep = xfer->packet.ep; } else { - ep = xhci_epid_to_usbep(xhci, xfer->slotid, xfer->epid); + ep = xhci_epid_to_usbep(xfer->epctx); if (!ep) { DPRINTF("xhci: slot %d has no device\n", xfer->slotid); @@ -1926,7 +1960,8 @@ static int xhci_fire_ctl_transfer(XHCIState *xhci, XHCITransfer *xfer) trb_setup = &xfer->trbs[0]; trb_status = &xfer->trbs[xfer->trb_count-1]; - trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid, xfer->streamid); + trace_usb_xhci_xfer_start(xfer, xfer->epctx->slotid, + xfer->epctx->epid, xfer->streamid); /* at most one Event Data TRB allowed after STATUS */ if (TRB_TYPE(*trb_status) == TR_EVDATA && xfer->trb_count > 2) { @@ -1969,7 +2004,7 @@ static int xhci_fire_ctl_transfer(XHCIState *xhci, XHCITransfer *xfer) xhci_complete_packet(xfer); if (!xfer->running_async && !xfer->running_retry) { - xhci_kick_ep(xhci, xfer->slotid, xfer->epid, 0); + xhci_kick_epctx(xfer->epctx, 0); } return 0; } @@ -2073,29 +2108,23 @@ static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx xhci_complete_packet(xfer); if (!xfer->running_async && !xfer->running_retry) { - xhci_kick_ep(xhci, xfer->slotid, xfer->epid, xfer->streamid); + xhci_kick_epctx(xfer->epctx, xfer->streamid); } return 0; } static int xhci_fire_transfer(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx) { - trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid, xfer->streamid); + trace_usb_xhci_xfer_start(xfer, xfer->epctx->slotid, + xfer->epctx->epid, xfer->streamid); return xhci_submit(xhci, xfer, epctx); } static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid, unsigned int streamid) { - XHCIStreamContext *stctx; XHCIEPContext *epctx; - XHCIRing *ring; - USBEndpoint *ep = NULL; - uint64_t mfindex; - int length; - int i; - trace_usb_xhci_ep_kick(slotid, epid, streamid); assert(slotid >= 1 && slotid <= xhci->numslots); assert(epid >= 1 && epid <= 31); @@ -2110,11 +2139,27 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, return; } + xhci_kick_epctx(epctx, streamid); +} + +static void xhci_kick_epctx(XHCIEPContext *epctx, unsigned int streamid) +{ + XHCIState *xhci = epctx->xhci; + XHCIStreamContext *stctx; + XHCITransfer *xfer; + XHCIRing *ring; + USBEndpoint *ep = NULL; + uint64_t mfindex; + int length; + int i; + + trace_usb_xhci_ep_kick(epctx->slotid, epctx->epid, streamid); + /* If the device has been detached, but the guest has not noticed this yet the 2 above checks will succeed, but we must NOT continue */ - if (!xhci->slots[slotid - 1].uport || - !xhci->slots[slotid - 1].uport->dev || - !xhci->slots[slotid - 1].uport->dev->attached) { + if (!xhci->slots[epctx->slotid - 1].uport || + !xhci->slots[epctx->slotid - 1].uport->dev || + !xhci->slots[epctx->slotid - 1].uport->dev->attached) { return; } @@ -2153,6 +2198,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, xhci_complete_packet(xfer); } assert(!xfer->running_retry); + xhci_ep_free_xfer(epctx->retry); epctx->retry = NULL; } @@ -2178,27 +2224,14 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, assert(ring->dequeue != 0); while (1) { - XHCITransfer *xfer = &epctx->transfers[epctx->next_xfer]; - if (xfer->running_async || xfer->running_retry) { - break; - } length = xhci_ring_chain_length(xhci, ring); - if (length < 0) { + if (length <= 0) { break; - } else if (length == 0) { - break; - } - if (xfer->trbs && 
xfer->trb_alloced < length) { - xfer->trb_count = 0; - xfer->trb_alloced = 0; - g_free(xfer->trbs); - xfer->trbs = NULL; } - if (!xfer->trbs) { - xfer->trbs = g_new(XHCITRB, length); - xfer->trb_alloced = length; + xfer = xhci_ep_alloc_xfer(epctx, length); + if (xfer == NULL) { + break; } - xfer->trb_count = length; for (i = 0; i < length; i++) { TRBType type; @@ -2207,33 +2240,27 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, } xfer->streamid = streamid; - if (epid == 1) { - if (xhci_fire_ctl_transfer(xhci, xfer) >= 0) { - epctx->next_xfer = (epctx->next_xfer + 1) % TD_QUEUE; - } else { - DPRINTF("xhci: error firing CTL transfer\n"); - } + if (epctx->epid == 1) { + xhci_fire_ctl_transfer(xhci, xfer); } else { - if (xhci_fire_transfer(xhci, xfer, epctx) >= 0) { - epctx->next_xfer = (epctx->next_xfer + 1) % TD_QUEUE; - } else { - if (!xfer->timed_xfer) { - DPRINTF("xhci: error firing data transfer\n"); - } - } + xhci_fire_transfer(xhci, xfer, epctx); + } + if (xfer->complete) { + xhci_ep_free_xfer(xfer); + xfer = NULL; } if (epctx->state == EP_HALTED) { break; } - if (xfer->running_retry) { + if (xfer != NULL && xfer->running_retry) { DPRINTF("xhci: xfer nacked, stopping schedule\n"); epctx->retry = xfer; break; } } - ep = xhci_epid_to_usbep(xhci, slotid, epid); + ep = xhci_epid_to_usbep(epctx); if (ep) { usb_device_flush_ep_queue(ep->dev, ep); } @@ -3464,7 +3491,10 @@ static void xhci_complete(USBPort *port, USBPacket *packet) return; } xhci_complete_packet(xfer); - xhci_kick_ep(xfer->xhci, xfer->slotid, xfer->epid, xfer->streamid); + xhci_kick_epctx(xfer->epctx, xfer->streamid); + if (xfer->complete) { + xhci_ep_free_xfer(xfer); + } } static void xhci_child_detach(USBPort *uport, USBDevice *child) @@ -3495,17 +3525,20 @@ static int xhci_find_epid(USBEndpoint *ep) } } -static USBEndpoint *xhci_epid_to_usbep(XHCIState *xhci, - unsigned int slotid, unsigned int epid) +static USBEndpoint *xhci_epid_to_usbep(XHCIEPContext *epctx) { - assert(slotid >= 1 && slotid <= xhci->numslots); + USBPort *uport; + uint32_t token; - if (!xhci->slots[slotid - 1].uport) { + if (!epctx) { return NULL; } - - return usb_ep_get(xhci->slots[slotid - 1].uport->dev, - (epid & 1) ? USB_TOKEN_IN : USB_TOKEN_OUT, epid >> 1); + uport = epctx->xhci->slots[epctx->slotid - 1].uport; + token = (epctx->epid & 1) ? 
USB_TOKEN_IN : USB_TOKEN_OUT; + if (!uport) { + return NULL; + } + return usb_ep_get(uport->dev, token, epctx->epid >> 1); } static void xhci_wakeup_endpoint(USBBus *bus, USBEndpoint *ep, @@ -3709,8 +3742,7 @@ static void usb_xhci_exit(PCIDevice *dev) /* destroy msix memory region */ if (dev->msix_table && dev->msix_pba && dev->msix_entry_used) { - memory_region_del_subregion(&xhci->mem, &dev->msix_table_mmio); - memory_region_del_subregion(&xhci->mem, &dev->msix_pba_mmio); + msix_uninit(dev, &xhci->mem, &xhci->mem); } usb_bus_release(&xhci->bus); diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c index e94672c155..bd81d71a98 100644 --- a/hw/usb/host-libusb.c +++ b/hw/usb/host-libusb.c @@ -743,10 +743,13 @@ static void usb_host_speed_compat(USBHostDevice *s) rc = libusb_get_ss_endpoint_companion_descriptor (ctx, endp, &endp_ss_comp); if (rc == LIBUSB_SUCCESS) { + int streams = endp_ss_comp->bmAttributes & 0x1f; + if (streams) { + compat_full = false; + compat_high = false; + } libusb_free_ss_endpoint_companion_descriptor (endp_ss_comp); - compat_full = false; - compat_high = false; } #endif break; diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index 444672a000..a65723781e 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -105,7 +105,7 @@ struct PacketIdQueue { struct USBRedirDevice { USBDevice dev; /* Properties */ - CharDriverState *cs; + CharBackend cs; uint8_t debug; char *filter_str; int32_t bootindex; @@ -132,6 +132,7 @@ struct USBRedirDevice { struct usbredirfilter_rule *filter_rules; int filter_rules_count; int compatible_speedmask; + VMChangeStateEntry *vmstate; }; #define TYPE_USB_REDIR "usb-redir" @@ -283,9 +284,10 @@ static gboolean usbredir_write_unblocked(GIOChannel *chan, GIOCondition cond, static int usbredir_write(void *priv, uint8_t *data, int count) { USBRedirDevice *dev = priv; + CharDriverState *chr = qemu_chr_fe_get_driver(&dev->cs); int r; - if (!dev->cs->be_open) { + if (!chr->be_open) { return 0; } @@ -294,10 +296,10 @@ static int usbredir_write(void *priv, uint8_t *data, int count) return 0; } - r = qemu_chr_fe_write(dev->cs, data, count); + r = qemu_chr_fe_write(&dev->cs, data, count); if (r < count) { if (!dev->watch) { - dev->watch = qemu_chr_fe_add_watch(dev->cs, G_IO_OUT|G_IO_HUP, + dev->watch = qemu_chr_fe_add_watch(&dev->cs, G_IO_OUT | G_IO_HUP, usbredir_write_unblocked, dev); } if (r < 0) { @@ -1375,7 +1377,7 @@ static void usbredir_realize(USBDevice *udev, Error **errp) USBRedirDevice *dev = USB_REDIRECT(udev); int i; - if (dev->cs == NULL) { + if (!qemu_chr_fe_get_driver(&dev->cs)) { error_setg(errp, QERR_MISSING_PARAMETER, "chardev"); return; } @@ -1406,10 +1408,12 @@ static void usbredir_realize(USBDevice *udev, Error **errp) dev->compatible_speedmask = USB_SPEED_MASK_FULL | USB_SPEED_MASK_HIGH; /* Let the backend know we are ready */ - qemu_chr_add_handlers(dev->cs, usbredir_chardev_can_read, - usbredir_chardev_read, usbredir_chardev_event, dev); + qemu_chr_fe_set_handlers(&dev->cs, usbredir_chardev_can_read, + usbredir_chardev_read, usbredir_chardev_event, + dev, NULL, true); - qemu_add_vm_change_state_handler(usbredir_vm_state_change, dev); + dev->vmstate = + qemu_add_vm_change_state_handler(usbredir_vm_state_change, dev); } static void usbredir_cleanup_device_queues(USBRedirDevice *dev) @@ -1426,9 +1430,11 @@ static void usbredir_cleanup_device_queues(USBRedirDevice *dev) static void usbredir_handle_destroy(USBDevice *udev) { USBRedirDevice *dev = USB_REDIRECT(udev); + CharDriverState *chr = qemu_chr_fe_get_driver(&dev->cs); + + 
qemu_chr_fe_deinit(&dev->cs); + qemu_chr_delete(chr); - qemu_chr_delete(dev->cs); - dev->cs = NULL; /* Note must be done after qemu_chr_close, as that causes a close event */ qemu_bh_delete(dev->chardev_close_bh); qemu_bh_delete(dev->device_reject_bh); @@ -1446,6 +1452,7 @@ static void usbredir_handle_destroy(USBDevice *udev) } free(dev->filter_rules); + qemu_del_vm_change_state_handler(dev->vmstate); } static int usbredir_check_filter(USBRedirDevice *dev) @@ -2036,18 +2043,22 @@ static void usbredir_interrupt_packet(void *priv, uint64_t id, } if (ep & USB_DIR_IN) { + bool q_was_empty; + if (dev->endpoint[EP2I(ep)].interrupt_started == 0) { DPRINTF("received int packet while not started ep %02X\n", ep); free(data); return; } - if (QTAILQ_EMPTY(&dev->endpoint[EP2I(ep)].bufpq)) { - usb_wakeup(usb_ep_get(&dev->dev, USB_TOKEN_IN, ep & 0x0f), 0); - } + q_was_empty = QTAILQ_EMPTY(&dev->endpoint[EP2I(ep)].bufpq); /* bufp_alloc also adds the packet to the ep queue */ bufp_alloc(dev, data, data_len, interrupt_packet->status, ep, data); + + if (q_was_empty) { + usb_wakeup(usb_ep_get(&dev->dev, USB_TOKEN_IN, ep & 0x0f), 0); + } } else { /* * We report output interrupt packets as completed directly upon diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c index 174d715e3e..8e676e6c96 100644 --- a/hw/usb/xen-usb.c +++ b/hw/usb/xen-usb.c @@ -47,7 +47,7 @@ struct timeval tv; \ \ gettimeofday(&tv, NULL); \ - xen_be_printf(xendev, lvl, "%8ld.%06ld xen-usb(%s):" fmt, \ + xen_pv_printf(xendev, lvl, "%8ld.%06ld xen-usb(%s):" fmt, \ tv.tv_sec, tv.tv_usec, __func__, ##args); \ } #define TR_BUS(xendev, fmt, args...) TR(xendev, 2, fmt, ##args) @@ -153,15 +153,15 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) } if (nr_segs > USBIF_MAX_SEGMENTS_PER_REQUEST) { - xen_be_printf(xendev, 0, "bad number of segments in request (%d)\n", + xen_pv_printf(xendev, 0, "bad number of segments in request (%d)\n", nr_segs); return -EINVAL; } for (i = 0; i < nr_segs; i++) { if ((unsigned)usbback_req->req.seg[i].offset + - (unsigned)usbback_req->req.seg[i].length > PAGE_SIZE) { - xen_be_printf(xendev, 0, "segment crosses page boundary\n"); + (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) { + xen_pv_printf(xendev, 0, "segment crosses page boundary\n"); return -EINVAL; } } @@ -183,7 +183,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) for (i = 0; i < usbback_req->nr_buffer_segs; i++) { seg = usbback_req->req.seg + i; - addr = usbback_req->buffer + i * PAGE_SIZE + seg->offset; + addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset; qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length); } } @@ -199,7 +199,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req) */ if (!usbback_req->nr_extra_segs) { - xen_be_printf(xendev, 0, "iso request without descriptor segments\n"); + xen_pv_printf(xendev, 0, "iso request without descriptor segments\n"); return -EINVAL; } @@ -314,7 +314,7 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status, RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&usbif->urb_ring, notify); if (notify) { - xen_be_send_notify(xendev); + xen_pv_send_notify(xendev); } } @@ -551,14 +551,14 @@ static void usbback_dispatch(struct usbback_req *usbback_req) ret = usbback_init_packet(usbback_req); if (ret) { - xen_be_printf(&usbif->xendev, 0, "invalid request\n"); + xen_pv_printf(&usbif->xendev, 0, "invalid request\n"); ret = -ESHUTDOWN; goto fail_free_urb; } ret = usbback_gnttab_map(usbback_req); if (ret) { - xen_be_printf(&usbif->xendev, 0, 
"invalid buffer, ret=%d\n", ret); + xen_pv_printf(&usbif->xendev, 0, "invalid buffer, ret=%d\n", ret); ret = -ESHUTDOWN; goto fail_free_urb; } @@ -590,7 +590,7 @@ static void usbback_hotplug_notify(struct usbback_info *usbif) /* Check for full ring. */ if ((RING_SIZE(ring) - ring->rsp_prod_pvt - ring->req_cons) == 0) { - xen_be_send_notify(&usbif->xendev); + xen_pv_send_notify(&usbif->xendev); return; } @@ -609,7 +609,7 @@ static void usbback_hotplug_notify(struct usbback_info *usbif) RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(ring, notify); if (notify) { - xen_be_send_notify(&usbif->xendev); + xen_pv_send_notify(&usbif->xendev); } TR_BUS(&usbif->xendev, "hotplug port %d speed %d\n", usb_hp->port, @@ -646,7 +646,7 @@ static void usbback_bh(void *opaque) if (RING_REQUEST_PROD_OVERFLOW(urb_ring, rp)) { rc = urb_ring->rsp_prod_pvt; - xen_be_printf(&usbif->xendev, 0, "domU provided bogus ring requests " + xen_pv_printf(&usbif->xendev, 0, "domU provided bogus ring requests " "(%#x - %#x = %u). Halting ring processing.\n", rp, rc, rp - rc); usbif->ring_error = true; @@ -712,15 +712,10 @@ static void usbback_portid_detach(struct usbback_info *usbif, unsigned port) static void usbback_portid_remove(struct usbback_info *usbif, unsigned port) { - USBPort *p; - if (!usbif->ports[port - 1].dev) { return; } - p = &(usbif->ports[port - 1].port); - snprintf(p->path, sizeof(p->path), "%d", 99); - object_unparent(OBJECT(usbif->ports[port - 1].dev)); usbif->ports[port - 1].dev = NULL; usbback_portid_detach(usbif, port); @@ -733,10 +728,10 @@ static void usbback_portid_add(struct usbback_info *usbif, unsigned port, { unsigned speed; char *portname; - USBPort *p; Error *local_err = NULL; QDict *qdict; QemuOpts *opts; + char *tmp; if (usbif->ports[port - 1].dev) { return; @@ -744,16 +739,21 @@ static void usbback_portid_add(struct usbback_info *usbif, unsigned port, portname = strchr(busid, '-'); if (!portname) { - xen_be_printf(&usbif->xendev, 0, "device %s illegal specification\n", + xen_pv_printf(&usbif->xendev, 0, "device %s illegal specification\n", busid); return; } portname++; - p = &(usbif->ports[port - 1].port); - snprintf(p->path, sizeof(p->path), "%s", portname); qdict = qdict_new(); qdict_put(qdict, "driver", qstring_from_str("usb-host")); + tmp = g_strdup_printf("%s.0", usbif->xendev.qdev.id); + qdict_put(qdict, "bus", qstring_from_str(tmp)); + g_free(tmp); + tmp = g_strdup_printf("%s-%u", usbif->xendev.qdev.id, port); + qdict_put(qdict, "id", qstring_from_str(tmp)); + g_free(tmp); + qdict_put(qdict, "port", qint_from_int(port)); qdict_put(qdict, "hostbus", qint_from_int(atoi(busid))); qdict_put(qdict, "hostport", qstring_from_str(portname)); opts = qemu_opts_from_qdict(qemu_find_opts("device"), qdict, &local_err); @@ -765,7 +765,6 @@ static void usbback_portid_add(struct usbback_info *usbif, unsigned port, goto err; } QDECREF(qdict); - snprintf(p->path, sizeof(p->path), "%d", port); speed = usbif->ports[port - 1].dev->speed; switch (speed) { case USB_SPEED_LOW: @@ -783,7 +782,7 @@ static void usbback_portid_add(struct usbback_info *usbif, unsigned port, break; } if (speed == USBIF_SPEED_NONE) { - xen_be_printf(&usbif->xendev, 0, "device %s wrong speed\n", busid); + xen_pv_printf(&usbif->xendev, 0, "device %s wrong speed\n", busid); object_unparent(OBJECT(usbif->ports[port - 1].dev)); usbif->ports[port - 1].dev = NULL; return; @@ -799,8 +798,7 @@ static void usbback_portid_add(struct usbback_info *usbif, unsigned port, err: QDECREF(qdict); - snprintf(p->path, sizeof(p->path), "%d", 99); - 
xen_be_printf(&usbif->xendev, 0, "device %s could not be opened\n", busid); + xen_pv_printf(&usbif->xendev, 0, "device %s could not be opened\n", busid); } static void usbback_process_port(struct usbback_info *usbif, unsigned port) @@ -811,7 +809,7 @@ static void usbback_process_port(struct usbback_info *usbif, unsigned port) snprintf(node, sizeof(node), "port/%d", port); busid = xenstore_read_be_str(&usbif->xendev, node); if (busid == NULL) { - xen_be_printf(&usbif->xendev, 0, "xenstore_read %s failed\n", node); + xen_pv_printf(&usbif->xendev, 0, "xenstore_read %s failed\n", node); return; } @@ -834,7 +832,7 @@ static void usbback_disconnect(struct XenDevice *xendev) usbif = container_of(xendev, struct usbback_info, xendev); - xen_be_unbind_evtchn(xendev); + xen_pv_unbind_evtchn(xendev); if (usbif->urb_sring) { xengnttab_unmap(xendev->gnttabdev, usbif->urb_sring, 1); @@ -868,15 +866,15 @@ static int usbback_connect(struct XenDevice *xendev) usbif = container_of(xendev, struct usbback_info, xendev); if (xenstore_read_fe_int(xendev, "urb-ring-ref", &urb_ring_ref)) { - xen_be_printf(xendev, 0, "error reading urb-ring-ref\n"); + xen_pv_printf(xendev, 0, "error reading urb-ring-ref\n"); return -1; } if (xenstore_read_fe_int(xendev, "conn-ring-ref", &conn_ring_ref)) { - xen_be_printf(xendev, 0, "error reading conn-ring-ref\n"); + xen_pv_printf(xendev, 0, "error reading conn-ring-ref\n"); return -1; } if (xenstore_read_fe_int(xendev, "event-channel", &xendev->remote_port)) { - xen_be_printf(xendev, 0, "error reading event-channel\n"); + xen_pv_printf(xendev, 0, "error reading event-channel\n"); return -1; } @@ -887,7 +885,7 @@ static int usbback_connect(struct XenDevice *xendev) conn_ring_ref, PROT_READ | PROT_WRITE); if (!usbif->urb_sring || !usbif->conn_sring) { - xen_be_printf(xendev, 0, "error mapping rings\n"); + xen_pv_printf(xendev, 0, "error mapping rings\n"); usbback_disconnect(xendev); return -1; } @@ -899,7 +897,7 @@ static int usbback_connect(struct XenDevice *xendev) xen_be_bind_evtchn(xendev); - xen_be_printf(xendev, 1, "urb-ring-ref %d, conn-ring-ref %d, " + xen_pv_printf(xendev, 1, "urb-ring-ref %d, conn-ring-ref %d, " "remote port %d, local port %d\n", urb_ring_ref, conn_ring_ref, xendev->remote_port, xendev->local_port); @@ -935,12 +933,12 @@ static int usbback_init(struct XenDevice *xendev) if (xenstore_read_be_int(xendev, "num-ports", &usbif->num_ports) || usbif->num_ports < 1 || usbif->num_ports > USBBACK_MAXPORTS) { - xen_be_printf(xendev, 0, "num-ports not readable or out of bounds\n"); + xen_pv_printf(xendev, 0, "num-ports not readable or out of bounds\n"); return -1; } if (xenstore_read_be_int(xendev, "usb-ver", &usbif->usb_ver) || (usbif->usb_ver != USB_VER_USB11 && usbif->usb_ver != USB_VER_USB20)) { - xen_be_printf(xendev, 0, "usb-ver not readable or out of bounds\n"); + xen_pv_printf(xendev, 0, "usb-ver not readable or out of bounds\n"); return -1; } @@ -1012,13 +1010,13 @@ static void usbback_alloc(struct XenDevice *xendev) usbif = container_of(xendev, struct usbback_info, xendev); - usb_bus_new(&usbif->bus, sizeof(usbif->bus), &xen_usb_bus_ops, xen_sysdev); + usb_bus_new(&usbif->bus, sizeof(usbif->bus), &xen_usb_bus_ops, + DEVICE(&xendev->qdev)); for (i = 0; i < USBBACK_MAXPORTS; i++) { p = &(usbif->ports[i].port); usb_register_port(&usbif->bus, p, usbif, i, &xen_usb_port_ops, USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL | USB_SPEED_MASK_HIGH); - snprintf(p->path, sizeof(p->path), "%d", 99); } QTAILQ_INIT(&usbif->req_free_q); @@ -1028,7 +1026,7 @@ static void 
usbback_alloc(struct XenDevice *xendev) /* max_grants: for each request and for the rings (request and connect). */ max_grants = USBIF_MAX_SEGMENTS_PER_REQUEST * USB_URB_RING_SIZE + 2; if (xengnttab_set_max_grants(xendev->gnttabdev, max_grants) < 0) { - xen_be_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", + xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", strerror(errno)); } } @@ -1066,7 +1064,6 @@ static int usbback_free(struct XenDevice *xendev) } usb_bus_release(&usbif->bus); - object_unparent(OBJECT(&usbif->bus)); TR_BUS(xendev, "finished\n"); diff --git a/hw/vfio/common.c b/hw/vfio/common.c index b313e7c2c6..801578b4b9 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -34,6 +34,7 @@ #include "qemu/range.h" #include "sysemu/kvm.h" #include "trace.h" +#include "qapi/error.h" struct vfio_group_head vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -293,11 +294,10 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) section->offset_within_address_space & (1ULL << 63); } -static void vfio_iommu_map_notify(Notifier *n, void *data) +static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) { VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); VFIOContainer *container = giommu->container; - IOMMUTLBEntry *iotlb = data; hwaddr iova = iotlb->iova + giommu->iommu_offset; MemoryRegion *mr; hwaddr xlat; @@ -454,6 +454,7 @@ static void vfio_listener_region_add(MemoryListener *listener, section->offset_within_region; giommu->container = container; giommu->n.notify = vfio_iommu_map_notify; + giommu->n.notifier_flags = IOMMU_NOTIFIER_ALL; QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); memory_region_register_iommu_notifier(giommu->iommu, &giommu->n); @@ -609,16 +610,16 @@ vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id) return NULL; } -static void vfio_setup_region_sparse_mmaps(VFIORegion *region, - struct vfio_region_info *info) +static int vfio_setup_region_sparse_mmaps(VFIORegion *region, + struct vfio_region_info *info) { struct vfio_info_cap_header *hdr; struct vfio_region_info_cap_sparse_mmap *sparse; - int i; + int i, j; hdr = vfio_get_region_info_cap(info, VFIO_REGION_INFO_CAP_SPARSE_MMAP); if (!hdr) { - return; + return -ENODEV; } sparse = container_of(hdr, struct vfio_region_info_cap_sparse_mmap, header); @@ -626,16 +627,24 @@ static void vfio_setup_region_sparse_mmaps(VFIORegion *region, trace_vfio_region_sparse_mmap_header(region->vbasedev->name, region->nr, sparse->nr_areas); - region->nr_mmaps = sparse->nr_areas; - region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); + region->mmaps = g_new0(VFIOMmap, sparse->nr_areas); - for (i = 0; i < region->nr_mmaps; i++) { - region->mmaps[i].offset = sparse->areas[i].offset; - region->mmaps[i].size = sparse->areas[i].size; - trace_vfio_region_sparse_mmap_entry(i, region->mmaps[i].offset, - region->mmaps[i].offset + - region->mmaps[i].size); + for (i = 0, j = 0; i < sparse->nr_areas; i++) { + trace_vfio_region_sparse_mmap_entry(i, sparse->areas[i].offset, + sparse->areas[i].offset + + sparse->areas[i].size); + + if (sparse->areas[i].size) { + region->mmaps[j].offset = sparse->areas[i].offset; + region->mmaps[j].size = sparse->areas[i].size; + j++; + } } + + region->nr_mmaps = j; + region->mmaps = g_realloc(region->mmaps, j * sizeof(VFIOMmap)); + + return 0; } int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, @@ -661,12 +670,11 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, 
VFIORegion *region, region, name, region->size); if (!vbasedev->no_mmap && - region->flags & VFIO_REGION_INFO_FLAG_MMAP && - !(region->size & ~qemu_real_host_page_mask)) { + region->flags & VFIO_REGION_INFO_FLAG_MMAP) { - vfio_setup_region_sparse_mmaps(region, info); + ret = vfio_setup_region_sparse_mmaps(region, info); - if (!region->nr_mmaps) { + if (ret) { region->nr_mmaps = 1; region->mmaps = g_new0(VFIOMmap, region->nr_mmaps); region->mmaps[0].offset = 0; @@ -723,12 +731,11 @@ int vfio_region_mmap(VFIORegion *region) name = g_strdup_printf("%s mmaps[%d]", memory_region_name(region->mem), i); - memory_region_init_ram_ptr(®ion->mmaps[i].mem, - memory_region_owner(region->mem), - name, region->mmaps[i].size, - region->mmaps[i].mmap); + memory_region_init_ram_device_ptr(®ion->mmaps[i].mem, + memory_region_owner(region->mem), + name, region->mmaps[i].size, + region->mmaps[i].mmap); g_free(name); - memory_region_set_skip_dump(®ion->mmaps[i].mem); memory_region_add_subregion(region->mem, region->mmaps[i].offset, ®ion->mmaps[i].mem); @@ -900,7 +907,8 @@ static void vfio_put_address_space(VFIOAddressSpace *space) } } -static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) +static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) { VFIOContainer *container; int ret, fd; @@ -918,15 +926,15 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) fd = qemu_open("/dev/vfio/vfio", O_RDWR); if (fd < 0) { - error_report("vfio: failed to open /dev/vfio/vfio: %m"); + error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio"); ret = -errno; goto put_space_exit; } ret = ioctl(fd, VFIO_GET_API_VERSION); if (ret != VFIO_API_VERSION) { - error_report("vfio: supported vfio version: %d, " - "reported version: %d", VFIO_API_VERSION, ret); + error_setg(errp, "supported vfio version: %d, " + "reported version: %d", VFIO_API_VERSION, ret); ret = -EINVAL; goto close_fd_exit; } @@ -941,7 +949,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); if (ret) { - error_report("vfio: failed to set group container: %m"); + error_setg_errno(errp, errno, "failed to set group container"); ret = -errno; goto free_container_exit; } @@ -949,7 +957,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU; ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type); if (ret) { - error_report("vfio: failed to set iommu for container: %m"); + error_setg_errno(errp, errno, "failed to set iommu for container"); ret = -errno; goto free_container_exit; } @@ -976,7 +984,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd); if (ret) { - error_report("vfio: failed to set group container: %m"); + error_setg_errno(errp, errno, "failed to set group container"); ret = -errno; goto free_container_exit; } @@ -984,7 +992,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) v2 ? 
VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU; ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type); if (ret) { - error_report("vfio: failed to set iommu for container: %m"); + error_setg_errno(errp, errno, "failed to set iommu for container"); ret = -errno; goto free_container_exit; } @@ -997,7 +1005,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) if (!v2) { ret = ioctl(fd, VFIO_IOMMU_ENABLE); if (ret) { - error_report("vfio: failed to enable container: %m"); + error_setg_errno(errp, errno, "failed to enable container"); ret = -errno; goto free_container_exit; } @@ -1008,7 +1016,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) &address_space_memory); if (container->error) { memory_listener_unregister(&container->prereg_listener); - error_report("vfio: RAM memory listener initialization failed for container"); + ret = container->error; + error_setg(errp, + "RAM memory listener initialization failed for container"); goto free_container_exit; } } @@ -1016,7 +1026,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) info.argsz = sizeof(info); ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); if (ret) { - error_report("vfio: VFIO_IOMMU_SPAPR_TCE_GET_INFO failed: %m"); + error_setg_errno(errp, errno, + "VFIO_IOMMU_SPAPR_TCE_GET_INFO failed"); ret = -errno; if (v2) { memory_listener_unregister(&container->prereg_listener); @@ -1033,6 +1044,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) */ ret = vfio_spapr_remove_window(container, info.dma32_window_start); if (ret) { + error_setg_errno(errp, -ret, + "failed to remove existing window"); goto free_container_exit; } } else { @@ -1043,7 +1056,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) 0x1000); } } else { - error_report("vfio: No available IOMMU models"); + error_setg(errp, "No available IOMMU models"); ret = -EINVAL; goto free_container_exit; } @@ -1054,7 +1067,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as) if (container->error) { ret = container->error; - error_report("vfio: memory listener initialization failed for container"); + error_setg_errno(errp, -ret, + "memory listener initialization failed for container"); goto listener_release_exit; } @@ -1115,7 +1129,7 @@ static void vfio_disconnect_container(VFIOGroup *group) } } -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) { VFIOGroup *group; char path[32]; @@ -1127,8 +1141,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) if (group->container->space->as == as) { return group; } else { - error_report("vfio: group %d used in multiple address spaces", - group->groupid); + error_setg(errp, "group %d used in multiple address spaces", + group->groupid); return NULL; } } @@ -1139,27 +1153,29 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as) snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); group->fd = qemu_open(path, O_RDWR); if (group->fd < 0) { - error_report("vfio: error opening %s: %m", path); + error_setg_errno(errp, errno, "failed to open %s", path); goto free_group_exit; } if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) { - error_report("vfio: error getting group status: %m"); + error_setg_errno(errp, errno, "failed to get group %d status", groupid); goto close_fd_exit; } if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) { - error_report("vfio: error, group %d is not viable, please ensure " - "all devices within the 
iommu_group are bound to their " - "vfio bus driver.", groupid); + error_setg(errp, "group %d is not viable", groupid); + error_append_hint(errp, + "Please ensure all devices within the iommu_group " + "are bound to their vfio bus driver.\n"); goto close_fd_exit; } group->groupid = groupid; QLIST_INIT(&group->device_list); - if (vfio_connect_container(group, as)) { - error_report("vfio: failed to setup container for group %d", groupid); + if (vfio_connect_container(group, as, errp)) { + error_prepend(errp, "failed to setup container for group %d: ", + groupid); goto close_fd_exit; } @@ -1201,23 +1217,24 @@ void vfio_put_group(VFIOGroup *group) } int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev) + VFIODevice *vbasedev, Error **errp) { struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; int ret, fd; fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); if (fd < 0) { - error_report("vfio: error getting device %s from group %d: %m", - name, group->groupid); - error_printf("Verify all devices in group %d are bound to vfio- " - "or pci-stub and not already in use\n", group->groupid); + error_setg_errno(errp, errno, "error getting device from group %d", + group->groupid); + error_append_hint(errp, + "Verify all devices in group %d are bound to vfio- " + "or pci-stub and not already in use\n", group->groupid); return fd; } ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); if (ret) { - error_report("vfio: error getting device info: %m"); + error_setg_errno(errp, errno, "error getting device info"); close(fd); return ret; } diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index bec694c8d8..811eecd1b4 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -898,7 +898,7 @@ static uint64_t vfio_rtl8168_quirk_data_read(void *opaque, { VFIOrtl8168Quirk *rtl = opaque; VFIOPCIDevice *vdev = rtl->vdev; - uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x74, size); + uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size); if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) { hwaddr offset = rtl->addr & 0xfff; @@ -1056,7 +1056,7 @@ typedef struct VFIOIGDQuirk { * of the IGD device. */ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, - struct vfio_region_info *info) + struct vfio_region_info *info, Error **errp) { int ret; @@ -1064,7 +1064,7 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, info->size, info->offset); if (ret != info->size) { - error_report("vfio: Error reading IGD OpRegion"); + error_setg(errp, "failed to read IGD OpRegion"); g_free(vdev->igd_opregion); vdev->igd_opregion = NULL; return -EINVAL; @@ -1363,6 +1363,7 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) uint64_t *bdsm_size; uint32_t gmch; uint16_t cmd_orig, cmd; + Error *err = NULL; /* * This must be an Intel VGA device at address 00:02.0 for us to even @@ -1464,7 +1465,8 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) * try to enable it. Probably shouldn't be using legacy mode without VGA, * but also no point in us enabling VGA if disabled in hardware. 
*/ - if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev)) { + if (!(gmch & 0x2) && !vdev->vga && vfio_populate_vga(vdev, &err)) { + error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name); error_report("IGD device %s failed to enable VGA access, " "legacy mode disabled", vdev->vbasedev.name); goto out; @@ -1487,10 +1489,10 @@ static void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr) } /* Setup OpRegion access */ - ret = vfio_pci_igd_opregion_init(vdev, opregion); + ret = vfio_pci_igd_opregion_init(vdev, opregion, &err); if (ret) { - error_report("IGD device %s failed to setup OpRegion, " - "legacy mode disabled", vdev->vbasedev.name); + error_append_hint(&err, "IGD legacy mode disabled\n"); + error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name); goto out; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 7bfa17ce38..d7dbe0e3e0 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -100,7 +100,7 @@ static void vfio_intx_eoi(VFIODevice *vbasedev) vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX); } -static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev) +static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) { #ifdef CONFIG_KVM struct kvm_irqfd irqfd = { @@ -126,7 +126,7 @@ static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev) /* Get an eventfd for resample/unmask */ if (event_notifier_init(&vdev->intx.unmask, 0)) { - error_report("vfio: Error: event_notifier_init failed eoi"); + error_setg(errp, "event_notifier_init failed eoi"); goto fail; } @@ -134,7 +134,7 @@ static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev) irqfd.resamplefd = event_notifier_get_fd(&vdev->intx.unmask); if (kvm_vm_ioctl(kvm_state, KVM_IRQFD, &irqfd)) { - error_report("vfio: Error: Failed to setup resample irqfd: %m"); + error_setg_errno(errp, errno, "failed to setup resample irqfd"); goto fail_irqfd; } @@ -153,7 +153,7 @@ static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev) ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); if (ret) { - error_report("vfio: Error: Failed to setup INTx unmask fd: %m"); + error_setg_errno(errp, -ret, "failed to setup INTx unmask fd"); goto fail_vfio; } @@ -222,6 +222,7 @@ static void vfio_intx_update(PCIDevice *pdev) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); PCIINTxRoute route; + Error *err = NULL; if (vdev->interrupt != VFIO_INT_INTx) { return; } @@ -244,18 +245,22 @@ static void vfio_intx_update(PCIDevice *pdev) return; } - vfio_intx_enable_kvm(vdev); + vfio_intx_enable_kvm(vdev, &err); + if (err) { + error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name); + } /* Re-enable the interrupt in case we missed an EOI */ vfio_intx_eoi(&vdev->vbasedev); } -static int vfio_intx_enable(VFIOPCIDevice *vdev) +static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) { uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); int ret, argsz; struct vfio_irq_set *irq_set; int32_t *pfd; + Error *err = NULL; if (!pin) { return 0; @@ -279,7 +284,7 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev) ret = event_notifier_init(&vdev->intx.interrupt, 0); if (ret) { - error_report("vfio: Error: event_notifier_init failed"); + error_setg_errno(errp, -ret, "event_notifier_init failed"); return ret; } @@ -299,13 +304,16 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev) ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set); g_free(irq_set); if (ret) { - error_report("vfio: Error: Failed to setup INTx fd: %m"); + error_setg_errno(errp, -ret, "failed to setup INTx fd"); qemu_set_fd_handler(*pfd,
NULL, NULL, vdev); event_notifier_cleanup(&vdev->intx.interrupt); return -errno; } - vfio_intx_enable_kvm(vdev); + vfio_intx_enable_kvm(vdev, &err); + if (err) { + error_reportf_err(err, WARN_PREFIX, vdev->vbasedev.name); + } vdev->interrupt = VFIO_INT_INTx; @@ -496,7 +504,9 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + if (msg) { + vfio_add_kvm_msi_virq(vdev, vector, nr, true); + } } /* @@ -705,6 +715,7 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) static void vfio_msi_disable_common(VFIOPCIDevice *vdev) { + Error *err = NULL; int i; for (i = 0; i < vdev->nr_vectors; i++) { @@ -724,7 +735,10 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) vdev->nr_vectors = 0; vdev->interrupt = VFIO_INT_NONE; - vfio_intx_enable(vdev); + vfio_intx_enable(vdev, &err); + if (err) { + error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name); + } } static void vfio_msix_disable(VFIOPCIDevice *vdev) @@ -1056,6 +1070,55 @@ static const MemoryRegionOps vfio_vga_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; +/* + * Expand memory region of sub-page(size < PAGE_SIZE) MMIO BAR to page + * size if the BAR is in an exclusive page in host so that we could map + * this BAR to guest. But this sub-page BAR may not occupy an exclusive + * page in guest. So we should set the priority of the expanded memory + * region to zero in case of overlap with BARs which share the same page + * with the sub-page BAR in guest. Besides, we should also recover the + * size of this sub-page BAR when its base address is changed in guest + * and not page aligned any more. + */ +static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar) +{ + VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); + VFIORegion *region = &vdev->bars[bar].region; + MemoryRegion *mmap_mr, *mr; + PCIIORegion *r; + pcibus_t bar_addr; + uint64_t size = region->size; + + /* Make sure that the whole region is allowed to be mmapped */ + if (region->nr_mmaps != 1 || !region->mmaps[0].mmap || + region->mmaps[0].size != region->size) { + return; + } + + r = &pdev->io_regions[bar]; + bar_addr = r->addr; + mr = region->mem; + mmap_mr = ®ion->mmaps[0].mem; + + /* If BAR is mapped and page aligned, update to fill PAGE_SIZE */ + if (bar_addr != PCI_BAR_UNMAPPED && + !(bar_addr & ~qemu_real_host_page_mask)) { + size = qemu_real_host_page_size; + } + + memory_region_transaction_begin(); + + memory_region_set_size(mr, size); + memory_region_set_size(mmap_mr, size); + if (size != region->size && memory_region_is_mapped(mr)) { + memory_region_del_subregion(r->address_space, mr); + memory_region_add_subregion_overlap(r->address_space, + bar_addr, mr, 0); + } + + memory_region_transaction_commit(); +} + /* * PCI config space */ @@ -1139,6 +1202,24 @@ void vfio_pci_write_config(PCIDevice *pdev, } else if (was_enabled && !is_enabled) { vfio_msix_disable(vdev); } + } else if (ranges_overlap(addr, len, PCI_BASE_ADDRESS_0, 24) || + range_covers_byte(addr, len, PCI_COMMAND)) { + pcibus_t old_addr[PCI_NUM_REGIONS - 1]; + int bar; + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + old_addr[bar] = pdev->io_regions[bar].addr; + } + + pci_default_write_config(pdev, addr, val, len); + + for (bar = 0; bar < PCI_ROM_SLOT; bar++) { + if (old_addr[bar] != pdev->io_regions[bar].addr && + pdev->io_regions[bar].size > 0 && + pdev->io_regions[bar].size < qemu_real_host_page_size) { + vfio_sub_page_bar_update_mapping(pdev, bar); + } + } } 
else { /* Write everything to QEMU to keep emulated bits correct */ pci_default_write_config(pdev, addr, val, len); @@ -1166,7 +1247,7 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) } } -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) { uint16_t ctrl; bool msi_64bit, msi_maskbit; @@ -1175,6 +1256,7 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { + error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); return -errno; } ctrl = le16_to_cpu(ctrl); @@ -1190,8 +1272,8 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos) if (ret == -ENOTSUP) { return 0; } - error_prepend(&err, "vfio: msi_init failed: "); - error_report_err(err); + error_prepend(&err, "msi_init failed: "); + error_propagate(errp, err); return ret; } vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); @@ -1275,7 +1357,7 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice *vdev) * need to first look for where the MSI-X table lives. So we * unfortunately split MSI-X setup across two functions. */ -static int vfio_msix_early_setup(VFIOPCIDevice *vdev) +static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) { uint8_t pos; uint16_t ctrl; @@ -1285,22 +1367,25 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX); if (!pos) { - return 0; + return; } if (pread(fd, &ctrl, sizeof(ctrl), vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) { - return -errno; + error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS"); + return; } if (pread(fd, &table, sizeof(table), vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) { - return -errno; + error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE"); + return; } if (pread(fd, &pba, sizeof(pba), vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) { - return -errno; + error_setg_errno(errp, errno, "failed to read PCI MSIX PBA"); + return; } ctrl = le16_to_cpu(ctrl); @@ -1330,10 +1415,10 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) (vdev->device_id & 0xff00) == 0x5800) { msix->pba_offset = 0x1000; } else { - error_report("vfio: Hardware reports invalid configuration, " - "MSIX PBA outside of specified BAR"); + error_setg(errp, "hardware reports invalid configuration, " + "MSIX PBA outside of specified BAR"); g_free(msix); - return -EINVAL; + return; } } @@ -1342,11 +1427,9 @@ static int vfio_msix_early_setup(VFIOPCIDevice *vdev) vdev->msix = msix; vfio_pci_fixup_msix_region(vdev); - - return 0; } -static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) +static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) { int ret; @@ -1361,7 +1444,7 @@ static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos) if (ret == -ENOTSUP) { return 0; } - error_report("vfio: msix_init failed"); + error_setg(errp, "msix_init failed"); return ret; } @@ -1546,7 +1629,8 @@ static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos, vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); } -static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size) +static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size, + Error **errp) { uint16_t flags; uint8_t type; @@ -1558,8 +1642,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size) type != PCI_EXP_TYPE_LEG_END && type != 
PCI_EXP_TYPE_RC_END) { - error_report("vfio: Assignment of PCIe type 0x%x " - "devices is not currently supported", type); + error_setg(errp, "assignment of PCIe type 0x%x " + "devices is not currently supported", type); return -EINVAL; } @@ -1693,7 +1777,7 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) } } -static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) +static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) { PCIDevice *pdev = &vdev->pdev; uint8_t cap_id, next, size; @@ -1718,9 +1802,9 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) * will be changed as we unwind the stack. */ if (next) { - ret = vfio_add_std_cap(vdev, next); + ret = vfio_add_std_cap(vdev, next, errp); if (ret) { - return ret; + goto out; } } else { /* Begin the rebuild, use QEMU emulated list bits */ @@ -1734,40 +1818,40 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) switch (cap_id) { case PCI_CAP_ID_MSI: - ret = vfio_msi_setup(vdev, pos); + ret = vfio_msi_setup(vdev, pos, errp); break; case PCI_CAP_ID_EXP: vfio_check_pcie_flr(vdev, pos); - ret = vfio_setup_pcie_cap(vdev, pos, size); + ret = vfio_setup_pcie_cap(vdev, pos, size, errp); break; case PCI_CAP_ID_MSIX: - ret = vfio_msix_setup(vdev, pos); + ret = vfio_msix_setup(vdev, pos, errp); break; case PCI_CAP_ID_PM: vfio_check_pm_reset(vdev, pos); vdev->pm_cap = pos; - ret = pci_add_capability(pdev, cap_id, pos, size); + ret = pci_add_capability2(pdev, cap_id, pos, size, errp); break; case PCI_CAP_ID_AF: vfio_check_af_flr(vdev, pos); - ret = pci_add_capability(pdev, cap_id, pos, size); + ret = pci_add_capability2(pdev, cap_id, pos, size, errp); break; default: - ret = pci_add_capability(pdev, cap_id, pos, size); + ret = pci_add_capability2(pdev, cap_id, pos, size, errp); break; } - +out: if (ret < 0) { - error_report("vfio: %s Error adding PCI capability " - "0x%x[0x%x]@0x%x: %d", vdev->vbasedev.name, - cap_id, size, pos, ret); + error_prepend(errp, + "failed to add PCI capability 0x%x[0x%x]@0x%x: ", + cap_id, size, pos); return ret; } return 0; } -static int vfio_add_ext_cap(VFIOPCIDevice *vdev) +static void vfio_add_ext_cap(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; uint32_t header; @@ -1778,7 +1862,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev) /* Only add extended caps if we have them and the guest can see them */ if (!pci_is_express(pdev) || !pci_bus_is_express(pdev->bus) || !pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) { - return 0; + return; } /* @@ -1843,10 +1927,10 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev) } g_free(config); - return 0; + return; } -static int vfio_add_capabilities(VFIOPCIDevice *vdev) +static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; int ret; @@ -1856,12 +1940,13 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev) return 0; /* Nothing to add */ } - ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]); + ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST], errp); if (ret) { return ret; } - return vfio_add_ext_cap(vdev); + vfio_add_ext_cap(vdev); + return 0; } static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) @@ -1903,7 +1988,24 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) static void vfio_pci_post_reset(VFIOPCIDevice *vdev) { - vfio_intx_enable(vdev); + Error *err = NULL; + int nr; + + vfio_intx_enable(vdev, &err); + if (err) { + error_reportf_err(err, ERR_PREFIX, vdev->vbasedev.name); + } + + for (nr = 0; nr < PCI_NUM_REGIONS - 1; 
++nr) { + off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr); + uint32_t val = 0; + uint32_t len = sizeof(val); + + if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) { + error_report("%s(%s) reset bar %d failed: %m", __func__, + vdev->vbasedev.name, nr); + } + } } static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) @@ -1928,7 +2030,9 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); - vfio_pci_pre_reset(vdev); + if (!single) { + vfio_pci_pre_reset(vdev); + } vdev->vbasedev.needs_reset = false; info = g_malloc0(sizeof(*info)); @@ -2086,7 +2190,9 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) } } out_single: - vfio_pci_post_reset(vdev); + if (!single) { + vfio_pci_post_reset(vdev); + } g_free(info); return ret; @@ -2132,7 +2238,7 @@ static VFIODeviceOps vfio_pci_ops = { .vfio_eoi = vfio_intx_eoi, }; -int vfio_populate_vga(VFIOPCIDevice *vdev) +int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; struct vfio_region_info *reg_info; @@ -2140,15 +2246,18 @@ int vfio_populate_vga(VFIOPCIDevice *vdev) ret = vfio_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info); if (ret) { + error_setg_errno(errp, -ret, + "failed getting region info for VGA region index %d", + VFIO_PCI_VGA_REGION_INDEX); return ret; } if (!(reg_info->flags & VFIO_REGION_INFO_FLAG_READ) || !(reg_info->flags & VFIO_REGION_INFO_FLAG_WRITE) || reg_info->size < 0xbffff + 1) { - error_report("vfio: Unexpected VGA info, flags 0x%lx, size 0x%lx", - (unsigned long)reg_info->flags, - (unsigned long)reg_info->size); + error_setg(errp, "unexpected VGA info, flags 0x%lx, size 0x%lx", + (unsigned long)reg_info->flags, + (unsigned long)reg_info->size); g_free(reg_info); return -EINVAL; } @@ -2197,7 +2306,7 @@ int vfio_populate_vga(VFIOPCIDevice *vdev) return 0; } -static int vfio_populate_device(VFIOPCIDevice *vdev) +static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; struct vfio_region_info *reg_info; @@ -2206,19 +2315,19 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) /* Sanity check device */ if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PCI)) { - error_report("vfio: Um, this isn't a PCI device"); - goto error; + error_setg(errp, "this isn't a PCI device"); + return; } if (vbasedev->num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1) { - error_report("vfio: unexpected number of io regions %u", - vbasedev->num_regions); - goto error; + error_setg(errp, "unexpected number of io regions %u", + vbasedev->num_regions); + return; } if (vbasedev->num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1) { - error_report("vfio: unexpected number of irqs %u", vbasedev->num_irqs); - goto error; + error_setg(errp, "unexpected number of irqs %u", vbasedev->num_irqs); + return; } for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) { @@ -2229,8 +2338,8 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) g_free(name); if (ret) { - error_report("vfio: Error getting region %d info: %m", i); - goto error; + error_setg_errno(errp, -ret, "failed to get region %d info", i); + return; } QLIST_INIT(&vdev->bars[i].quirks); @@ -2239,8 +2348,8 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) ret = vfio_get_region_info(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, ®_info); if (ret) { - error_report("vfio: Error getting config info: %m"); - goto error; + error_setg_errno(errp, -ret, "failed to get 
config info"); + return; } trace_vfio_populate_device_config(vdev->vbasedev.name, @@ -2257,11 +2366,11 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) g_free(reg_info); if (vdev->features & VFIO_FEATURE_ENABLE_VGA) { - ret = vfio_populate_vga(vdev); + ret = vfio_populate_vga(vdev, errp); if (ret) { - error_report( - "vfio: Device does not support requested feature x-vga"); - goto error; + error_append_hint(errp, "device does not support " + "requested feature x-vga\n"); + return; } } @@ -2271,17 +2380,13 @@ static int vfio_populate_device(VFIOPCIDevice *vdev) if (ret) { /* This can fail for an old kernel or legacy PCI dev */ trace_vfio_populate_device_get_irq_info_failure(); - ret = 0; } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { - error_report("vfio: %s " + error_report(WARN_PREFIX "Could not enable error recovery for the device", vbasedev->name); } - -error: - return ret; } static void vfio_put_device(VFIOPCIDevice *vdev) @@ -2485,18 +2590,26 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } -static int vfio_initfn(PCIDevice *pdev) +static void vfio_realize(PCIDevice *pdev, Error **errp) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); VFIODevice *vbasedev_iter; VFIOGroup *group; char *tmp, group_path[PATH_MAX], *group_name; + Error *err = NULL; ssize_t len; struct stat st; int groupid; int i, ret; if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { + error_setg(errp, "No provided host device"); + error_append_hint(errp, "Use -vfio-pci,host=DDDD:BB:DD.F " + "or -vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); + return; + } vdev->vbasedev.sysfsdev = g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x", vdev->host.domain, vdev->host.bus, @@ -2504,9 +2617,9 @@ static int vfio_initfn(PCIDevice *pdev) } if (stat(vdev->vbasedev.sysfsdev, &st) < 0) { - error_report("vfio: error: no such host device: %s", - vdev->vbasedev.sysfsdev); - return -errno; + error_setg_errno(errp, errno, "no such host device"); + error_prepend(errp, ERR_PREFIX, vdev->vbasedev.sysfsdev); + return; } vdev->vbasedev.name = g_strdup(basename(vdev->vbasedev.sysfsdev)); @@ -2518,45 +2631,44 @@ static int vfio_initfn(PCIDevice *pdev) g_free(tmp); if (len <= 0 || len >= sizeof(group_path)) { - error_report("vfio: error no iommu_group for device"); - return len < 0 ? -errno : -ENAMETOOLONG; + error_setg_errno(errp, len < 0 ? 
errno : ENAMETOOLONG, + "no iommu_group found"); + goto error; } group_path[len] = 0; group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", group_path); - return -errno; + error_setg_errno(errp, errno, "failed to read %s", group_path); + goto error; } - trace_vfio_initfn(vdev->vbasedev.name, groupid); + trace_vfio_realize(vdev->vbasedev.name, groupid); - group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev)); + group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp); if (!group) { - error_report("vfio: failed to get group %d", groupid); - return -ENOENT; + goto error; } QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { - error_report("vfio: error: device %s is already attached", - vdev->vbasedev.name); + error_setg(errp, "device is already attached"); vfio_put_group(group); - return -EBUSY; + goto error; } } - ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev); + ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp); if (ret) { - error_report("vfio: failed to get device %s", vdev->vbasedev.name); vfio_put_group(group); - return ret; + goto error; } - ret = vfio_populate_device(vdev); - if (ret) { - return ret; + vfio_populate_device(vdev, &err); + if (err) { + error_propagate(errp, err); + goto error; } /* Get a copy of config space */ @@ -2565,8 +2677,8 @@ static int vfio_initfn(PCIDevice *pdev) vdev->config_offset); if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { ret = ret < 0 ? -errno : -EFAULT; - error_report("vfio: Failed to read device config space"); - return ret; + error_setg_errno(errp, -ret, "failed to read device config space"); + goto error; } /* vfio emulates a lot for us, but some bits need extra love */ @@ -2582,8 +2694,8 @@ static int vfio_initfn(PCIDevice *pdev) */ if (vdev->vendor_id != PCI_ANY_ID) { if (vdev->vendor_id >= 0xffff) { - error_report("vfio: Invalid PCI vendor ID provided"); - return -EINVAL; + error_setg(errp, "invalid PCI vendor ID provided"); + goto error; } vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id); @@ -2593,8 +2705,8 @@ static int vfio_initfn(PCIDevice *pdev) if (vdev->device_id != PCI_ANY_ID) { if (vdev->device_id > 0xffff) { - error_report("vfio: Invalid PCI device ID provided"); - return -EINVAL; + error_setg(errp, "invalid PCI device ID provided"); + goto error; } vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); trace_vfio_pci_emulated_device_id(vdev->vbasedev.name, vdev->device_id); @@ -2604,8 +2716,8 @@ static int vfio_initfn(PCIDevice *pdev) if (vdev->sub_vendor_id != PCI_ANY_ID) { if (vdev->sub_vendor_id > 0xffff) { - error_report("vfio: Invalid PCI subsystem vendor ID provided"); - return -EINVAL; + error_setg(errp, "invalid PCI subsystem vendor ID provided"); + goto error; } vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, vdev->sub_vendor_id, ~0); @@ -2615,8 +2727,8 @@ static int vfio_initfn(PCIDevice *pdev) if (vdev->sub_device_id != PCI_ANY_ID) { if (vdev->sub_device_id > 0xffff) { - error_report("vfio: Invalid PCI subsystem device ID provided"); - return -EINVAL; + error_setg(errp, "invalid PCI subsystem device ID provided"); + goto error; } vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); trace_vfio_pci_emulated_sub_device_id(vdev->vbasedev.name, @@ -2644,14 +2756,15 @@ 
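For reference, the iommu_group lookup that vfio_realize() performs above (readlink on the device's sysfs entry, then parsing the trailing path component) can be reproduced outside QEMU. The sketch below only assumes a Linux sysfs; the default device path is an arbitrary example.

    #include <libgen.h>
    #include <limits.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Follow <sysfsdev>/iommu_group and parse the last path component as a
     * decimal group id, returning -1 on any failure. */
    static int pci_device_iommu_group(const char *sysfsdev)
    {
        char link[PATH_MAX], path[PATH_MAX];
        ssize_t len;
        int groupid;

        snprintf(link, sizeof(link), "%s/iommu_group", sysfsdev);
        len = readlink(link, path, sizeof(path));
        if (len <= 0 || len >= (ssize_t)sizeof(path)) {
            return -1;                 /* no group, or link target too long */
        }
        path[len] = 0;                 /* readlink() does not NUL-terminate */

        if (sscanf(basename(path), "%d", &groupid) != 1) {
            return -1;
        }
        return groupid;
    }

    int main(int argc, char **argv)
    {
        const char *dev = argc > 1 ? argv[1]
                                   : "/sys/bus/pci/devices/0000:00:02.0";

        printf("iommu group of %s: %d\n", dev, pci_device_iommu_group(dev));
        return 0;
    }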
static int vfio_initfn(PCIDevice *pdev) vfio_pci_size_rom(vdev); - ret = vfio_msix_early_setup(vdev); - if (ret) { - return ret; + vfio_msix_early_setup(vdev, &err); + if (err) { + error_propagate(errp, err); + goto error; } vfio_bars_setup(vdev); - ret = vfio_add_capabilities(vdev); + ret = vfio_add_capabilities(vdev, errp); if (ret) { goto out_teardown; } @@ -2669,9 +2782,9 @@ static int vfio_initfn(PCIDevice *pdev) struct vfio_region_info *opregion; if (vdev->pdev.qdev.hotplugged) { - error_report("Cannot support IGD OpRegion feature on hotplugged " - "device %s", vdev->vbasedev.name); - ret = -EINVAL; + error_setg(errp, + "cannot support IGD OpRegion feature on hotplugged " + "device"); goto out_teardown; } @@ -2679,16 +2792,14 @@ static int vfio_initfn(PCIDevice *pdev) VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); if (ret) { - error_report("Device %s does not support requested IGD OpRegion " - "feature", vdev->vbasedev.name); + error_setg_errno(errp, -ret, + "does not support requested IGD OpRegion feature"); goto out_teardown; } - ret = vfio_pci_igd_opregion_init(vdev, opregion); + ret = vfio_pci_igd_opregion_init(vdev, opregion, errp); g_free(opregion); if (ret) { - error_report("Device %s IGD OpRegion initialization failed", - vdev->vbasedev.name); goto out_teardown; } } @@ -2708,7 +2819,7 @@ static int vfio_initfn(PCIDevice *pdev) vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, vfio_intx_mmap_enable, vdev); pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_intx_update); - ret = vfio_intx_enable(vdev); + ret = vfio_intx_enable(vdev, errp); if (ret) { goto out_teardown; } @@ -2718,13 +2829,14 @@ static int vfio_initfn(PCIDevice *pdev) vfio_register_req_notifier(vdev); vfio_setup_resetfn_quirk(vdev); - return 0; + return; out_teardown: pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); vfio_teardown_msi(vdev); vfio_bars_exit(vdev); - return ret; +error: + error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name); } static void vfio_instance_finalize(Object *obj) @@ -2806,6 +2918,10 @@ static void vfio_instance_init(Object *obj) device_add_bootindex_property(obj, &vdev->bootindex, "bootindex", NULL, &pci_dev->qdev, NULL); + vdev->host.domain = ~0U; + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; } static Property vfio_pci_dev_properties[] = { @@ -2853,7 +2969,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) dc->vmsd = &vfio_pci_vmstate; dc->desc = "VFIO-based PCI device assignment"; set_bit(DEVICE_CATEGORY_MISC, dc->categories); - pdc->init = vfio_initfn; + pdc->realize = vfio_realize; pdc->exit = vfio_exitfn; pdc->config_read = vfio_pci_read_config; pdc->config_write = vfio_pci_write_config; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 7d482d9d21..a8366bb2a7 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -161,9 +161,10 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr); void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr); void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev); -int vfio_populate_vga(VFIOPCIDevice *vdev); +int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, + Error **errp); #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c index a559e7b659..a4663c918e 100644 --- a/hw/vfio/platform.c +++ b/hw/vfio/platform.c @@ -44,9 +44,10 @@ static inline bool 
vfio_irq_is_automasked(VFIOINTp *intp) * and add it into the list of IRQs * @vbasedev: the VFIO device handle * @info: irq info struct retrieved from VFIO driver + * @errp: error object */ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, - struct vfio_irq_info info) + struct vfio_irq_info info, Error **errp) { int ret; VFIOPlatformDevice *vdev = @@ -69,7 +70,8 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, if (ret) { g_free(intp->interrupt); g_free(intp); - error_report("vfio: Error: trigger event_notifier_init failed "); + error_setg_errno(errp, -ret, + "failed to initialize trigger eventd notifier"); return NULL; } if (vfio_irq_is_automasked(intp)) { @@ -80,7 +82,8 @@ static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev, g_free(intp->interrupt); g_free(intp->unmask); g_free(intp); - error_report("vfio: Error: resamplefd event_notifier_init failed"); + error_setg_errno(errp, -ret, + "failed to initialize resample eventd notifier"); return NULL; } } @@ -456,9 +459,10 @@ static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev) * vfio_populate_device - Allocate and populate MMIO region * and IRQ structs according to driver returned information * @vbasedev: the VFIO device handle + * @errp: error object * */ -static int vfio_populate_device(VFIODevice *vbasedev) +static int vfio_populate_device(VFIODevice *vbasedev, Error **errp) { VFIOINTp *intp, *tmp; int i, ret = -1; @@ -466,7 +470,7 @@ static int vfio_populate_device(VFIODevice *vbasedev) container_of(vbasedev, VFIOPlatformDevice, vbasedev); if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) { - error_report("vfio: Um, this isn't a platform device"); + error_setg(errp, "this isn't a platform device"); return ret; } @@ -480,7 +484,7 @@ static int vfio_populate_device(VFIODevice *vbasedev) vdev->regions[i], i, name); g_free(name); if (ret) { - error_report("vfio: Error getting region %d info: %m", i); + error_setg_errno(errp, -ret, "failed to get region %d info", i); goto reg_error; } } @@ -496,16 +500,15 @@ static int vfio_populate_device(VFIODevice *vbasedev) irq.index = i; ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); if (ret) { - error_report("vfio: error getting device %s irq info", - vbasedev->name); + error_setg_errno(errp, -ret, "failed to get device irq info"); goto irq_err; } else { trace_vfio_platform_populate_interrupts(irq.index, irq.count, irq.flags); - intp = vfio_init_intp(vbasedev, irq); + intp = vfio_init_intp(vbasedev, irq, errp); if (!intp) { - error_report("vfio: Error installing IRQ %d up", i); + ret = -1; goto irq_err; } } @@ -538,13 +541,14 @@ static VFIODeviceOps vfio_platform_ops = { /** * vfio_base_device_init - perform preliminary VFIO setup * @vbasedev: the VFIO device handle + * @errp: error object * * Implement the VFIO command sequence that allows to discover * assigned device resources: group extraction, device * fd retrieval, resource query. 
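The interrupt setup converted above boils down to allocating one eventfd for the trigger and, for automasked (level) interrupts, a second one for resampling, unwinding in reverse order when anything fails. Here is a toy version using raw eventfd(2) rather than QEMU's EventNotifier; struct intp and intp_init() are invented names for the sketch.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/eventfd.h>
    #include <unistd.h>

    /* Toy stand-in for VFIOINTp: a trigger eventfd plus an optional
     * resample (unmask) eventfd for automasked interrupts. */
    struct intp {
        int trigger_fd;
        int unmask_fd;
    };

    static struct intp *intp_init(int automasked)
    {
        struct intp *intp = calloc(1, sizeof(*intp));

        if (!intp) {
            return NULL;
        }
        intp->trigger_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
        if (intp->trigger_fd < 0) {
            perror("trigger eventfd");
            free(intp);                 /* nothing else to undo yet */
            return NULL;
        }
        intp->unmask_fd = -1;
        if (automasked) {
            intp->unmask_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
            if (intp->unmask_fd < 0) {
                perror("resample eventfd");
                close(intp->trigger_fd);  /* unwind in reverse order */
                free(intp);
                return NULL;
            }
        }
        return intp;
    }

    int main(void)
    {
        struct intp *intp = intp_init(1);

        printf("%s\n", intp ? "notifiers ready" : "init failed");
        return 0;
    }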
* Precondition: the device name must be initialized */ -static int vfio_base_device_init(VFIODevice *vbasedev) +static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) { VFIOGroup *group; VFIODevice *vbasedev_iter; @@ -560,6 +564,7 @@ static int vfio_base_device_init(VFIODevice *vbasedev) vbasedev->name = g_strdup(basename(vbasedev->sysfsdev)); } else { if (!vbasedev->name || strchr(vbasedev->name, '/')) { + error_setg(errp, "wrong host device name"); return -EINVAL; } @@ -568,8 +573,8 @@ static int vfio_base_device_init(VFIODevice *vbasedev) } if (stat(vbasedev->sysfsdev, &st) < 0) { - error_report("vfio: error: no such host device: %s", - vbasedev->sysfsdev); + error_setg_errno(errp, errno, + "failed to get the sysfs host device file status"); return -errno; } @@ -578,44 +583,41 @@ static int vfio_base_device_init(VFIODevice *vbasedev) g_free(tmp); if (len < 0 || len >= sizeof(group_path)) { - error_report("vfio: error no iommu_group for device"); - return len < 0 ? -errno : -ENAMETOOLONG; + ret = len < 0 ? -errno : -ENAMETOOLONG; + error_setg_errno(errp, -ret, "no iommu_group found"); + return ret; } group_path[len] = 0; group_name = basename(group_path); if (sscanf(group_name, "%d", &groupid) != 1) { - error_report("vfio: error reading %s: %m", group_path); + error_setg_errno(errp, errno, "failed to read %s", group_path); return -errno; } trace_vfio_platform_base_device_init(vbasedev->name, groupid); - group = vfio_get_group(groupid, &address_space_memory); + group = vfio_get_group(groupid, &address_space_memory, errp); if (!group) { - error_report("vfio: failed to get group %d", groupid); return -ENOENT; } QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) { - error_report("vfio: error: device %s is already attached", - vbasedev->name); + error_setg(errp, "device is already attached"); vfio_put_group(group); return -EBUSY; } } - ret = vfio_get_device(group, vbasedev->name, vbasedev); + ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); if (ret) { - error_report("vfio: failed to get device %s", vbasedev->name); vfio_put_group(group); return ret; } - ret = vfio_populate_device(vbasedev); + ret = vfio_populate_device(vbasedev, errp); if (ret) { - error_report("vfio: failed to populate device %s", vbasedev->name); vfio_put_group(group); } @@ -644,11 +646,9 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) vbasedev->sysfsdev : vbasedev->name, vdev->compat); - ret = vfio_base_device_init(vbasedev); + ret = vfio_base_device_init(vbasedev, errp); if (ret) { - error_setg(errp, "vfio: vfio_base_device_init failed for %s", - vbasedev->name); - return; + goto out; } for (i = 0; i < vbasedev->num_regions; i++) { @@ -658,6 +658,16 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) } sysbus_init_mmio(sbdev, vdev->regions[i]->mem); } +out: + if (!ret) { + return; + } + + if (vdev->vbasedev.name) { + error_prepend(errp, ERR_PREFIX, vdev->vbasedev.name); + } else { + error_prepend(errp, "vfio error: "); + } } static const VMStateDescription vfio_platform_vmstate = { diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c index 7443d348d9..4409bcc0d7 100644 --- a/hw/vfio/spapr.c +++ b/hw/vfio/spapr.c @@ -25,7 +25,7 @@ static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) } return !memory_region_is_ram(section->mr) || - memory_region_is_skip_dump(section->mr); + memory_region_is_ram_device(section->mr); } static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection 
*section, hwaddr gpa) diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index da133221de..ef81609b98 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -36,7 +36,7 @@ vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" -vfio_initfn(const char *name, int group_id) " (%s) group %d" +vfio_realize(const char *name, int group_id) " (%s) group %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s %x@%x" vfio_pci_reset(const char *name) " (%s)" vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET" diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs index 3e2b175da8..95c4c30ea1 100644 --- a/hw/virtio/Makefile.objs +++ b/hw/virtio/Makefile.objs @@ -5,3 +5,7 @@ common-obj-y += virtio-mmio.o obj-y += virtio.o virtio-balloon.o obj-$(CONFIG_LINUX) += vhost.o vhost-backend.o vhost-user.o + +obj-$(CONFIG_VHOST_VSOCK) += vhost-vsock.o +obj-y += virtio-crypto.o +obj-$(CONFIG_VIRTIO_PCI) += virtio-crypto-pci.o diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 55184d33b3..7b6f55e70e 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -5,7 +5,7 @@ virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) " virtqueue_flush(void *vq, unsigned int count) "vq %p count %u" virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u" virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p" -virtio_irq(void *vq) "vq %p" +virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p" virtio_notify(void *vdev, void *vq) "vdev %p vq %p" virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u" @@ -14,3 +14,8 @@ virtio_rng_guest_not_ready(void *rng) "rng %p: guest not ready" virtio_rng_pushed(void *rng, size_t len) "rng %p: %zd bytes pushed" virtio_rng_request(void *rng, size_t size, unsigned quota) "rng %p: %zd bytes requested, %u bytes quota left" +# hw/virtio/virtio-balloon.c +virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s gpa: %"PRIx64 +virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d" +virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d" +virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d" diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 7681f152f3..272a5ec584 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -172,6 +172,19 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) return idx - dev->vq_index; } +#ifdef CONFIG_VHOST_VSOCK +static int vhost_kernel_vsock_set_guest_cid(struct vhost_dev *dev, + uint64_t guest_cid) +{ + return vhost_kernel_call(dev, VHOST_VSOCK_SET_GUEST_CID, &guest_cid); +} + +static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start) +{ + return vhost_kernel_call(dev, VHOST_VSOCK_SET_RUNNING, &start); +} +#endif /* CONFIG_VHOST_VSOCK */ + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, @@ -197,6 +210,10 @@ static const VhostOps 
kernel_ops = { .vhost_set_owner = vhost_kernel_set_owner, .vhost_reset_device = vhost_kernel_reset_device, .vhost_get_vq_index = vhost_kernel_get_vq_index, +#ifdef CONFIG_VHOST_VSOCK + .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, + .vhost_vsock_set_running = vhost_kernel_vsock_set_running, +#endif /* CONFIG_VHOST_VSOCK */ }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b57454a4b7..7ee92b32c5 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -116,7 +116,7 @@ static bool ioeventfd_enabled(void) static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { - CharDriverState *chr = dev->opaque; + CharBackend *chr = dev->opaque; uint8_t *p = (uint8_t *) msg; int r, size = VHOST_USER_HDR_SIZE; @@ -196,7 +196,7 @@ static bool vhost_user_one_time_request(VhostUserRequest request) static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int *fds, int fd_num) { - CharDriverState *chr = dev->opaque; + CharBackend *chr = dev->opaque; int ret, size = VHOST_USER_HDR_SIZE + msg->size; /* diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c new file mode 100644 index 0000000000..b4815629e1 --- /dev/null +++ b/hw/virtio/vhost-vsock.c @@ -0,0 +1,417 @@ +/* + * Virtio vsock device + * + * Copyright 2015 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#include +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_vsock.h" +#include "qapi/error.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" +#include "migration/migration.h" +#include "qemu/error-report.h" +#include "hw/virtio/vhost-vsock.h" +#include "qemu/iov.h" +#include "monitor/monitor.h" + +enum { + VHOST_VSOCK_SAVEVM_VERSION = 0, + + VHOST_VSOCK_QUEUE_SIZE = 128, +}; + +static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + struct virtio_vsock_config vsockcfg = {}; + + virtio_stq_p(vdev, &vsockcfg.guest_cid, vsock->conf.guest_cid); + memcpy(config, &vsockcfg, sizeof(vsockcfg)); +} + +static int vhost_vsock_set_guest_cid(VHostVSock *vsock) +{ + const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + int ret; + + if (!vhost_ops->vhost_vsock_set_guest_cid) { + return -ENOSYS; + } + + ret = vhost_ops->vhost_vsock_set_guest_cid(&vsock->vhost_dev, + vsock->conf.guest_cid); + if (ret < 0) { + return -errno; + } + return 0; +} + +static int vhost_vsock_set_running(VHostVSock *vsock, int start) +{ + const VhostOps *vhost_ops = vsock->vhost_dev.vhost_ops; + int ret; + + if (!vhost_ops->vhost_vsock_set_running) { + return -ENOSYS; + } + + ret = vhost_ops->vhost_vsock_set_running(&vsock->vhost_dev, start); + if (ret < 0) { + return -errno; + } + return 0; +} + +static void vhost_vsock_start(VirtIODevice *vdev) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&vsock->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, true); + if 
(ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + vsock->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&vsock->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + ret = vhost_vsock_set_running(vsock, 1); + if (ret < 0) { + error_report("Error starting vhost vsock: %d", -ret); + goto err_dev_start; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < vsock->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&vsock->vhost_dev, vdev, i, false); + } + + return; + +err_dev_start: + vhost_dev_stop(&vsock->vhost_dev, vdev); +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); +} + +static void vhost_vsock_stop(VirtIODevice *vdev) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + ret = vhost_vsock_set_running(vsock, 0); + if (ret < 0) { + error_report("vhost vsock set running failed: %d", ret); + return; + } + + vhost_dev_stop(&vsock->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, vsock->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&vsock->vhost_dev, vdev); +} + +static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (vsock->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vhost_vsock_start(vdev); + } else { + vhost_vsock_stop(vdev); + } +} + +static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, + uint64_t requested_features, + Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vhost_vsock_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* Do nothing */ +} + +static void vhost_vsock_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + + vhost_virtqueue_mask(&vsock->vhost_dev, vdev, idx, mask); +} + +static bool vhost_vsock_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostVSock *vsock = VHOST_VSOCK(vdev); + + return vhost_virtqueue_pending(&vsock->vhost_dev, idx); +} + +static void vhost_vsock_send_transport_reset(VHostVSock *vsock) +{ + VirtQueueElement *elem; + VirtQueue *vq = vsock->event_vq; + struct virtio_vsock_event event = { + .id = cpu_to_le32(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET), + }; + + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + error_report("vhost-vsock missed transport reset event"); + return; + } + + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); + goto out; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); + goto out; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vsock), vq); + +out: + g_free(elem); +} + +static void 
vhost_vsock_post_load_timer_cleanup(VHostVSock *vsock) +{ + if (!vsock->post_load_timer) { + return; + } + + timer_del(vsock->post_load_timer); + timer_free(vsock->post_load_timer); + vsock->post_load_timer = NULL; +} + +static void vhost_vsock_post_load_timer_cb(void *opaque) +{ + VHostVSock *vsock = opaque; + + vhost_vsock_post_load_timer_cleanup(vsock); + vhost_vsock_send_transport_reset(vsock); +} + +static void vhost_vsock_pre_save(void *opaque) +{ + VHostVSock *vsock = opaque; + + /* At this point, backend must be stopped, otherwise + * it might keep writing to memory. */ + assert(!vsock->vhost_dev.started); +} + +static int vhost_vsock_post_load(void *opaque, int version_id) +{ + VHostVSock *vsock = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(vsock); + + if (virtio_queue_get_addr(vdev, 2)) { + /* Defer transport reset event to a vm clock timer so that virtqueue + * changes happen after migration has completed. + */ + assert(!vsock->post_load_timer); + vsock->post_load_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + vhost_vsock_post_load_timer_cb, + vsock); + timer_mod(vsock->post_load_timer, 1); + } + return 0; +} + +static const VMStateDescription vmstate_virtio_vhost_vsock = { + .name = "virtio-vhost_vsock", + .minimum_version_id = VHOST_VSOCK_SAVEVM_VERSION, + .version_id = VHOST_VSOCK_SAVEVM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, + .pre_save = vhost_vsock_pre_save, + .post_load = vhost_vsock_post_load, +}; + +static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostVSock *vsock = VHOST_VSOCK(dev); + int vhostfd; + int ret; + + /* Refuse to use reserved CID numbers */ + if (vsock->conf.guest_cid <= 2) { + error_setg(errp, "guest-cid property must be greater than 2"); + return; + } + + if (vsock->conf.guest_cid > UINT32_MAX) { + error_setg(errp, "guest-cid property must be a 32-bit number"); + return; + } + + if (vsock->conf.vhostfd) { + vhostfd = monitor_fd_param(cur_mon, vsock->conf.vhostfd, errp); + if (vhostfd == -1) { + error_prepend(errp, "vhost-vsock: unable to parse vhostfd: "); + return; + } + } else { + vhostfd = open("/dev/vhost-vsock", O_RDWR); + if (vhostfd < 0) { + error_setg_errno(errp, -errno, + "vhost-vsock: failed to open vhost device"); + return; + } + } + + virtio_init(vdev, "vhost-vsock", VIRTIO_ID_VSOCK, + sizeof(struct virtio_vsock_config)); + + /* Receive and transmit queues belong to vhost */ + virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output); + virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, vhost_vsock_handle_output); + + /* The event queue belongs to QEMU */ + vsock->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE, + vhost_vsock_handle_output); + + vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); + vsock->vhost_dev.vqs = vsock->vhost_vqs; + ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, + VHOST_BACKEND_TYPE_KERNEL, 0); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); + goto err_virtio; + } + + ret = vhost_vsock_set_guest_cid(vsock); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost-vsock: unable to set guest cid"); + goto err_vhost_dev; + } + + vsock->post_load_timer = NULL; + return; + +err_vhost_dev: + vhost_dev_cleanup(&vsock->vhost_dev); +err_virtio: + virtio_cleanup(vdev); + close(vhostfd); + return; +} + +static void vhost_vsock_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + 
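The realize checks above reject vsock context IDs that cannot work: CIDs 0 through 2 are reserved, and although the config field is 64 bits wide a guest CID has to fit in 32 bits. A tiny standalone version of those two checks follows; check_guest_cid() is an invented name for this sketch.

    #include <stdint.h>
    #include <stdio.h>

    /* Mirror the two guest-cid checks made at realize time. */
    static const char *check_guest_cid(uint64_t cid)
    {
        if (cid <= 2) {
            return "guest-cid must be greater than 2";
        }
        if (cid > UINT32_MAX) {
            return "guest-cid must be a 32-bit number";
        }
        return NULL;                    /* acceptable */
    }

    int main(void)
    {
        uint64_t cids[] = { 0, 2, 3, 0x100000000ULL };

        for (unsigned i = 0; i < sizeof(cids) / sizeof(cids[0]); i++) {
            const char *err = check_guest_cid(cids[i]);

            printf("cid %llu: %s\n", (unsigned long long)cids[i],
                   err ? err : "ok");
        }
        return 0;
    }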
VHostVSock *vsock = VHOST_VSOCK(dev); + + vhost_vsock_post_load_timer_cleanup(vsock); + + /* This will stop vhost backend if appropriate. */ + vhost_vsock_set_status(vdev, 0); + + vhost_dev_cleanup(&vsock->vhost_dev); + virtio_cleanup(vdev); +} + +static Property vhost_vsock_properties[] = { + DEFINE_PROP_UINT64("guest-cid", VHostVSock, conf.guest_cid, 0), + DEFINE_PROP_STRING("vhostfd", VHostVSock, conf.vhostfd), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_vsock_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = vhost_vsock_properties; + dc->vmsd = &vmstate_virtio_vhost_vsock; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = vhost_vsock_device_realize; + vdc->unrealize = vhost_vsock_device_unrealize; + vdc->get_features = vhost_vsock_get_features; + vdc->get_config = vhost_vsock_get_config; + vdc->set_status = vhost_vsock_set_status; + vdc->guest_notifier_mask = vhost_vsock_guest_notifier_mask; + vdc->guest_notifier_pending = vhost_vsock_guest_notifier_pending; +} + +static const TypeInfo vhost_vsock_info = { + .name = TYPE_VHOST_VSOCK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostVSock), + .class_init = vhost_vsock_class_init, +}; + +static void vhost_vsock_register_types(void) +{ + type_register_static(&vhost_vsock_info); +} + +type_init(vhost_vsock_register_types) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 3d0c807d0e..f7f70237db 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -421,32 +421,73 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) dev->log_size = size; } + +static int vhost_verify_ring_part_mapping(void *part, + uint64_t part_addr, + uint64_t part_size, + uint64_t start_addr, + uint64_t size) +{ + hwaddr l; + void *p; + int r = 0; + + if (!ranges_overlap(start_addr, size, part_addr, part_size)) { + return 0; + } + l = part_size; + p = cpu_physical_memory_map(part_addr, &l, 1); + if (!p || l != part_size) { + r = -ENOMEM; + } + if (p != part) { + r = -EBUSY; + } + cpu_physical_memory_unmap(p, l, 0, 0); + return r; +} + static int vhost_verify_ring_mappings(struct vhost_dev *dev, uint64_t start_addr, uint64_t size) { - int i; + int i, j; int r = 0; + const char *part_name[] = { + "descriptor table", + "available ring", + "used ring" + }; - for (i = 0; !r && i < dev->nvqs; ++i) { + for (i = 0; i < dev->nvqs; ++i) { struct vhost_virtqueue *vq = dev->vqs + i; - hwaddr l; - void *p; - if (!ranges_overlap(start_addr, size, vq->ring_phys, vq->ring_size)) { - continue; + j = 0; + r = vhost_verify_ring_part_mapping(vq->desc, vq->desc_phys, + vq->desc_size, start_addr, size); + if (!r) { + break; } - l = vq->ring_size; - p = cpu_physical_memory_map(vq->ring_phys, &l, 1); - if (!p || l != vq->ring_size) { - error_report("Unable to map ring buffer for ring %d", i); - r = -ENOMEM; + + j++; + r = vhost_verify_ring_part_mapping(vq->avail, vq->avail_phys, + vq->avail_size, start_addr, size); + if (!r) { + break; } - if (p != vq->ring) { - error_report("Ring buffer relocated for ring %d", i); - r = -EBUSY; + + j++; + r = vhost_verify_ring_part_mapping(vq->used, vq->used_phys, + vq->used_size, start_addr, size); + if (!r) { + break; } - cpu_physical_memory_unmap(p, l, 0, 0); + } + + if (r == -ENOMEM) { + error_report("Unable to map %s for ring %d", part_name[j], i); + } else if (r == -EBUSY) { + error_report("%s relocated for ring %d", part_name[j], i); } return r; } @@ -822,6 +863,9 @@ 
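The rewritten ring verification above only re-checks the descriptor table, available ring and used ring when the changed guest-memory range actually intersects them. The sketch below shows a simplified overlap test gating that re-check; the addresses are made up, and QEMU's own ranges_overlap() helper is equivalent apart from overflow handling.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Half-open interval intersection: [a, a+as) overlaps [b, b+bs). */
    static bool ranges_overlap(uint64_t a, uint64_t as, uint64_t b, uint64_t bs)
    {
        return a < b + bs && b < a + as;
    }

    int main(void)
    {
        /* A guest memory update covering 0x2000-0x3000 only forces
         * re-verification of the ring parts that fall inside it. */
        struct { const char *name; uint64_t addr, size; } parts[] = {
            { "descriptor table", 0x1000, 0x1000 },
            { "available ring",   0x2800, 0x200  },
            { "used ring",        0x4000, 0x400  },
        };
        uint64_t start = 0x2000, size = 0x1000;

        for (unsigned i = 0; i < 3; i++) {
            printf("%-16s %s\n", parts[i].name,
                   ranges_overlap(start, size, parts[i].addr, parts[i].size)
                   ? "re-check mapping" : "untouched");
        }
        return 0;
    }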
static int vhost_virtqueue_start(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx) { + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); hwaddr s, l, a; int r; int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); @@ -857,15 +901,15 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } } - s = l = virtio_queue_get_desc_size(vdev, idx); - a = virtio_queue_get_desc_addr(vdev, idx); + vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx); + vq->desc_phys = a = virtio_queue_get_desc_addr(vdev, idx); vq->desc = cpu_physical_memory_map(a, &l, 0); if (!vq->desc || l != s) { r = -ENOMEM; goto fail_alloc_desc; } - s = l = virtio_queue_get_avail_size(vdev, idx); - a = virtio_queue_get_avail_addr(vdev, idx); + vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx); + vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx); vq->avail = cpu_physical_memory_map(a, &l, 0); if (!vq->avail || l != s) { r = -ENOMEM; @@ -879,14 +923,6 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, goto fail_alloc_used; } - vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx); - vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx); - vq->ring = cpu_physical_memory_map(a, &l, 1); - if (!vq->ring || l != s) { - r = -ENOMEM; - goto fail_alloc_ring; - } - r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled); if (r < 0) { r = -errno; @@ -912,13 +948,21 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vhost_virtqueue_mask(dev, vdev, idx, false); } + if (k->query_guest_notifiers && + k->query_guest_notifiers(qbus->parent) && + virtio_queue_vector(vdev, idx) == VIRTIO_NO_VECTOR) { + file.fd = -1; + r = dev->vhost_ops->vhost_set_vring_call(dev, &file); + if (r) { + goto fail_vector; + } + } + return 0; +fail_vector: fail_kick: fail_alloc: - cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), - 0, 0); -fail_alloc_ring: cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), 0, 0); fail_alloc_used: @@ -959,8 +1003,6 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, vhost_vq_index); } - cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), - 0, virtio_queue_get_ring_size(vdev, idx)); cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), 1, virtio_queue_get_used_size(vdev, idx)); cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), @@ -1108,7 +1150,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { error_setg(&hdev->migration_blocker, "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); - } else if (!qemu_memfd_check()) { + } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_check()) { error_setg(&hdev->migration_blocker, "Migration disabled: failed to allocate shared memory"); } @@ -1172,13 +1214,14 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); - VirtioBusState *vbus = VIRTIO_BUS(qbus); - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); int i, r, e; - if (!k->ioeventfd_started) { + /* We will pass the notifiers to the kernel, make sure that QEMU + * doesn't interfere. 
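Splitting the old vq->ring mapping into separate desc/avail/used mappings, as the hunk above does, means three map calls with three separate sizes; for a split (legacy-layout) virtqueue those sizes follow directly from the virtio spec. A small sketch of that arithmetic, assuming the event-index feature decides the trailing two bytes:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Split-ring sizes: 16 bytes per descriptor; flags + idx + ring entries
     * (+ used_event/avail_event when VIRTIO_RING_F_EVENT_IDX is negotiated). */
    static uint64_t desc_size(unsigned num)              { return 16ULL * num; }
    static uint64_t avail_size(unsigned num, bool evidx) { return 4 + 2ULL * num + (evidx ? 2 : 0); }
    static uint64_t used_size(unsigned num, bool evidx)  { return 4 + 8ULL * num + (evidx ? 2 : 0); }

    int main(void)
    {
        unsigned num = 128;    /* VHOST_VSOCK_QUEUE_SIZE in the patch */

        printf("desc:  %llu bytes\n", (unsigned long long)desc_size(num));
        printf("avail: %llu bytes\n", (unsigned long long)avail_size(num, true));
        printf("used:  %llu bytes\n", (unsigned long long)used_size(num, true));
        return 0;
    }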
+ */ + r = virtio_device_grab_ioeventfd(vdev); + if (r < 0) { error_report("binding does not support host notifiers"); - r = -ENOSYS; goto fail; } @@ -1201,6 +1244,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) } assert (e >= 0); } + virtio_device_release_ioeventfd(vdev); fail: return r; } @@ -1223,6 +1267,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) } assert (r >= 0); } + virtio_device_release_ioeventfd(vdev); } /* Test and clear event pending status. diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 5af429a58a..884570a57d 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -34,13 +34,11 @@ static void balloon_page(void *addr, int deflate) { -#if defined(__linux__) if (!qemu_balloon_is_inhibited() && (!kvm_enabled() || kvm_has_sync_mmu())) { qemu_madvise(addr, BALLOON_PAGE_SIZE, deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED); } -#endif } static const char *balloon_stat_names[] = { @@ -396,26 +394,9 @@ static void virtio_balloon_to_target(void *opaque, ram_addr_t target) trace_virtio_balloon_to_target(target, dev->num_pages); } -static void virtio_balloon_save_device(VirtIODevice *vdev, QEMUFile *f) +static int virtio_balloon_post_load_device(void *opaque, int version_id) { - VirtIOBalloon *s = VIRTIO_BALLOON(vdev); - - qemu_put_be32(f, s->num_pages); - qemu_put_be32(f, s->actual); -} - -static int virtio_balloon_load(QEMUFile *f, void *opaque, size_t size) -{ - return virtio_load(VIRTIO_DEVICE(opaque), f, 1); -} - -static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f, - int version_id) -{ - VirtIOBalloon *s = VIRTIO_BALLOON(vdev); - - s->num_pages = qemu_get_be32(f); - s->actual = qemu_get_be32(f); + VirtIOBalloon *s = VIRTIO_BALLOON(opaque); if (balloon_stats_enabled(s)) { balloon_stats_change_timer(s, s->stats_poll_interval); @@ -423,6 +404,18 @@ static int virtio_balloon_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static const VMStateDescription vmstate_virtio_balloon_device = { + .name = "virtio-balloon-device", + .version_id = 1, + .minimum_version_id = 1, + .post_load = virtio_balloon_post_load_device, + .fields = (VMStateField[]) { + VMSTATE_UINT32(num_pages, VirtIOBalloon), + VMSTATE_UINT32(actual, VirtIOBalloon), + VMSTATE_END_OF_LIST() + }, +}; + static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -463,11 +456,24 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) VirtIOBalloon *s = VIRTIO_BALLOON(vdev); if (s->stats_vq_elem != NULL) { + virtqueue_unpop(s->svq, s->stats_vq_elem, 0); g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; } } +static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) +{ + VirtIOBalloon *s = VIRTIO_BALLOON(vdev); + + if (!s->stats_vq_elem && vdev->vm_running && + (status & VIRTIO_CONFIG_S_DRIVER_OK) && virtqueue_rewind(s->svq, 1)) { + /* poll stats queue for the element we have discarded when the VM + * was stopped */ + virtio_balloon_receive_stats(vdev, s->svq); + } +} + static void virtio_balloon_instance_init(Object *obj) { VirtIOBalloon *s = VIRTIO_BALLOON(obj); @@ -481,7 +487,15 @@ static void virtio_balloon_instance_init(Object *obj) NULL, s, NULL); } -VMSTATE_VIRTIO_DEVICE(balloon, 1, virtio_balloon_load, virtio_vmstate_save); +static const VMStateDescription vmstate_virtio_balloon = { + .name = "virtio-balloon", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + 
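On Linux, inflating or deflating one balloon page in balloon_page() above reduces to a single madvise() call. The standalone demonstration below shows just that call; the buffer setup is ad hoc and is not how QEMU obtains guest pages.

    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    #define BALLOON_PAGE_SIZE 4096

    /* MADV_DONTNEED lets the host reclaim the page (inflate); MADV_WILLNEED
     * hints that the guest is taking it back (deflate). */
    static void balloon_page(void *addr, int deflate)
    {
        if (madvise(addr, BALLOON_PAGE_SIZE,
                    deflate ? MADV_WILLNEED : MADV_DONTNEED) < 0) {
            perror("madvise");
        }
    }

    int main(void)
    {
        void *buf = mmap(NULL, BALLOON_PAGE_SIZE, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (buf == MAP_FAILED) {
            perror("mmap");
            return 1;
        }
        memset(buf, 0xaa, BALLOON_PAGE_SIZE);   /* fault the page in */
        balloon_page(buf, 0);                   /* inflate: give the page back */
        balloon_page(buf, 1);                   /* deflate: we want it again */
        munmap(buf, BALLOON_PAGE_SIZE);
        return 0;
    }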
VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; static Property virtio_balloon_properties[] = { DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, @@ -503,8 +517,8 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) vdc->get_config = virtio_balloon_get_config; vdc->set_config = virtio_balloon_set_config; vdc->get_features = virtio_balloon_get_features; - vdc->save = virtio_balloon_save_device; - vdc->load = virtio_balloon_load_device; + vdc->set_status = virtio_balloon_set_status; + vdc->vmsd = &vmstate_virtio_balloon_device; } static const TypeInfo virtio_balloon_info = { diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index a85b7c8abe..d6c0c72bd2 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -49,16 +49,17 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) DPRINTF("%s: plug device.\n", qbus->name); - if (klass->device_plugged != NULL) { - klass->device_plugged(qbus->parent, errp); + if (klass->pre_plugged != NULL) { + klass->pre_plugged(qbus->parent, errp); } /* Get the features of the plugged device. */ assert(vdc->get_features != NULL); vdev->host_features = vdc->get_features(vdev, vdev->host_features, errp); - if (klass->post_plugged != NULL) { - klass->post_plugged(qbus->parent, errp); + + if (klass->device_plugged != NULL) { + klass->device_plugged(qbus->parent, errp); } } @@ -146,130 +147,133 @@ void virtio_bus_set_vdev_config(VirtioBusState *bus, uint8_t *config) } } -/* - * This function handles both assigning the ioeventfd handler and - * registering it with the kernel. - * assign: register/deregister ioeventfd with the kernel - * set_handler: use the generic ioeventfd handler - */ -static int set_host_notifier_internal(DeviceState *proxy, VirtioBusState *bus, - int n, bool assign, bool set_handler) +/* On success, ioeventfd ownership belongs to the caller. */ +int virtio_bus_grab_ioeventfd(VirtioBusState *bus) { - VirtIODevice *vdev = virtio_bus_get_device(bus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); - VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); - int r = 0; - if (assign) { - r = event_notifier_init(notifier, 1); - if (r < 0) { - error_report("%s: unable to init event notifier: %d", __func__, r); - return r; - } - virtio_queue_set_host_notifier_fd_handler(vq, true, set_handler); - r = k->ioeventfd_assign(proxy, notifier, n, assign); - if (r < 0) { - error_report("%s: unable to assign ioeventfd: %d", __func__, r); - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); - return r; - } - } else { - k->ioeventfd_assign(proxy, notifier, n, assign); - virtio_queue_set_host_notifier_fd_handler(vq, false, false); - event_notifier_cleanup(notifier); + /* vhost can be used even if ioeventfd=off in the proxy device, + * so do not check k->ioeventfd_enabled. + */ + if (!k->ioeventfd_assign) { + return -ENOSYS; + } + + if (bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) { + virtio_bus_stop_ioeventfd(bus); + /* Remember that we need to restart ioeventfd + * when ioeventfd_grabbed becomes zero. + */ + bus->ioeventfd_started = true; + } + bus->ioeventfd_grabbed++; + return 0; +} + +void virtio_bus_release_ioeventfd(VirtioBusState *bus) +{ + assert(bus->ioeventfd_grabbed != 0); + if (--bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) { + /* Force virtio_bus_start_ioeventfd to act. 
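The grab/release pair introduced here is reference counting with a twist: the first grab switches the bus-managed ioeventfd handling off so a client such as vhost can own the host notifiers, and the last release switches it back on (the real code additionally leaves ioeventfd_started set so the later restart knows it is wanted). A toy model of that accounting, with struct bus and its helpers invented for the sketch:

    #include <stdbool.h>
    #include <stdio.h>

    struct bus {
        unsigned grabbed;
        bool started;
    };

    static void bus_start(struct bus *b) { b->started = true;  puts("bus handler on"); }
    static void bus_stop(struct bus *b)  { b->started = false; puts("bus handler off"); }

    static void bus_grab(struct bus *b)
    {
        if (b->grabbed++ == 0 && b->started) {
            bus_stop(b);               /* first client takes over the notifiers */
        }
    }

    static void bus_release(struct bus *b)
    {
        /* Callers must pair grab/release; the real code asserts on this. */
        if (--b->grabbed == 0 && !b->started) {
            bus_start(b);              /* last client gone: resume bus handling */
        }
    }

    int main(void)
    {
        struct bus b = { 0, false };

        bus_start(&b);      /* normal ioeventfd-backed operation */
        bus_grab(&b);       /* vhost takes over the host notifiers */
        bus_release(&b);    /* vhost stops: bus handler comes back */
        return 0;
    }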
*/ + bus->ioeventfd_started = false; + virtio_bus_start_ioeventfd(bus); } - return r; } -void virtio_bus_start_ioeventfd(VirtioBusState *bus) +int virtio_bus_start_ioeventfd(VirtioBusState *bus) { VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); DeviceState *proxy = DEVICE(BUS(bus)->parent); - VirtIODevice *vdev; - int n, r; + VirtIODevice *vdev = virtio_bus_get_device(bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + int r; - if (!k->ioeventfd_started || k->ioeventfd_started(proxy)) { - return; - } - if (k->ioeventfd_disabled(proxy)) { - return; + if (!k->ioeventfd_assign || !k->ioeventfd_enabled(proxy)) { + return -ENOSYS; } - vdev = virtio_bus_get_device(bus); - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = set_host_notifier_internal(proxy, bus, n, true, true); - if (r < 0) { - goto assign_error; - } + if (bus->ioeventfd_started) { + return 0; } - k->ioeventfd_set_started(proxy, true, false); - return; -assign_error: - while (--n >= 0) { - if (!virtio_queue_get_num(vdev, n)) { - continue; + /* Only set our notifier if we have ownership. */ + if (!bus->ioeventfd_grabbed) { + r = vdc->start_ioeventfd(vdev); + if (r < 0) { + error_report("%s: failed. Fallback to userspace (slower).", __func__); + return r; } - - r = set_host_notifier_internal(proxy, bus, n, false, false); - assert(r >= 0); } - k->ioeventfd_set_started(proxy, false, true); - error_report("%s: failed. Fallback to userspace (slower).", __func__); + bus->ioeventfd_started = true; + return 0; } void virtio_bus_stop_ioeventfd(VirtioBusState *bus) { - VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); - DeviceState *proxy = DEVICE(BUS(bus)->parent); VirtIODevice *vdev; - int n, r; + VirtioDeviceClass *vdc; - if (!k->ioeventfd_started || !k->ioeventfd_started(proxy)) { + if (!bus->ioeventfd_started) { return; } - vdev = virtio_bus_get_device(bus); - for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { - if (!virtio_queue_get_num(vdev, n)) { - continue; - } - r = set_host_notifier_internal(proxy, bus, n, false, false); - assert(r >= 0); + + /* Only remove our notifier if we have ownership. */ + if (!bus->ioeventfd_grabbed) { + vdev = virtio_bus_get_device(bus); + vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + vdc->stop_ioeventfd(vdev); } - k->ioeventfd_set_started(proxy, false, false); + bus->ioeventfd_started = false; +} + +bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus) +{ + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + DeviceState *proxy = DEVICE(BUS(bus)->parent); + + return k->ioeventfd_assign && k->ioeventfd_enabled(proxy); } /* - * This function switches from/to the generic ioeventfd handler. - * assign==false means 'use generic ioeventfd handler'. + * This function switches ioeventfd on/off in the device. + * The caller must set or clear the handlers for the EventNotifier. */ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) { + VirtIODevice *vdev = virtio_bus_get_device(bus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); DeviceState *proxy = DEVICE(BUS(bus)->parent); + VirtQueue *vq = virtio_get_queue(vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int r = 0; - if (!k->ioeventfd_started) { + if (!k->ioeventfd_assign) { return -ENOSYS; } - k->ioeventfd_set_disabled(proxy, assign); + if (assign) { - /* - * Stop using the generic ioeventfd, we are doing eventfd handling - * ourselves below - * - * FIXME: We should just switch the handler and not deassign the - * ioeventfd. 
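One detail of the teardown path in the virtio_bus_set_host_notifier() rewrite here is worth calling out: after deassigning, the notifier is read once more so a kick that raced with the teardown is handled rather than silently dropped. A standalone eventfd sketch of that drain-before-close step:

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/eventfd.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
        uint64_t count;

        if (fd < 0) {
            perror("eventfd");
            return 1;
        }

        /* A guest "kick" arrives while the notifier is being torn down... */
        count = 1;
        if (write(fd, &count, sizeof(count)) != (ssize_t)sizeof(count)) {
            perror("write");
        }

        /* ...so drain the counter before closing and handle the pending work. */
        if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count) && count) {
            printf("handled %llu pending kick(s) before cleanup\n",
                   (unsigned long long)count);
        }
        close(fd);
        return 0;
    }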
- * Otherwise, there's a window where we don't have an - * ioeventfd and we may end up with a notification where - * we don't expect one. - */ - virtio_bus_stop_ioeventfd(bus); + r = event_notifier_init(notifier, 1); + if (r < 0) { + error_report("%s: unable to init event notifier: %s (%d)", + __func__, strerror(-r), r); + return r; + } + r = k->ioeventfd_assign(proxy, notifier, n, true); + if (r < 0) { + error_report("%s: unable to assign ioeventfd: %d", __func__, r); + goto cleanup_event_notifier; + } + return 0; + } else { + k->ioeventfd_assign(proxy, notifier, n, false); } - return set_host_notifier_internal(proxy, bus, n, assign, false); + +cleanup_event_notifier: + /* Test and clear notifier after disabling event, + * in case poll callback didn't have time to run. + */ + virtio_queue_host_notifier_read(notifier); + event_notifier_cleanup(notifier); + return r; } static char *virtio_bus_get_dev_path(DeviceState *dev) diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c new file mode 100644 index 0000000000..a1b09064c0 --- /dev/null +++ b/hw/virtio/virtio-crypto-pci.c @@ -0,0 +1,77 @@ +/* + * Virtio crypto device + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + * + */ +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-crypto.h" +#include "qapi/error.h" + +static Property virtio_crypto_pci_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_crypto_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIOCryptoPCI *vcrypto = VIRTIO_CRYPTO_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&vcrypto->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + virtio_pci_force_virtio_1(vpci_dev); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); + object_property_set_link(OBJECT(vcrypto), + OBJECT(vcrypto->vdev.conf.cryptodev), "cryptodev", + NULL); +} + +static void virtio_crypto_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + k->realize = virtio_crypto_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = virtio_crypto_pci_properties; + dc->hotpluggable = false; + pcidev_k->class_id = PCI_CLASS_OTHERS; +} + +static void virtio_crypto_initfn(Object *obj) +{ + VirtIOCryptoPCI *dev = VIRTIO_CRYPTO_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_CRYPTO); + object_property_add_alias(obj, "cryptodev", OBJECT(&dev->vdev), + "cryptodev", &error_abort); +} + +static const TypeInfo virtio_crypto_pci_info = { + .name = TYPE_VIRTIO_CRYPTO_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VirtIOCryptoPCI), + .instance_init = virtio_crypto_initfn, + .class_init = virtio_crypto_pci_class_init, +}; + +static void virtio_crypto_pci_register_types(void) +{ + type_register_static(&virtio_crypto_pci_info); +} +type_init(virtio_crypto_pci_register_types) diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c new file mode 100644 index 
0000000000..2f2467e859 --- /dev/null +++ b/hw/virtio/virtio-crypto.c @@ -0,0 +1,908 @@ +/* + * Virtio crypto Support + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "hw/qdev.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-crypto.h" +#include "hw/virtio/virtio-access.h" +#include "standard-headers/linux/virtio_ids.h" + +#define VIRTIO_CRYPTO_VM_VERSION 1 + +/* + * Transfer virtqueue index to crypto queue index. + * The control virtqueue is after the data virtqueues + * so the input value doesn't need to be adjusted + */ +static inline int virtio_crypto_vq2q(int queue_index) +{ + return queue_index; +} + +static int +virtio_crypto_cipher_session_helper(VirtIODevice *vdev, + CryptoDevBackendSymSessionInfo *info, + struct virtio_crypto_cipher_session_para *cipher_para, + struct iovec **iov, unsigned int *out_num) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + unsigned int num = *out_num; + + info->cipher_alg = ldl_le_p(&cipher_para->algo); + info->key_len = ldl_le_p(&cipher_para->keylen); + info->direction = ldl_le_p(&cipher_para->op); + DPRINTF("cipher_alg=%" PRIu32 ", info->direction=%" PRIu32 "\n", + info->cipher_alg, info->direction); + + if (info->key_len > vcrypto->conf.max_cipher_key_len) { + error_report("virtio-crypto length of cipher key is too big: %u", + info->key_len); + return -VIRTIO_CRYPTO_ERR; + } + /* Get cipher key */ + if (info->key_len > 0) { + size_t s; + DPRINTF("keylen=%" PRIu32 "\n", info->key_len); + + info->cipher_key = g_malloc(info->key_len); + s = iov_to_buf(*iov, num, 0, info->cipher_key, info->key_len); + if (unlikely(s != info->key_len)) { + virtio_error(vdev, "virtio-crypto cipher key incorrect"); + return -EFAULT; + } + iov_discard_front(iov, &num, info->key_len); + *out_num = num; + } + + return 0; +} + +static int64_t +virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, + struct virtio_crypto_sym_create_session_req *sess_req, + uint32_t queue_id, + uint32_t opcode, + struct iovec *iov, unsigned int out_num) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + CryptoDevBackendSymSessionInfo info; + int64_t session_id; + int queue_index; + uint32_t op_type; + Error *local_err = NULL; + int ret; + + memset(&info, 0, sizeof(info)); + op_type = ldl_le_p(&sess_req->op_type); + info.op_type = op_type; + info.op_code = opcode; + + if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { + ret = virtio_crypto_cipher_session_helper(vdev, &info, + &sess_req->u.cipher.para, + &iov, &out_num); + if (ret < 0) { + goto err; + } + } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + size_t s; + /* cipher part */ + ret = virtio_crypto_cipher_session_helper(vdev, &info, + &sess_req->u.chain.para.cipher_param, + &iov, &out_num); + if (ret < 0) { + goto err; + } + /* hash part */ + info.alg_chain_order = ldl_le_p( + &sess_req->u.chain.para.alg_chain_order); + info.add_len = ldl_le_p(&sess_req->u.chain.para.aad_len); + info.hash_mode = ldl_le_p(&sess_req->u.chain.para.hash_mode); + if (info.hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH) { + info.hash_alg = ldl_le_p(&sess_req->u.chain.para.u.mac_param.algo); + info.auth_key_len = ldl_le_p( + &sess_req->u.chain.para.u.mac_param.auth_key_len); + info.hash_result_len = ldl_le_p( + 
&sess_req->u.chain.para.u.mac_param.hash_result_len); + if (info.auth_key_len > vcrypto->conf.max_auth_key_len) { + error_report("virtio-crypto length of auth key is too big: %u", + info.auth_key_len); + ret = -VIRTIO_CRYPTO_ERR; + goto err; + } + /* get auth key */ + if (info.auth_key_len > 0) { + DPRINTF("auth_keylen=%" PRIu32 "\n", info.auth_key_len); + info.auth_key = g_malloc(info.auth_key_len); + s = iov_to_buf(iov, out_num, 0, info.auth_key, + info.auth_key_len); + if (unlikely(s != info.auth_key_len)) { + virtio_error(vdev, + "virtio-crypto authenticated key incorrect"); + ret = -EFAULT; + goto err; + } + iov_discard_front(&iov, &out_num, info.auth_key_len); + } + } else if (info.hash_mode == VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN) { + info.hash_alg = ldl_le_p( + &sess_req->u.chain.para.u.hash_param.algo); + info.hash_result_len = ldl_le_p( + &sess_req->u.chain.para.u.hash_param.hash_result_len); + } else { + /* VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED */ + error_report("unsupported hash mode"); + ret = -VIRTIO_CRYPTO_NOTSUPP; + goto err; + } + } else { + /* VIRTIO_CRYPTO_SYM_OP_NONE */ + error_report("unsupported cipher op_type: VIRTIO_CRYPTO_SYM_OP_NONE"); + ret = -VIRTIO_CRYPTO_NOTSUPP; + goto err; + } + + queue_index = virtio_crypto_vq2q(queue_id); + session_id = cryptodev_backend_sym_create_session( + vcrypto->cryptodev, + &info, queue_index, &local_err); + if (session_id >= 0) { + DPRINTF("create session_id=%" PRIu64 " successfully\n", + session_id); + + ret = session_id; + } else { + if (local_err) { + error_report_err(local_err); + } + ret = -VIRTIO_CRYPTO_ERR; + } + +err: + g_free(info.cipher_key); + g_free(info.auth_key); + return ret; +} + +static uint8_t +virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto, + struct virtio_crypto_destroy_session_req *close_sess_req, + uint32_t queue_id) +{ + int ret; + uint64_t session_id; + uint32_t status; + Error *local_err = NULL; + + session_id = ldq_le_p(&close_sess_req->session_id); + DPRINTF("close session, id=%" PRIu64 "\n", session_id); + + ret = cryptodev_backend_sym_close_session( + vcrypto->cryptodev, session_id, queue_id, &local_err); + if (ret == 0) { + status = VIRTIO_CRYPTO_OK; + } else { + if (local_err) { + error_report_err(local_err); + } else { + error_report("destroy session failed"); + } + status = VIRTIO_CRYPTO_ERR; + } + + return status; +} + +static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + struct virtio_crypto_op_ctrl_req ctrl; + VirtQueueElement *elem; + struct iovec *in_iov; + struct iovec *out_iov; + unsigned in_num; + unsigned out_num; + uint32_t queue_id; + uint32_t opcode; + struct virtio_crypto_session_input input; + int64_t session_id; + uint8_t status; + size_t s; + + for (;;) { + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + break; + } + if (elem->out_num < 1 || elem->in_num < 1) { + virtio_error(vdev, "virtio-crypto ctrl missing headers"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } + + out_num = elem->out_num; + out_iov = elem->out_sg; + in_num = elem->in_num; + in_iov = elem->in_sg; + if (unlikely(iov_to_buf(out_iov, out_num, 0, &ctrl, sizeof(ctrl)) + != sizeof(ctrl))) { + virtio_error(vdev, "virtio-crypto request ctrl_hdr too short"); + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } + iov_discard_front(&out_iov, &out_num, sizeof(ctrl)); + + opcode = ldl_le_p(&ctrl.header.opcode); + queue_id = ldl_le_p(&ctrl.header.queue_id); + + switch (opcode) { + case 
VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: + memset(&input, 0, sizeof(input)); + session_id = virtio_crypto_create_sym_session(vcrypto, + &ctrl.u.sym_create_session, + queue_id, opcode, + out_iov, out_num); + /* Serious errors, need to reset virtio crypto device */ + if (session_id == -EFAULT) { + virtqueue_detach_element(vq, elem, 0); + break; + } else if (session_id == -VIRTIO_CRYPTO_NOTSUPP) { + stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); + } else if (session_id == -VIRTIO_CRYPTO_ERR) { + stl_le_p(&input.status, VIRTIO_CRYPTO_ERR); + } else { + /* Set the session id */ + stq_le_p(&input.session_id, session_id); + stl_le_p(&input.status, VIRTIO_CRYPTO_OK); + } + + s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); + if (unlikely(s != sizeof(input))) { + virtio_error(vdev, "virtio-crypto input incorrect"); + virtqueue_detach_element(vq, elem, 0); + break; + } + virtqueue_push(vq, elem, sizeof(input)); + virtio_notify(vdev, vq); + break; + case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION: + case VIRTIO_CRYPTO_HASH_DESTROY_SESSION: + case VIRTIO_CRYPTO_MAC_DESTROY_SESSION: + case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION: + status = virtio_crypto_handle_close_session(vcrypto, + &ctrl.u.destroy_session, queue_id); + /* The status only occupy one byte, we can directly use it */ + s = iov_from_buf(in_iov, in_num, 0, &status, sizeof(status)); + if (unlikely(s != sizeof(status))) { + virtio_error(vdev, "virtio-crypto status incorrect"); + virtqueue_detach_element(vq, elem, 0); + break; + } + virtqueue_push(vq, elem, sizeof(status)); + virtio_notify(vdev, vq); + break; + case VIRTIO_CRYPTO_HASH_CREATE_SESSION: + case VIRTIO_CRYPTO_MAC_CREATE_SESSION: + case VIRTIO_CRYPTO_AEAD_CREATE_SESSION: + default: + error_report("virtio-crypto unsupported ctrl opcode: %d", opcode); + memset(&input, 0, sizeof(input)); + stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); + s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); + if (unlikely(s != sizeof(input))) { + virtio_error(vdev, "virtio-crypto input incorrect"); + virtqueue_detach_element(vq, elem, 0); + break; + } + virtqueue_push(vq, elem, sizeof(input)); + virtio_notify(vdev, vq); + + break; + } /* end switch case */ + + g_free(elem); + } /* end for loop */ +} + +static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq, + VirtIOCryptoReq *req) +{ + req->vcrypto = vcrypto; + req->vq = vq; + req->in = NULL; + req->in_iov = NULL; + req->in_num = 0; + req->in_len = 0; + req->flags = CRYPTODEV_BACKEND_ALG__MAX; + req->u.sym_op_info = NULL; +} + +static void virtio_crypto_free_request(VirtIOCryptoReq *req) +{ + if (req) { + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + g_free(req->u.sym_op_info); + } + g_free(req); + } +} + +static void +virtio_crypto_sym_input_data_helper(VirtIODevice *vdev, + VirtIOCryptoReq *req, + uint32_t status, + CryptoDevBackendSymOpInfo *sym_op_info) +{ + size_t s, len; + + if (status != VIRTIO_CRYPTO_OK) { + return; + } + + len = sym_op_info->dst_len; + /* Save the cipher result */ + s = iov_from_buf(req->in_iov, req->in_num, 0, sym_op_info->dst, len); + if (s != len) { + virtio_error(vdev, "virtio-crypto dest data incorrect"); + return; + } + + iov_discard_front(&req->in_iov, &req->in_num, len); + + if (sym_op_info->op_type == + VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + /* Save the digest result */ + s = iov_from_buf(req->in_iov, req->in_num, 0, + sym_op_info->digest_result, + sym_op_info->digest_result_len); + if (s != sym_op_info->digest_result_len) { + virtio_error(vdev, "virtio-crypto digest result 
incorrect"); + } + } +} + +static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status) +{ + VirtIOCrypto *vcrypto = req->vcrypto; + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + + if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { + virtio_crypto_sym_input_data_helper(vdev, req, status, + req->u.sym_op_info); + } + stb_p(&req->in->status, status); + virtqueue_push(req->vq, &req->elem, req->in_len); + virtio_notify(vdev, req->vq); +} + +static VirtIOCryptoReq * +virtio_crypto_get_request(VirtIOCrypto *s, VirtQueue *vq) +{ + VirtIOCryptoReq *req = virtqueue_pop(vq, sizeof(VirtIOCryptoReq)); + + if (req) { + virtio_crypto_init_request(s, vq, req); + } + return req; +} + +static CryptoDevBackendSymOpInfo * +virtio_crypto_sym_op_helper(VirtIODevice *vdev, + struct virtio_crypto_cipher_para *cipher_para, + struct virtio_crypto_alg_chain_data_para *alg_chain_para, + struct iovec *iov, unsigned int out_num) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + CryptoDevBackendSymOpInfo *op_info; + uint32_t src_len = 0, dst_len = 0; + uint32_t iv_len = 0; + uint32_t aad_len = 0, hash_result_len = 0; + uint32_t hash_start_src_offset = 0, len_to_hash = 0; + uint32_t cipher_start_src_offset = 0, len_to_cipher = 0; + + size_t max_len, curr_size = 0; + size_t s; + + /* Plain cipher */ + if (cipher_para) { + iv_len = ldl_le_p(&cipher_para->iv_len); + src_len = ldl_le_p(&cipher_para->src_data_len); + dst_len = ldl_le_p(&cipher_para->dst_data_len); + } else if (alg_chain_para) { /* Algorithm chain */ + iv_len = ldl_le_p(&alg_chain_para->iv_len); + src_len = ldl_le_p(&alg_chain_para->src_data_len); + dst_len = ldl_le_p(&alg_chain_para->dst_data_len); + + aad_len = ldl_le_p(&alg_chain_para->aad_len); + hash_result_len = ldl_le_p(&alg_chain_para->hash_result_len); + hash_start_src_offset = ldl_le_p( + &alg_chain_para->hash_start_src_offset); + cipher_start_src_offset = ldl_le_p( + &alg_chain_para->cipher_start_src_offset); + len_to_cipher = ldl_le_p(&alg_chain_para->len_to_cipher); + len_to_hash = ldl_le_p(&alg_chain_para->len_to_hash); + } else { + return NULL; + } + + max_len = iv_len + aad_len + src_len + dst_len + hash_result_len; + if (unlikely(max_len > vcrypto->conf.max_size)) { + virtio_error(vdev, "virtio-crypto too big length"); + return NULL; + } + + op_info = g_malloc0(sizeof(CryptoDevBackendSymOpInfo) + max_len); + op_info->iv_len = iv_len; + op_info->src_len = src_len; + op_info->dst_len = dst_len; + op_info->aad_len = aad_len; + op_info->digest_result_len = hash_result_len; + op_info->hash_start_src_offset = hash_start_src_offset; + op_info->len_to_hash = len_to_hash; + op_info->cipher_start_src_offset = cipher_start_src_offset; + op_info->len_to_cipher = len_to_cipher; + /* Handle the initilization vector */ + if (op_info->iv_len > 0) { + DPRINTF("iv_len=%" PRIu32 "\n", op_info->iv_len); + op_info->iv = op_info->data + curr_size; + + s = iov_to_buf(iov, out_num, 0, op_info->iv, op_info->iv_len); + if (unlikely(s != op_info->iv_len)) { + virtio_error(vdev, "virtio-crypto iv incorrect"); + goto err; + } + iov_discard_front(&iov, &out_num, op_info->iv_len); + curr_size += op_info->iv_len; + } + + /* Handle additional authentication data if exists */ + if (op_info->aad_len > 0) { + DPRINTF("aad_len=%" PRIu32 "\n", op_info->aad_len); + op_info->aad_data = op_info->data + curr_size; + + s = iov_to_buf(iov, out_num, 0, op_info->aad_data, op_info->aad_len); + if (unlikely(s != op_info->aad_len)) { + virtio_error(vdev, "virtio-crypto additional auth data incorrect"); + goto err; + } + 
iov_discard_front(&iov, &out_num, op_info->aad_len); + + curr_size += op_info->aad_len; + } + + /* Handle the source data */ + if (op_info->src_len > 0) { + DPRINTF("src_len=%" PRIu32 "\n", op_info->src_len); + op_info->src = op_info->data + curr_size; + + s = iov_to_buf(iov, out_num, 0, op_info->src, op_info->src_len); + if (unlikely(s != op_info->src_len)) { + virtio_error(vdev, "virtio-crypto source data incorrect"); + goto err; + } + iov_discard_front(&iov, &out_num, op_info->src_len); + + curr_size += op_info->src_len; + } + + /* Handle the destination data */ + op_info->dst = op_info->data + curr_size; + curr_size += op_info->dst_len; + + DPRINTF("dst_len=%" PRIu32 "\n", op_info->dst_len); + + /* Handle the hash digest result */ + if (hash_result_len > 0) { + DPRINTF("hash_result_len=%" PRIu32 "\n", hash_result_len); + op_info->digest_result = op_info->data + curr_size; + } + + return op_info; + +err: + g_free(op_info); + return NULL; +} + +static int +virtio_crypto_handle_sym_req(VirtIOCrypto *vcrypto, + struct virtio_crypto_sym_data_req *req, + CryptoDevBackendSymOpInfo **sym_op_info, + struct iovec *iov, unsigned int out_num) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + uint32_t op_type; + CryptoDevBackendSymOpInfo *op_info; + + op_type = ldl_le_p(&req->op_type); + + if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { + op_info = virtio_crypto_sym_op_helper(vdev, &req->u.cipher.para, + NULL, iov, out_num); + if (!op_info) { + return -EFAULT; + } + op_info->op_type = op_type; + } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { + op_info = virtio_crypto_sym_op_helper(vdev, NULL, + &req->u.chain.para, + iov, out_num); + if (!op_info) { + return -EFAULT; + } + op_info->op_type = op_type; + } else { + /* VIRTIO_CRYPTO_SYM_OP_NONE */ + error_report("virtio-crypto unsupported cipher type"); + return -VIRTIO_CRYPTO_NOTSUPP; + } + + *sym_op_info = op_info; + + return 0; +} + +static int +virtio_crypto_handle_request(VirtIOCryptoReq *request) +{ + VirtIOCrypto *vcrypto = request->vcrypto; + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + VirtQueueElement *elem = &request->elem; + int queue_index = virtio_crypto_vq2q(virtio_get_queue_index(request->vq)); + struct virtio_crypto_op_data_req req; + int ret; + struct iovec *in_iov; + struct iovec *out_iov; + unsigned in_num; + unsigned out_num; + uint32_t opcode; + uint8_t status = VIRTIO_CRYPTO_ERR; + uint64_t session_id; + CryptoDevBackendSymOpInfo *sym_op_info = NULL; + Error *local_err = NULL; + + if (elem->out_num < 1 || elem->in_num < 1) { + virtio_error(vdev, "virtio-crypto dataq missing headers"); + return -1; + } + + out_num = elem->out_num; + out_iov = elem->out_sg; + in_num = elem->in_num; + in_iov = elem->in_sg; + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) + != sizeof(req))) { + virtio_error(vdev, "virtio-crypto request outhdr too short"); + return -1; + } + iov_discard_front(&out_iov, &out_num, sizeof(req)); + + if (in_iov[in_num - 1].iov_len < + sizeof(struct virtio_crypto_inhdr)) { + virtio_error(vdev, "virtio-crypto request inhdr too short"); + return -1; + } + /* We always touch the last byte, so just see how big in_iov is. */ + request->in_len = iov_size(in_iov, in_num); + request->in = (void *)in_iov[in_num - 1].iov_base + + in_iov[in_num - 1].iov_len + - sizeof(struct virtio_crypto_inhdr); + iov_discard_back(in_iov, &in_num, sizeof(struct virtio_crypto_inhdr)); + + /* + * The length of operation result, including dest_data + * and digest_result if exists. 
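
For reference, the two request handlers in this file share one parsing convention: the fixed-size, little-endian header is copied out of the driver-written out_sg chain with iov_to_buf() and stripped with iov_discard_front(), while the device-written status byte is reserved at the tail of in_sg with iov_discard_back() before the remaining in_sg space is used for the result. A minimal sketch of that convention follows; my_req_hdr, my_parse_request and the error strings are placeholders, not names taken from the patch.

#include "qemu/osdep.h"
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"

struct my_req_hdr {                  /* hypothetical fixed-size header */
    uint32_t opcode;
    uint64_t session_id;
} QEMU_PACKED;

static int my_parse_request(VirtIODevice *vdev, VirtQueueElement *elem,
                            struct my_req_hdr *hdr, uint8_t **status)
{
    struct iovec *out_iov = elem->out_sg, *in_iov = elem->in_sg;
    unsigned int out_num = elem->out_num, in_num = elem->in_num;

    /* The header sits at the front of the driver-readable (out) chain. */
    if (iov_to_buf(out_iov, out_num, 0, hdr, sizeof(*hdr)) != sizeof(*hdr)) {
        virtio_error(vdev, "request header too short");
        return -1;
    }
    iov_discard_front(&out_iov, &out_num, sizeof(*hdr));

    /* The one-byte status trailer lives at the very end of the in chain. */
    if (in_iov[in_num - 1].iov_len < sizeof(uint8_t)) {
        virtio_error(vdev, "request lacks a status byte");
        return -1;
    }
    *status = (uint8_t *)in_iov[in_num - 1].iov_base
              + in_iov[in_num - 1].iov_len - sizeof(uint8_t);
    iov_discard_back(in_iov, &in_num, sizeof(uint8_t));

    /* Multi-byte fields are little-endian per virtio 1.0, hence the
     * ldl_le_p()/ldq_le_p() accessors used throughout this file. */
    return 0;
}
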
+ */ + request->in_num = in_num; + request->in_iov = in_iov; + + opcode = ldl_le_p(&req.header.opcode); + session_id = ldq_le_p(&req.header.session_id); + + switch (opcode) { + case VIRTIO_CRYPTO_CIPHER_ENCRYPT: + case VIRTIO_CRYPTO_CIPHER_DECRYPT: + ret = virtio_crypto_handle_sym_req(vcrypto, + &req.u.sym_req, + &sym_op_info, + out_iov, out_num); + /* Serious errors, need to reset virtio crypto device */ + if (ret == -EFAULT) { + return -1; + } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) { + virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); + virtio_crypto_free_request(request); + } else { + sym_op_info->session_id = session_id; + + /* Set request's parameter */ + request->flags = CRYPTODEV_BACKEND_ALG_SYM; + request->u.sym_op_info = sym_op_info; + ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev, + request, queue_index, &local_err); + if (ret < 0) { + status = -ret; + if (local_err) { + error_report_err(local_err); + } + } else { /* ret == VIRTIO_CRYPTO_OK */ + status = ret; + } + virtio_crypto_req_complete(request, status); + virtio_crypto_free_request(request); + } + break; + case VIRTIO_CRYPTO_HASH: + case VIRTIO_CRYPTO_MAC: + case VIRTIO_CRYPTO_AEAD_ENCRYPT: + case VIRTIO_CRYPTO_AEAD_DECRYPT: + default: + error_report("virtio-crypto unsupported dataq opcode: %u", + opcode); + virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); + virtio_crypto_free_request(request); + } + + return 0; +} + +static void virtio_crypto_handle_dataq(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + VirtIOCryptoReq *req; + + while ((req = virtio_crypto_get_request(vcrypto, vq))) { + if (virtio_crypto_handle_request(req) < 0) { + virtqueue_detach_element(req->vq, &req->elem, 0); + virtio_crypto_free_request(req); + break; + } + } +} + +static void virtio_crypto_dataq_bh(void *opaque) +{ + VirtIOCryptoQueue *q = opaque; + VirtIOCrypto *vcrypto = q->vcrypto; + VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + + /* This happens when device was stopped but BH wasn't. */ + if (!vdev->vm_running) { + return; + } + + /* Just in case the driver is not ready on more */ + if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) { + return; + } + + for (;;) { + virtio_crypto_handle_dataq(vdev, q->dataq); + virtio_queue_set_notification(q->dataq, 1); + + /* Are we done or did the guest add more buffers? */ + if (virtio_queue_empty(q->dataq)) { + break; + } + + virtio_queue_set_notification(q->dataq, 0); + } +} + +static void +virtio_crypto_handle_dataq_bh(VirtIODevice *vdev, VirtQueue *vq) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + VirtIOCryptoQueue *q = + &vcrypto->vqs[virtio_crypto_vq2q(virtio_get_queue_index(vq))]; + + /* This happens when device was stopped but VCPU wasn't. 
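
A note on the error convention used by virtio_crypto_handle_request() and the session helpers: -EFAULT means the request could not be parsed or mapped safely, so the caller returns -1 and only detaches the element (virtio_error() has already marked the device broken by then), whereas negated VIRTIO_CRYPTO_* values are ordinary per-request failures that end up in the status byte completed back to the guest. A tiny illustrative helper with a made-up name, showing how such a return value maps onto the guest-visible status:

#include "qemu/osdep.h"
#include "hw/virtio/virtio-crypto.h"

/* Hypothetical helper: -EFAULT never reaches this point, the caller
 * aborts the request instead of completing it. */
static uint8_t my_status_from_ret(int ret)
{
    if (ret >= 0) {
        return VIRTIO_CRYPTO_OK;
    }
    switch (-ret) {
    case VIRTIO_CRYPTO_NOTSUPP:
        return VIRTIO_CRYPTO_NOTSUPP;
    default:
        return VIRTIO_CRYPTO_ERR;
    }
}
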
*/ + if (!vdev->vm_running) { + return; + } + virtio_queue_set_notification(vq, 0); + qemu_bh_schedule(q->dataq_bh); +} + +static uint64_t virtio_crypto_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + return features; +} + +static void virtio_crypto_reset(VirtIODevice *vdev) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + /* multiqueue is disabled by default */ + vcrypto->curr_queues = 1; + if (!vcrypto->cryptodev->ready) { + vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY; + } else { + vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY; + } +} + +static void virtio_crypto_init_config(VirtIODevice *vdev) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); + + vcrypto->conf.crypto_services = + vcrypto->conf.cryptodev->conf.crypto_services; + vcrypto->conf.cipher_algo_l = + vcrypto->conf.cryptodev->conf.cipher_algo_l; + vcrypto->conf.cipher_algo_h = + vcrypto->conf.cryptodev->conf.cipher_algo_h; + vcrypto->conf.hash_algo = vcrypto->conf.cryptodev->conf.hash_algo; + vcrypto->conf.mac_algo_l = vcrypto->conf.cryptodev->conf.mac_algo_l; + vcrypto->conf.mac_algo_h = vcrypto->conf.cryptodev->conf.mac_algo_h; + vcrypto->conf.aead_algo = vcrypto->conf.cryptodev->conf.aead_algo; + vcrypto->conf.max_cipher_key_len = + vcrypto->conf.cryptodev->conf.max_cipher_key_len; + vcrypto->conf.max_auth_key_len = + vcrypto->conf.cryptodev->conf.max_auth_key_len; + vcrypto->conf.max_size = vcrypto->conf.cryptodev->conf.max_size; +} + +static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev); + int i; + + vcrypto->cryptodev = vcrypto->conf.cryptodev; + if (vcrypto->cryptodev == NULL) { + error_setg(errp, "'cryptodev' parameter expects a valid object"); + return; + } + + vcrypto->max_queues = MAX(vcrypto->cryptodev->conf.peers.queues, 1); + if (vcrypto->max_queues + 1 > VIRTIO_QUEUE_MAX) { + error_setg(errp, "Invalid number of queues (= %" PRIu32 "), " + "must be a postive integer less than %d.", + vcrypto->max_queues, VIRTIO_QUEUE_MAX); + return; + } + + virtio_init(vdev, "virtio-crypto", VIRTIO_ID_CRYPTO, vcrypto->config_size); + vcrypto->curr_queues = 1; + vcrypto->vqs = g_malloc0(sizeof(VirtIOCryptoQueue) * vcrypto->max_queues); + for (i = 0; i < vcrypto->max_queues; i++) { + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = + qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); + vcrypto->vqs[i].vcrypto = vcrypto; + } + + vcrypto->ctrl_vq = virtio_add_queue(vdev, 64, virtio_crypto_handle_ctrl); + if (!vcrypto->cryptodev->ready) { + vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY; + } else { + vcrypto->status |= VIRTIO_CRYPTO_S_HW_READY; + } + + virtio_crypto_init_config(vdev); +} + +static void virtio_crypto_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(dev); + VirtIOCryptoQueue *q; + int i, max_queues; + + max_queues = vcrypto->multiqueue ? 
vcrypto->max_queues : 1; + for (i = 0; i < max_queues; i++) { + virtio_del_queue(vdev, i); + q = &vcrypto->vqs[i]; + qemu_bh_delete(q->dataq_bh); + } + + g_free(vcrypto->vqs); + + virtio_cleanup(vdev); +} + +static const VMStateDescription vmstate_virtio_crypto = { + .name = "virtio-crypto", + .unmigratable = 1, + .minimum_version_id = VIRTIO_CRYPTO_VM_VERSION, + .version_id = VIRTIO_CRYPTO_VM_VERSION, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property virtio_crypto_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static void virtio_crypto_get_config(VirtIODevice *vdev, uint8_t *config) +{ + VirtIOCrypto *c = VIRTIO_CRYPTO(vdev); + struct virtio_crypto_config crypto_cfg = {}; + + /* + * Virtio-crypto device conforms to VIRTIO 1.0 which is always LE, + * so we can use LE accessors directly. + */ + stl_le_p(&crypto_cfg.status, c->status); + stl_le_p(&crypto_cfg.max_dataqueues, c->max_queues); + stl_le_p(&crypto_cfg.crypto_services, c->conf.crypto_services); + stl_le_p(&crypto_cfg.cipher_algo_l, c->conf.cipher_algo_l); + stl_le_p(&crypto_cfg.cipher_algo_h, c->conf.cipher_algo_h); + stl_le_p(&crypto_cfg.hash_algo, c->conf.hash_algo); + stl_le_p(&crypto_cfg.mac_algo_l, c->conf.mac_algo_l); + stl_le_p(&crypto_cfg.mac_algo_h, c->conf.mac_algo_h); + stl_le_p(&crypto_cfg.aead_algo, c->conf.aead_algo); + stl_le_p(&crypto_cfg.max_cipher_key_len, c->conf.max_cipher_key_len); + stl_le_p(&crypto_cfg.max_auth_key_len, c->conf.max_auth_key_len); + stq_le_p(&crypto_cfg.max_size, c->conf.max_size); + + memcpy(config, &crypto_cfg, c->config_size); +} + +static void virtio_crypto_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = virtio_crypto_properties; + dc->vmsd = &vmstate_virtio_crypto; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_crypto_device_realize; + vdc->unrealize = virtio_crypto_device_unrealize; + vdc->get_config = virtio_crypto_get_config; + vdc->get_features = virtio_crypto_get_features; + vdc->reset = virtio_crypto_reset; +} + +static void virtio_crypto_instance_init(Object *obj) +{ + VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(obj); + + /* + * The default config_size is sizeof(struct virtio_crypto_config). + * Can be overriden with virtio_crypto_set_config_size. 
+ */ + vcrypto->config_size = sizeof(struct virtio_crypto_config); + + object_property_add_link(obj, "cryptodev", + TYPE_CRYPTODEV_BACKEND, + (Object **)&vcrypto->conf.cryptodev, + qdev_prop_allow_set_link_before_realize, + OBJ_PROP_LINK_UNREF_ON_RELEASE, NULL); +} + +static const TypeInfo virtio_crypto_info = { + .name = TYPE_VIRTIO_CRYPTO, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIOCrypto), + .instance_init = virtio_crypto_instance_init, + .class_init = virtio_crypto_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_crypto_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 13798b3cb8..17412cb7b5 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -89,38 +89,12 @@ typedef struct { uint32_t guest_page_shift; /* virtio-bus */ VirtioBusState bus; - bool ioeventfd_disabled; - bool ioeventfd_started; bool format_transport_address; } VirtIOMMIOProxy; -static bool virtio_mmio_ioeventfd_started(DeviceState *d) +static bool virtio_mmio_ioeventfd_enabled(DeviceState *d) { - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - return proxy->ioeventfd_started; -} - -static void virtio_mmio_ioeventfd_set_started(DeviceState *d, bool started, - bool err) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - proxy->ioeventfd_started = started; -} - -static bool virtio_mmio_ioeventfd_disabled(DeviceState *d) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - return !kvm_eventfds_enabled() || proxy->ioeventfd_disabled; -} - -static void virtio_mmio_ioeventfd_set_disabled(DeviceState *d, bool disabled) -{ - VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); - - proxy->ioeventfd_disabled = disabled; + return kvm_eventfds_enabled(); } static int virtio_mmio_ioeventfd_assign(DeviceState *d, @@ -217,7 +191,7 @@ static uint64_t virtio_mmio_read(void *opaque, hwaddr offset, unsigned size) return virtio_queue_get_addr(vdev, vdev->queue_sel) >> proxy->guest_page_shift; case VIRTIO_MMIO_INTERRUPTSTATUS: - return vdev->isr; + return atomic_read(&vdev->isr); case VIRTIO_MMIO_STATUS: return vdev->status; case VIRTIO_MMIO_HOSTFEATURESSEL: @@ -325,7 +299,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, } break; case VIRTIO_MMIO_INTERRUPTACK: - vdev->isr &= ~value; + atomic_and(&vdev->isr, ~value); virtio_update_irq(vdev); break; case VIRTIO_MMIO_STATUS: @@ -373,7 +347,7 @@ static void virtio_mmio_update_irq(DeviceState *opaque, uint16_t vector) if (!vdev) { return; } - level = (vdev->isr != 0); + level = (atomic_read(&vdev->isr) != 0); DPRINTF("virtio_mmio setting IRQ %d\n", level); qemu_set_irq(proxy->irq, level); } @@ -557,10 +531,7 @@ static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) k->save_config = virtio_mmio_save_config; k->load_config = virtio_mmio_load_config; k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; - k->ioeventfd_started = virtio_mmio_ioeventfd_started; - k->ioeventfd_set_started = virtio_mmio_ioeventfd_set_started; - k->ioeventfd_disabled = virtio_mmio_ioeventfd_disabled; - k->ioeventfd_set_disabled = virtio_mmio_ioeventfd_set_disabled; + k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; k->has_variable_vring_alignment = true; bus_class->max_dev = 1; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 755f9218b7..21c2b9dbfc 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -73,7 +73,7 @@ static void virtio_pci_notify(DeviceState *d, 
uint16_t vector) msix_notify(&proxy->pci_dev, vector); else { VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - pci_set_irq(&proxy->pci_dev, vdev->isr & 1); + pci_set_irq(&proxy->pci_dev, atomic_read(&vdev->isr) & 1); } } @@ -262,38 +262,21 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) return 0; } -static bool virtio_pci_ioeventfd_started(DeviceState *d) +static bool virtio_pci_ioeventfd_enabled(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return proxy->ioeventfd_started; + return (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) != 0; } -static void virtio_pci_ioeventfd_set_started(DeviceState *d, bool started, - bool err) -{ - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - proxy->ioeventfd_started = started; -} +#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 -static bool virtio_pci_ioeventfd_disabled(DeviceState *d) +static inline int virtio_pci_queue_mem_mult(struct VirtIOPCIProxy *proxy) { - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->ioeventfd_disabled || - !(proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD); + return (proxy->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ) ? + QEMU_VIRTIO_PCI_QUEUE_MEM_MULT : 4; } -static void virtio_pci_ioeventfd_set_disabled(DeviceState *d, bool disabled) -{ - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - proxy->ioeventfd_disabled = disabled; -} - -#define QEMU_VIRTIO_PCI_QUEUE_MEM_MULT 0x1000 - static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, int n, bool assign) { @@ -307,7 +290,7 @@ static int virtio_pci_ioeventfd_assign(DeviceState *d, EventNotifier *notifier, MemoryRegion *modern_mr = &proxy->notify.mr; MemoryRegion *modern_notify_mr = &proxy->notify_pio.mr; MemoryRegion *legacy_mr = &proxy->bar; - hwaddr modern_addr = QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * + hwaddr modern_addr = virtio_pci_queue_mem_mult(proxy) * virtio_get_queue_index(vq); hwaddr legacy_addr = VIRTIO_PCI_QUEUE_NOTIFY; @@ -466,8 +449,7 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr) break; case VIRTIO_PCI_ISR: /* reading from the ISR also clears it. 
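
With the ioeventfd_started/set_started/disabled/set_disabled callbacks gone, a transport only has to answer the ioeventfd_enabled() query and wire an EventNotifier to its notify address in ioeventfd_assign(); the start/stop bookkeeping is handled by the generic virtio-bus code earlier in this patch. A condensed sketch of what a transport now implements, modelled on the virtio-mmio/virtio-pci versions; MyTransport, MY_TRANSPORT and MY_QUEUE_NOTIFY are placeholder names, not part of the patch.

#include "qemu/osdep.h"
#include "qemu/event_notifier.h"
#include "exec/memory.h"
#include "sysemu/kvm.h"
#include "hw/qdev.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-bus.h"

#define MY_QUEUE_NOTIFY 0x50                 /* hypothetical notify register */

typedef struct MyTransport {
    DeviceState parent_obj;
    MemoryRegion iomem;
} MyTransport;

#define MY_TRANSPORT(obj) OBJECT_CHECK(MyTransport, (obj), "my-transport")

static bool my_ioeventfd_enabled(DeviceState *d)
{
    /* Only useful when the host can back the notifier with an eventfd. */
    return kvm_eventfds_enabled();
}

static int my_ioeventfd_assign(DeviceState *d, EventNotifier *notifier,
                               int n, bool assign)
{
    MyTransport *proxy = MY_TRANSPORT(d);

    /* The guest kicks queue n by writing n to a single notify register,
     * so match on the written value (virtio-mmio style). */
    if (assign) {
        memory_region_add_eventfd(&proxy->iomem, MY_QUEUE_NOTIFY, 4,
                                  true, n, notifier);
    } else {
        memory_region_del_eventfd(&proxy->iomem, MY_QUEUE_NOTIFY, 4,
                                  true, n, notifier);
    }
    return 0;
}

static void my_bus_class_init(ObjectClass *klass, void *data)
{
    VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);

    k->ioeventfd_enabled = my_ioeventfd_enabled;
    k->ioeventfd_assign = my_ioeventfd_assign;
}
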
*/ - ret = vdev->isr; - vdev->isr = 0; + ret = atomic_xchg(&vdev->isr, 0); pci_irq_deassert(&proxy->pci_dev); break; case VIRTIO_MSI_CONFIG_VECTOR: @@ -1192,7 +1174,9 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, break; case VIRTIO_PCI_COMMON_DF: if (proxy->dfselect <= 1) { - val = (vdev->host_features & ~VIRTIO_LEGACY_FEATURES) >> + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + + val = (vdev->host_features & ~vdc->legacy_features) >> (32 * proxy->dfselect); } break; @@ -1370,7 +1354,8 @@ static void virtio_pci_notify_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { VirtIODevice *vdev = opaque; - unsigned queue = addr / QEMU_VIRTIO_PCI_QUEUE_MEM_MULT; + VirtIOPCIProxy *proxy = VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent); + unsigned queue = addr / virtio_pci_queue_mem_mult(proxy); if (queue < VIRTIO_QUEUE_MAX) { virtio_queue_notify(vdev, queue); @@ -1393,9 +1378,7 @@ static uint64_t virtio_pci_isr_read(void *opaque, hwaddr addr, { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - uint64_t val = vdev->isr; - - vdev->isr = 0; + uint64_t val = atomic_xchg(&vdev->isr, 0); pci_irq_deassert(&proxy->pci_dev); return val; @@ -1520,7 +1503,7 @@ static void virtio_pci_modern_regions_init(VirtIOPCIProxy *proxy) ¬ify_pio_ops, virtio_bus_get_device(&proxy->bus), "virtio-pci-notify-pio", - proxy->notify.size); + proxy->notify_pio.size); } static void virtio_pci_modern_region_map(VirtIOPCIProxy *proxy, @@ -1544,7 +1527,7 @@ static void virtio_pci_modern_mem_region_map(VirtIOPCIProxy *proxy, struct virtio_pci_cap *cap) { virtio_pci_modern_region_map(proxy, region, cap, - &proxy->modern_bar, proxy->modern_mem_bar); + &proxy->modern_bar, proxy->modern_mem_bar_idx); } static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy, @@ -1552,7 +1535,7 @@ static void virtio_pci_modern_io_region_map(VirtIOPCIProxy *proxy, struct virtio_pci_cap *cap) { virtio_pci_modern_region_map(proxy, region, cap, - &proxy->io_bar, proxy->modern_io_bar); + &proxy->io_bar, proxy->modern_io_bar_idx); } static void virtio_pci_modern_mem_region_unmap(VirtIOPCIProxy *proxy, @@ -1569,18 +1552,49 @@ static void virtio_pci_modern_io_region_unmap(VirtIOPCIProxy *proxy, ®ion->mr); } +static void virtio_pci_pre_plugged(DeviceState *d, Error **errp) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(d); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + if (virtio_pci_modern(proxy)) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); + } + + virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE); +} + /* This is called by virtio-bus just after the device is plugged. 
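
Since the ISR can now be touched from an ioeventfd or vhost thread while a vCPU reads it, every access goes through the atomic helpers: the core ORs bits in with atomic_or() (virtio_set_isr(), later in this patch), transports derive the interrupt level with atomic_read(), and the read-to-clear ISR register becomes a single atomic_xchg() instead of a racy read-then-store pair. A condensed sketch with placeholder names:

#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio.h"

/* Read-to-clear ISR register, as in virtio_ioport_read()/virtio_pci_isr_read():
 * returns the old value and clears it in one atomic step. */
static uint8_t my_isr_read(VirtIODevice *vdev)
{
    return atomic_xchg(&vdev->isr, 0);
}

/* Level of the legacy interrupt line: any ISR bit set. */
static bool my_irq_level(VirtIODevice *vdev)
{
    return atomic_read(&vdev->isr) != 0;
}
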
*/ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) { VirtIOPCIProxy *proxy = VIRTIO_PCI(d); VirtioBusState *bus = &proxy->bus; bool legacy = virtio_pci_legacy(proxy); - bool modern = virtio_pci_modern(proxy); + bool modern; bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + /* + * Virtio capabilities present without + * VIRTIO_F_VERSION_1 confuses guests + */ + if (!proxy->ignore_backend_features && + !virtio_has_feature(vdev->host_features, VIRTIO_F_VERSION_1)) { + virtio_pci_disable_modern(proxy); + + if (!legacy) { + error_setg(errp, "Device doesn't support modern mode, and legacy" + " mode is disabled"); + error_append_hint(errp, "Set disable-legacy to off\n"); + + return; + } + } + + modern = virtio_pci_modern(proxy); + config = proxy->pci_dev.config; if (proxy->class_code) { pci_config_set_class(config, proxy->class_code); @@ -1609,7 +1623,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) struct virtio_pci_notify_cap notify = { .cap.cap_len = sizeof notify, .notify_off_multiplier = - cpu_to_le32(QEMU_VIRTIO_PCI_QUEUE_MEM_MULT), + cpu_to_le32(virtio_pci_queue_mem_mult(proxy)), }; struct virtio_pci_cfg_cap cfg = { .cap.cap_len = sizeof cfg, @@ -1622,7 +1636,6 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) struct virtio_pci_cfg_cap *cfg_mask; - virtio_add_feature(&vdev->host_features, VIRTIO_F_VERSION_1); virtio_pci_modern_regions_init(proxy); virtio_pci_modern_mem_region_map(proxy, &proxy->common, &cap); @@ -1634,14 +1647,14 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) memory_region_init(&proxy->io_bar, OBJECT(proxy), "virtio-pci-io", 0x4); - pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar, + pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar); virtio_pci_modern_io_region_map(proxy, &proxy->notify_pio, ¬ify_pio.cap); } - pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar, + pci_register_bar(&proxy->pci_dev, proxy->modern_mem_bar_idx, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_PREFETCH | PCI_BASE_ADDRESS_MEM_TYPE_64, @@ -1657,7 +1670,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) if (proxy->nvectors) { int err = msix_init_exclusive_bar(&proxy->pci_dev, proxy->nvectors, - proxy->msix_bar); + proxy->msix_bar_idx); if (err) { /* Notice when a system that supports MSIx can't initialize it. */ if (err != -ENOTSUP) { @@ -1680,15 +1693,9 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) &virtio_pci_config_ops, proxy, "virtio-pci", size); - pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar, + pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } - - if (!kvm_has_many_ioeventfds()) { - proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; - } - - virtio_add_feature(&vdev->host_features, VIRTIO_F_BAD_FEATURE); } static void virtio_pci_device_unplugged(DeviceState *d) @@ -1717,6 +1724,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) bool pcie_port = pci_bus_is_express(pci_dev->bus) && !pci_bus_is_root(pci_dev->bus); + if (!kvm_has_many_ioeventfds()) { + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; + } + /* * virtio pci bar layout used by default. * subclasses can re-arrange things if needed. 
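
The new page-per-vq flag changes how the modern notify region is laid out: each queue's notify slot sits at queue index times virtio_pci_queue_mem_mult(), i.e. 4 bytes apart by default and 0x1000 apart with page-per-vq on, and because PCI BARs must be a power of two in size the modern BAR is now sized with pow2ceil() rather than a fixed constant. A small sketch of the arithmetic, written as if it lived inside virtio-pci.c (virtio_pci_queue_mem_mult() is file-local there); the my_* names are placeholders.

#include "qemu/osdep.h"
#include "qemu/host-utils.h"        /* pow2ceil() */
#include "hw/virtio/virtio-pci.h"

/* Offset of queue n within the notify region, mirroring
 * virtio_pci_ioeventfd_assign() and virtio_pci_notify_write(). */
static hwaddr my_notify_offset(VirtIOPCIProxy *proxy, unsigned n)
{
    return (hwaddr)virtio_pci_queue_mem_mult(proxy) * n;
}

/* The modern BAR must cover the notify region and be a power of two. */
static uint64_t my_modern_bar_size(VirtIOPCIProxy *proxy)
{
    return pow2ceil(proxy->notify.offset + proxy->notify.size);
}
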
@@ -1726,10 +1737,10 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) * region 4+5 -- virtio modern memory (64bit) bar * */ - proxy->legacy_io_bar = 0; - proxy->msix_bar = 1; - proxy->modern_io_bar = 2; - proxy->modern_mem_bar = 4; + proxy->legacy_io_bar_idx = 0; + proxy->msix_bar_idx = 1; + proxy->modern_io_bar_idx = 2; + proxy->modern_mem_bar_idx = 4; proxy->common.offset = 0x0; proxy->common.size = 0x1000; @@ -1744,8 +1755,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->device.type = VIRTIO_PCI_CAP_DEVICE_CFG; proxy->notify.offset = 0x3000; - proxy->notify.size = - QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * VIRTIO_QUEUE_MAX; + proxy->notify.size = virtio_pci_queue_mem_mult(proxy) * VIRTIO_QUEUE_MAX; proxy->notify.type = VIRTIO_PCI_CAP_NOTIFY_CFG; proxy->notify_pio.offset = 0x0; @@ -1754,8 +1764,8 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) /* subclasses can enforce modern, so do this unconditionally */ memory_region_init(&proxy->modern_bar, OBJECT(proxy), "virtio-pci", - 2 * QEMU_VIRTIO_PCI_QUEUE_MEM_MULT * - VIRTIO_QUEUE_MAX); + /* PCI BAR regions must be powers of 2 */ + pow2ceil(proxy->notify.offset + proxy->notify.size)); memory_region_init_alias(&proxy->modern_cfg, OBJECT(proxy), @@ -1770,6 +1780,14 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } + if (!virtio_pci_modern(proxy) && !virtio_pci_legacy(proxy)) { + error_setg(errp, "device cannot work as neither modern nor legacy mode" + " is enabled"); + error_append_hint(errp, "Set either disable-modern or disable-legacy" + " to off\n"); + return; + } + if (pcie_port && pci_is_express(pci_dev)) { int pos; @@ -1833,6 +1851,10 @@ static Property virtio_pci_properties[] = { VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), + DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), + DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, + ignore_backend_features, false), DEFINE_PROP_END_OF_LIST(), }; @@ -2055,6 +2077,54 @@ static const TypeInfo vhost_scsi_pci_info = { }; #endif +/* vhost-vsock-pci */ + +#ifdef CONFIG_VHOST_VSOCK +static Property vhost_vsock_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostVSockPCI *dev = VHOST_VSOCK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_vsock_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_vsock_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->props = vhost_vsock_pci_properties; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_VSOCK; + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_vsock_pci_instance_init(Object *obj) +{ + VHostVSockPCI *dev = VHOST_VSOCK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_VSOCK); +} + +static const TypeInfo vhost_vsock_pci_info = { + .name = 
TYPE_VHOST_VSOCK_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostVSockPCI), + .instance_init = vhost_vsock_pci_instance_init, + .class_init = vhost_vsock_pci_class_init, +}; +#endif + /* virtio-balloon-pci */ static Property virtio_balloon_pci_properties[] = { @@ -2444,13 +2514,11 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) k->query_guest_notifiers = virtio_pci_query_guest_notifiers; k->set_guest_notifiers = virtio_pci_set_guest_notifiers; k->vmstate_change = virtio_pci_vmstate_change; + k->pre_plugged = virtio_pci_pre_plugged; k->device_plugged = virtio_pci_device_plugged; k->device_unplugged = virtio_pci_device_unplugged; k->query_nvectors = virtio_pci_query_nvectors; - k->ioeventfd_started = virtio_pci_ioeventfd_started; - k->ioeventfd_set_started = virtio_pci_ioeventfd_set_started; - k->ioeventfd_disabled = virtio_pci_ioeventfd_disabled; - k->ioeventfd_set_disabled = virtio_pci_ioeventfd_set_disabled; + k->ioeventfd_enabled = virtio_pci_ioeventfd_enabled; k->ioeventfd_assign = virtio_pci_ioeventfd_assign; } @@ -2485,6 +2553,9 @@ static void virtio_pci_register_types(void) #ifdef CONFIG_VHOST_SCSI type_register_static(&vhost_scsi_pci_info); #endif +#ifdef CONFIG_VHOST_VSOCK + type_register_static(&vhost_vsock_pci_info); +#endif } type_init(virtio_pci_register_types) diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 25fbf8a375..5e078866c4 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -25,12 +25,17 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-input.h" #include "hw/virtio/virtio-gpu.h" +#include "hw/virtio/virtio-crypto.h" + #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" #endif #ifdef CONFIG_VHOST_SCSI #include "hw/virtio/vhost-scsi.h" #endif +#ifdef CONFIG_VHOST_VSOCK +#include "hw/virtio/vhost-vsock.h" +#endif typedef struct VirtIOPCIProxy VirtIOPCIProxy; typedef struct VirtIOBlkPCI VirtIOBlkPCI; @@ -44,6 +49,8 @@ typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; typedef struct VirtIOInputHostPCI VirtIOInputHostPCI; typedef struct VirtIOGPUPCI VirtIOGPUPCI; +typedef struct VHostVSockPCI VHostVSockPCI; +typedef struct VirtIOCryptoPCI VirtIOCryptoPCI; /* virtio-pci-bus */ @@ -64,6 +71,7 @@ enum { VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, + VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, }; /* Need to activate work-arounds for buggy guests at vmstate load. 
*/ @@ -84,6 +92,10 @@ enum { #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) +/* page per vq flag to be used by split drivers within guests */ +#define VIRTIO_PCI_FLAG_PAGE_PER_VQ \ + (1 << VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT) + typedef struct { MSIMessage msg; int virq; @@ -134,13 +146,14 @@ struct VirtIOPCIProxy { MemoryRegion io_bar; MemoryRegion modern_cfg; AddressSpace modern_as; - uint32_t legacy_io_bar; - uint32_t msix_bar; - uint32_t modern_io_bar; - uint32_t modern_mem_bar; + uint32_t legacy_io_bar_idx; + uint32_t msix_bar_idx; + uint32_t modern_io_bar_idx; + uint32_t modern_mem_bar_idx; int config_cap; uint32_t flags; bool disable_modern; + bool ignore_backend_features; OnOffAuto disable_legacy; uint32_t class_code; uint32_t nvectors; @@ -149,8 +162,6 @@ struct VirtIOPCIProxy { uint32_t guest_features[2]; VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX]; - bool ioeventfd_disabled; - bool ioeventfd_started; VirtIOIRQFD *vector_irqfd; int nvqs_with_notifiers; VirtioBusState bus; @@ -172,6 +183,11 @@ static inline void virtio_pci_force_virtio_1(VirtIOPCIProxy *proxy) proxy->disable_legacy = ON_OFF_AUTO_ON; } +static inline void virtio_pci_disable_modern(VirtIOPCIProxy *proxy) +{ + proxy->disable_modern = true; +} + /* * virtio-scsi-pci: This extends VirtioPCIProxy. */ @@ -324,6 +340,32 @@ struct VirtIOGPUPCI { VirtIOGPU vdev; }; +#ifdef CONFIG_VHOST_VSOCK +/* + * vhost-vsock-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_VSOCK_PCI "vhost-vsock-pci" +#define VHOST_VSOCK_PCI(obj) \ + OBJECT_CHECK(VHostVSockPCI, (obj), TYPE_VHOST_VSOCK_PCI) + +struct VHostVSockPCI { + VirtIOPCIProxy parent_obj; + VHostVSock vdev; +}; +#endif + +/* + * virtio-crypto-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VIRTIO_CRYPTO_PCI "virtio-crypto-pci" +#define VIRTIO_CRYPTO_PCI(obj) \ + OBJECT_CHECK(VirtIOCryptoPCI, (obj), TYPE_VIRTIO_CRYPTO_PCI) + +struct VirtIOCryptoPCI { + VirtIOPCIProxy parent_obj; + VirtIOCrypto vdev; +}; + /* Virtio ABI version, if we increment this, we break the guest driver. */ #define VIRTIO_PCI_ABI_VERSION 0 diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c index cd8ca10177..9639f4e89b 100644 --- a/hw/virtio/virtio-rng.c +++ b/hw/virtio/virtio-rng.c @@ -120,15 +120,9 @@ static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) return f; } -static int virtio_rng_load(QEMUFile *f, void *opaque, size_t size) +static int virtio_rng_post_load(void *opaque, int version_id) { VirtIORNG *vrng = opaque; - int ret; - - ret = virtio_load(VIRTIO_DEVICE(vrng), f, 1); - if (ret != 0) { - return ret; - } /* We may have an element ready but couldn't process it due to a quota * limit. Make sure to try again after live migration when the quota may @@ -216,7 +210,16 @@ static void virtio_rng_device_unrealize(DeviceState *dev, Error **errp) virtio_cleanup(vdev); } -VMSTATE_VIRTIO_DEVICE(rng, 1, virtio_rng_load, virtio_vmstate_save); +static const VMStateDescription vmstate_virtio_rng = { + .name = "virtio-rng", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, + .post_load = virtio_rng_post_load, +}; static Property virtio_rng_properties[] = { /* Set a default rate limit of 2^47 bytes per minute or roughly 2TB/s. 
If diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 74c085c74d..1af2de2714 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -97,7 +97,6 @@ struct VirtQueue uint16_t vector; VirtIOHandleOutput handle_output; VirtIOHandleOutput handle_aio_output; - bool use_aio; VirtIODevice *vdev; EventNotifier guest_notifier; EventNotifier host_notifier; @@ -264,14 +263,59 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, 0, elem->out_sg[i].iov_len); } -void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len) +/* virtqueue_detach_element: + * @vq: The #VirtQueue + * @elem: The #VirtQueueElement + * @len: number of bytes written + * + * Detach the element from the virtqueue. This function is suitable for device + * reset or other situations where a #VirtQueueElement is simply freed and will + * not be pushed or discarded. + */ +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) { - vq->last_avail_idx--; vq->inuse--; virtqueue_unmap_sg(vq, elem, len); } +/* virtqueue_unpop: + * @vq: The #VirtQueue + * @elem: The #VirtQueueElement + * @len: number of bytes written + * + * Pretend the most recent element wasn't popped from the virtqueue. The next + * call to virtqueue_pop() will refetch the element. + */ +void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + vq->last_avail_idx--; + virtqueue_detach_element(vq, elem, len); +} + +/* virtqueue_rewind: + * @vq: The #VirtQueue + * @num: Number of elements to push back + * + * Pretend that elements weren't popped from the virtqueue. The next + * virtqueue_pop() will refetch the oldest element. + * + * Use virtqueue_unpop() instead if you have a VirtQueueElement. + * + * Returns: true on success, false if @num is greater than the number of in use + * elements. + */ +bool virtqueue_rewind(VirtQueue *vq, unsigned int num) +{ + if (num > vq->inuse) { + return false; + } + vq->last_avail_idx -= num; + vq->inuse -= num; + return true; +} + void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { @@ -281,6 +325,10 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, virtqueue_unmap_sg(vq, elem, len); + if (unlikely(vq->vdev->broken)) { + return; + } + idx = (idx + vq->used_idx) % vq->vring.num; uelem.id = elem->index; @@ -291,6 +339,12 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_flush(VirtQueue *vq, unsigned int count) { uint16_t old, new; + + if (unlikely(vq->vdev->broken)) { + vq->inuse -= count; + return; + } + /* Make sure buffer is written before we update index. */ smp_wmb(); trace_virtqueue_flush(vq, count); @@ -315,9 +369,9 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) /* Check it isn't doing very strange things with descriptor numbers. */ if (num_heads > vq->vring.num) { - error_report("Guest moved used index from %u to %u", + virtio_error(vq->vdev, "Guest moved used index from %u to %u", idx, vq->shadow_avail_idx); - exit(1); + return -EINVAL; } /* On success, callers read a descriptor at vq->last_avail_idx. * Make sure descriptor read does not bypass avail index read. 
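
The three helpers documented above split the old virtqueue_discard() into distinct intents: virtqueue_detach_element() frees an element without returning it to the guest (reset or fatal-error paths), virtqueue_unpop() hands the most recently popped element back so the next virtqueue_pop() refetches it (quota or backpressure), and virtqueue_rewind() covers the batch case where only a count of in-flight elements is still known. A usage sketch from a hypothetical device; MyReq, my_handle_output and my_drop_inflight are placeholder names.

#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

typedef struct MyReq {
    VirtQueueElement elem;      /* must be the first field for virtqueue_pop() */
    /* device-specific state would follow */
} MyReq;

static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    bool out_of_quota = false;  /* stand-in for a real rate/quota check */
    MyReq *req;

    /* pop returns NULL when the ring is empty or the device is broken. */
    while ((req = virtqueue_pop(vq, sizeof(MyReq)))) {
        if (out_of_quota) {
            /* Pretend it was never popped; the next pop refetches it. */
            virtqueue_unpop(vq, &req->elem, 0);
            g_free(req);
            break;
        }

        unsigned int len = 0;   /* bytes written into req->elem.in_sg */
        /* ... process the request, writing the reply into in_sg ... */
        virtqueue_push(vq, &req->elem, len);
        virtio_notify(vdev, vq);
        g_free(req);
    }
}

/* On device reset, anything still in flight is simply dropped. */
static void my_drop_inflight(VirtQueue *vq, MyReq *req)
{
    virtqueue_detach_element(vq, &req->elem, 0);
    g_free(req);
}
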
*/ @@ -328,45 +382,49 @@ static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) return num_heads; } -static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx) +static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, + unsigned int *head) { - unsigned int head; - /* Grab the next descriptor number they're advertising, and increment * the index we've seen. */ - head = vring_avail_ring(vq, idx % vq->vring.num); + *head = vring_avail_ring(vq, idx % vq->vring.num); /* If their number is silly, that's a fatal mistake. */ - if (head >= vq->vring.num) { - error_report("Guest says index %u is available", head); - exit(1); + if (*head >= vq->vring.num) { + virtio_error(vq->vdev, "Guest says index %u is available", *head); + return false; } - return head; + return true; } -static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, - hwaddr desc_pa, unsigned int max) -{ - unsigned int next; +enum { + VIRTQUEUE_READ_DESC_ERROR = -1, + VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ + VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ +}; +static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, + hwaddr desc_pa, unsigned int max, + unsigned int *next) +{ /* If this descriptor says it doesn't chain, we're done. */ if (!(desc->flags & VRING_DESC_F_NEXT)) { - return max; + return VIRTQUEUE_READ_DESC_DONE; } /* Check they're not leading us off end of descriptors. */ - next = desc->next; + *next = desc->next; /* Make sure compiler knows to grab that: we don't want it changing! */ smp_wmb(); - if (next >= max) { - error_report("Desc next is %u", next); - exit(1); + if (*next >= max) { + virtio_error(vdev, "Desc next is %u", *next); + return VIRTQUEUE_READ_DESC_ERROR; } - vring_desc_read(vdev, desc, desc_pa, next); - return next; + vring_desc_read(vdev, desc, desc_pa, *next); + return VIRTQUEUE_READ_DESC_MORE; } void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, @@ -375,33 +433,38 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, { unsigned int idx; unsigned int total_bufs, in_total, out_total; + int rc; idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; - while (virtqueue_num_heads(vq, idx)) { + while ((rc = virtqueue_num_heads(vq, idx)) > 0) { VirtIODevice *vdev = vq->vdev; unsigned int max, num_bufs, indirect = 0; VRingDesc desc; hwaddr desc_pa; - int i; + unsigned int i; max = vq->vring.num; num_bufs = total_bufs; - i = virtqueue_get_head(vq, idx++); + + if (!virtqueue_get_head(vq, idx++, &i)) { + goto err; + } + desc_pa = vq->vring.desc; vring_desc_read(vdev, &desc, desc_pa, i); if (desc.flags & VRING_DESC_F_INDIRECT) { if (desc.len % sizeof(VRingDesc)) { - error_report("Invalid size for indirect buffer table"); - exit(1); + virtio_error(vdev, "Invalid size for indirect buffer table"); + goto err; } /* If we've got too many, that implies a descriptor loop. */ if (num_bufs >= max) { - error_report("Looped descriptor"); - exit(1); + virtio_error(vdev, "Looped descriptor"); + goto err; } /* loop over the indirect descriptor table */ @@ -415,8 +478,8 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, do { /* If we've got too many, that implies a descriptor loop. 
*/ if (++num_bufs > max) { - error_report("Looped descriptor"); - exit(1); + virtio_error(vdev, "Looped descriptor"); + goto err; } if (desc.flags & VRING_DESC_F_WRITE) { @@ -427,13 +490,24 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, if (in_total >= max_in_bytes && out_total >= max_out_bytes) { goto done; } - } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); + + rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + + if (rc == VIRTQUEUE_READ_DESC_ERROR) { + goto err; + } if (!indirect) total_bufs = num_bufs; else total_bufs++; } + + if (rc < 0) { + goto err; + } + done: if (in_bytes) { *in_bytes = in_total; @@ -441,6 +515,11 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, if (out_bytes) { *out_bytes = out_total; } + return; + +err: + in_total = out_total = 0; + goto done; } int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, @@ -452,27 +531,35 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, return in_bytes <= in_total && out_bytes <= out_total; } -static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov, +static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, + hwaddr *addr, struct iovec *iov, unsigned int max_num_sg, bool is_write, hwaddr pa, size_t sz) { + bool ok = false; unsigned num_sg = *p_num_sg; assert(num_sg <= max_num_sg); if (!sz) { - error_report("virtio: zero sized buffers are not allowed"); - exit(1); + virtio_error(vdev, "virtio: zero sized buffers are not allowed"); + goto out; } while (sz) { hwaddr len = sz; if (num_sg == max_num_sg) { - error_report("virtio: too many write descriptors in indirect table"); - exit(1); + virtio_error(vdev, "virtio: too many write descriptors in " + "indirect table"); + goto out; } iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write); + if (!iov[num_sg].iov_base) { + virtio_error(vdev, "virtio: bogus descriptor or out of resources"); + goto out; + } + iov[num_sg].iov_len = len; addr[num_sg] = pa; @@ -480,7 +567,28 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove pa += len; num_sg++; } + ok = true; + +out: *p_num_sg = num_sg; + return ok; +} + +/* Only used by error code paths before we have a VirtQueueElement (therefore + * virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to + * yet. 
+ */ +static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num, + struct iovec *iov) +{ + unsigned int i; + + for (i = 0; i < out_num + in_num; i++) { + int is_write = i >= out_num; + + cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0); + iov++; + } } static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr, @@ -524,7 +632,7 @@ void virtqueue_map(VirtQueueElement *elem) VIRTQUEUE_MAX_SIZE, 0); } -void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) +static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num) { VirtQueueElement *elem; size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); @@ -555,7 +663,11 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) hwaddr addr[VIRTQUEUE_MAX_SIZE]; struct iovec iov[VIRTQUEUE_MAX_SIZE]; VRingDesc desc; + int rc; + if (unlikely(vdev->broken)) { + return NULL; + } if (virtio_queue_empty(vq)) { return NULL; } @@ -569,20 +681,24 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) max = vq->vring.num; if (vq->inuse >= vq->vring.num) { - error_report("Virtqueue size exceeded"); - exit(1); + virtio_error(vdev, "Virtqueue size exceeded"); + return NULL; + } + + if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) { + return NULL; } - i = head = virtqueue_get_head(vq, vq->last_avail_idx++); if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vq->last_avail_idx); } + i = head; vring_desc_read(vdev, &desc, desc_pa, i); if (desc.flags & VRING_DESC_F_INDIRECT) { if (desc.len % sizeof(VRingDesc)) { - error_report("Invalid size for indirect buffer table"); - exit(1); + virtio_error(vdev, "Invalid size for indirect buffer table"); + return NULL; } /* loop over the indirect descriptor table */ @@ -594,24 +710,38 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) /* Collect all the descriptors */ do { + bool map_ok; + if (desc.flags & VRING_DESC_F_WRITE) { - virtqueue_map_desc(&in_num, addr + out_num, iov + out_num, - VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len); + map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, + iov + out_num, + VIRTQUEUE_MAX_SIZE - out_num, true, + desc.addr, desc.len); } else { if (in_num) { - error_report("Incorrect order for descriptors"); - exit(1); + virtio_error(vdev, "Incorrect order for descriptors"); + goto err_undo_map; } - virtqueue_map_desc(&out_num, addr, iov, - VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len); + map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, + VIRTQUEUE_MAX_SIZE, false, + desc.addr, desc.len); + } + if (!map_ok) { + goto err_undo_map; } /* If we've got too many, that implies a descriptor loop. 
*/ if ((in_num + out_num) > max) { - error_report("Looped descriptor"); - exit(1); + virtio_error(vdev, "Looped descriptor"); + goto err_undo_map; } - } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max); + + rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + + if (rc == VIRTQUEUE_READ_DESC_ERROR) { + goto err_undo_map; + } /* Now copy what we have collected and mapped */ elem = virtqueue_alloc_element(sz, out_num, in_num); @@ -629,6 +759,10 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); return elem; + +err_undo_map: + virtqueue_undo_map_desc(out_num, in_num, iov); + return NULL; } /* Reading and writing a structure directly to QEMUFile is *awful*, but @@ -720,6 +854,10 @@ static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector) BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + if (unlikely(vdev->broken)) { + return; + } + if (k->notify) { k->notify(qbus->parent, vector); } @@ -803,10 +941,11 @@ void virtio_reset(void *opaque) k->reset(vdev); } + vdev->broken = false; vdev->guest_features = 0; vdev->queue_sel = 0; vdev->status = 0; - vdev->isr = 0; + atomic_set(&vdev->isr, 0); vdev->config_vector = VIRTIO_NO_VECTOR; virtio_notify_vector(vdev, vdev->config_vector); @@ -822,6 +961,7 @@ void virtio_reset(void *opaque) vdev->vq[i].signalled_used_valid = false; vdev->vq[i].notification = true; vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; + vdev->vq[i].inuse = 0; } } @@ -1109,6 +1249,10 @@ static void virtio_queue_notify_vq(VirtQueue *vq) if (vq->vring.desc && vq->handle_output) { VirtIODevice *vdev = vq->vdev; + if (unlikely(vdev->broken)) { + return; + } + trace_virtio_queue_notify(vdev, vq - vdev->vq, vq); vq->handle_output(vdev, vq); } @@ -1142,9 +1286,8 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector) } } -static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output, - bool use_aio) +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) { int i; @@ -1161,28 +1304,10 @@ static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size, vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; vdev->vq[i].handle_output = handle_output; vdev->vq[i].handle_aio_output = NULL; - vdev->vq[i].use_aio = use_aio; return &vdev->vq[i]; } -/* Add a virt queue and mark AIO. - * An AIO queue will use the AioContext based event interface instead of the - * default IOHandler and EventNotifier interface. - */ -VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output) -{ - return virtio_add_queue_internal(vdev, queue_size, handle_output, true); -} - -/* Add a normal virt queue (on the contrary to the AIO version above. 
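With virtqueue_pop() failing soft and the AIO queue variant folded back into plain virtio_add_queue() in the hunks around here, a device's data path reduces to the shape below. A sketch only; my_handle_output, my_create_queues and the queue size of 128 are illustrative:

#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

static void my_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem;

    for (;;) {
        /* NULL now covers both "ring empty" and "device broken". */
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        /* ... parse elem->out_sg, fill elem->in_sg ... */
        virtqueue_push(vq, elem, 0 /* bytes written to in_sg */);
        g_free(elem);
    }
    virtio_notify(vdev, vq);
}

static void my_create_queues(VirtIODevice *vdev)
{
    /* Only the plain variant is left; dataplane users attach an
     * AioContext handler later through
     * virtio_queue_aio_set_host_notifier_handler(). */
    virtio_add_queue(vdev, 128, my_handle_output);
}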
*/ -VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output) -{ - return virtio_add_queue_internal(vdev, queue_size, handle_output, false); -} - void virtio_del_queue(VirtIODevice *vdev, int n) { if (n < 0 || n >= VIRTIO_QUEUE_MAX) { @@ -1193,11 +1318,16 @@ void virtio_del_queue(VirtIODevice *vdev, int n) vdev->vq[n].vring.num_default = 0; } -void virtio_irq(VirtQueue *vq) +static void virtio_set_isr(VirtIODevice *vdev, int value) { - trace_virtio_irq(vq); - vq->vdev->isr |= 0x01; - virtio_notify_vector(vq->vdev, vq->vector); + uint8_t old = atomic_read(&vdev->isr); + + /* Do not write ISR if it does not change, so that its cacheline remains + * shared in the common case where the guest does not read it. + */ + if ((old & value) != value) { + atomic_or(&vdev->isr, value); + } } bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) @@ -1223,6 +1353,33 @@ bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) +{ + if (!virtio_should_notify(vdev, vq)) { + return; + } + + trace_virtio_notify_irqfd(vdev, vq); + + /* + * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but + * windows drivers included in virtio-win 1.8.0 (circa 2015) are + * incorrectly polling this bit during crashdump and hibernation + * in MSI mode, causing a hang if this bit is never updated. + * Recent releases of Windows do not really shut down, but rather + * log out and hibernate to make the next startup faster. Hence, + * this manifested as a more serious hang during shutdown with + * + * Next driver release from 2016 fixed this problem, so working around it + * is not a must, but it's easy to do so let's do it here. + * + * Note: it's safe to update ISR from any thread as it was switched + * to an atomic operation. 
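That atomic ISR update is what lets virtio_notify_irqfd() run from an IOThread without the big QEMU lock: it only touches the ISR and the guest notifier eventfd. A hedged sketch of the dataplane completion path it enables; the function name is illustrative:

#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

/*
 * Runs in an IOThread: publish the used element, then signal the guest
 * through the guest notifier / irqfd instead of going through the main
 * loop.  virtio_notify() remains the right call on the non-dataplane path.
 */
static void my_dataplane_complete(VirtIODevice *vdev, VirtQueue *vq,
                                  VirtQueueElement *elem, unsigned int len)
{
    virtqueue_push(vq, elem, len);
    virtio_notify_irqfd(vdev, vq);
    g_free(elem);
}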
+ */ + virtio_set_isr(vq->vdev, 0x1); + event_notifier_set(&vq->guest_notifier); +} + void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) { if (!virtio_should_notify(vdev, vq)) { @@ -1230,7 +1387,7 @@ void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) } trace_virtio_notify(vdev, vq); - vdev->isr |= 0x01; + virtio_set_isr(vq->vdev, 0x1); virtio_notify_vector(vdev, vq->vector); } @@ -1239,7 +1396,7 @@ void virtio_notify_config(VirtIODevice *vdev) if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) return; - vdev->isr |= 0x03; + virtio_set_isr(vdev, 0x3); vdev->generation++; virtio_notify_vector(vdev, vdev->config_vector); } @@ -1293,6 +1450,13 @@ static bool virtio_extra_state_needed(void *opaque) k->has_extra_state(qbus->parent); } +static bool virtio_broken_needed(void *opaque) +{ + VirtIODevice *vdev = opaque; + + return vdev->broken; +} + static const VMStateDescription vmstate_virtqueue = { .name = "virtqueue_state", .version_id = 1, @@ -1407,6 +1571,17 @@ static const VMStateDescription vmstate_virtio_64bit_features = { } }; +static const VMStateDescription vmstate_virtio_broken = { + .name = "virtio/broken", + .version_id = 1, + .minimum_version_id = 1, + .needed = &virtio_broken_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(broken, VirtIODevice), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio = { .name = "virtio", .version_id = 1, @@ -1420,6 +1595,7 @@ static const VMStateDescription vmstate_virtio = { &vmstate_virtio_64bit_features, &vmstate_virtio_virtqueues, &vmstate_virtio_ringsize, + &vmstate_virtio_broken, &vmstate_virtio_extra_state, NULL } @@ -1471,16 +1647,35 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vdc->save(vdev, f); } + if (vdc->vmsd) { + vmstate_save_state(f, vdc->vmsd, vdev, NULL); + } + /* Subsections */ vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } /* A wrapper for use as a VMState .put function */ -void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size) +static void virtio_device_put(QEMUFile *f, void *opaque, size_t size) { virtio_save(VIRTIO_DEVICE(opaque), f); } +/* A wrapper for use as a VMState .get function */ +static int virtio_device_get(QEMUFile *f, void *opaque, size_t size) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(opaque); + DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev)); + + return virtio_load(vdev, f, dc->vmsd->version_id); +} + +const VMStateInfo virtio_vmstate_info = { + .name = "virtio", + .get = virtio_device_get, + .put = virtio_device_put, +}; + static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); @@ -1585,7 +1780,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) "inconsistent with Host index 0x%x", i, vdev->vq[i].last_avail_idx); return -1; - } + } if (k->load_queue) { ret = k->load_queue(qbus->parent, i, f); if (ret) @@ -1602,6 +1797,13 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) } } + if (vdc->vmsd) { + ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id); + if (ret) { + return ret; + } + } + /* Subsections */ ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1); if (ret) { @@ -1725,11 +1927,12 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->device_id = device_id; vdev->status = 0; - vdev->isr = 0; + atomic_set(&vdev->isr, 0); vdev->queue_sel = 0; vdev->config_vector = VIRTIO_NO_VECTOR; vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); vdev->vm_running = runstate_is_running(); + vdev->broken = false; for (i = 0; 
i < VIRTIO_QUEUE_MAX; i++) { vdev->vq[i].vector = VIRTIO_NO_VECTOR; vdev->vq[i].vdev = vdev; @@ -1764,11 +1967,6 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) return vdev->vq[n].vring.used; } -hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.desc; -} - hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) { return sizeof(VRingDesc) * vdev->vq[n].vring.num; @@ -1786,12 +1984,6 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) sizeof(VRingUsedElem) * vdev->vq[n].vring.num; } -hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.used - vdev->vq[n].vring.desc + - virtio_queue_get_used_size(vdev, n); -} - uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { return vdev->vq[n].last_avail_idx; @@ -1822,7 +2014,7 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); if (event_notifier_test_and_clear(n)) { - virtio_irq(vq); + virtio_notify_vector(vq->vdev, vq->vector); } } @@ -1871,7 +2063,7 @@ void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, } } -static void virtio_queue_host_notifier_read(EventNotifier *n) +void virtio_queue_host_notifier_read(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, host_notifier); if (event_notifier_test_and_clear(n)) { @@ -1879,32 +2071,6 @@ static void virtio_queue_host_notifier_read(EventNotifier *n) } } -void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, - bool set_handler) -{ - AioContext *ctx = qemu_get_aio_context(); - if (assign && set_handler) { - if (vq->use_aio) { - aio_set_event_notifier(ctx, &vq->host_notifier, true, - virtio_queue_host_notifier_read); - } else { - event_notifier_set_handler(&vq->host_notifier, true, - virtio_queue_host_notifier_read); - } - } else { - if (vq->use_aio) { - aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL); - } else { - event_notifier_set_handler(&vq->host_notifier, true, NULL); - } - } - if (!assign) { - /* Test and clear notifier before after disabling event, - * in case poll callback didn't have time to run. */ - virtio_queue_host_notifier_read(&vq->host_notifier); - } -} - EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) { return &vq->host_notifier; @@ -1916,12 +2082,31 @@ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name) vdev->bus_name = g_strdup(bus_name); } +void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + error_vreport(fmt, ap); + va_end(ap); + + vdev->broken = true; + + if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET); + virtio_notify_config(vdev); + } +} + static void virtio_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); Error *err = NULL; + /* Devices should either use vmsd or the load/save methods */ + assert(!vdc->vmsd || !vdc->load); + if (vdc->realize != NULL) { vdc->realize(dev, &err); if (err != NULL) { @@ -1962,15 +2147,120 @@ static Property virtio_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) +{ + VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); + int n, r, err; + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + VirtQueue *vq = &vdev->vq[n]; + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + r = virtio_bus_set_host_notifier(qbus, n, true); + if (r < 0) { + err = r; + goto assign_error; + } + event_notifier_set_handler(&vq->host_notifier, true, + virtio_queue_host_notifier_read); + } + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + /* Kick right away to begin processing requests already in vring */ + VirtQueue *vq = &vdev->vq[n]; + if (!vq->vring.num) { + continue; + } + event_notifier_set(&vq->host_notifier); + } + return 0; + +assign_error: + while (--n >= 0) { + VirtQueue *vq = &vdev->vq[n]; + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + + event_notifier_set_handler(&vq->host_notifier, true, NULL); + r = virtio_bus_set_host_notifier(qbus, n, false); + assert(r >= 0); + } + return err; +} + +int virtio_device_start_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + return virtio_bus_start_ioeventfd(vbus); +} + +static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) +{ + VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); + int n, r; + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + VirtQueue *vq = &vdev->vq[n]; + + if (!virtio_queue_get_num(vdev, n)) { + continue; + } + event_notifier_set_handler(&vq->host_notifier, true, NULL); + r = virtio_bus_set_host_notifier(qbus, n, false); + assert(r >= 0); + } +} + +void virtio_device_stop_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + virtio_bus_stop_ioeventfd(vbus); +} + +int virtio_device_grab_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + return virtio_bus_grab_ioeventfd(vbus); +} + +void virtio_device_release_ioeventfd(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + virtio_bus_release_ioeventfd(vbus); +} + static void virtio_device_class_init(ObjectClass *klass, void *data) { /* Set the default value here. 
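The start/stop implementations above become the class defaults (wired up just below), and dataplane code drives them through the grab/release wrappers. A hedged sketch of a setup path using them; the function name and error messages are illustrative, and the exact grab semantics are as declared in this series:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/virtio/virtio.h"

static void my_dataplane_setup(VirtIODevice *vdev, Error **errp)
{
    /* Bail out if the transport cannot back the queues with ioeventfd. */
    if (!virtio_device_ioeventfd_enabled(vdev)) {
        error_setg(errp, "ioeventfd is required for this dataplane");
        return;
    }
    /* Keep ioeventfd active for the dataplane's lifetime; balanced by a
     * virtio_device_release_ioeventfd() call on teardown. */
    if (virtio_device_grab_ioeventfd(vdev) < 0) {
        error_setg(errp, "failed to grab ioeventfd");
    }
}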
*/ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = virtio_device_realize; dc->unrealize = virtio_device_unrealize; dc->bus_type = TYPE_VIRTIO_BUS; dc->props = virtio_properties; + vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl; + vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl; + + vdc->legacy_features |= VIRTIO_LEGACY_FEATURES; +} + +bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev) +{ + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); + VirtioBusState *vbus = VIRTIO_BUS(qbus); + + return virtio_bus_ioeventfd_enabled(vbus); } static const TypeInfo virtio_device_info = { diff --git a/hw/xen/Makefile.objs b/hw/xen/Makefile.objs index d3670940b7..591cdc229d 100644 --- a/hw/xen/Makefile.objs +++ b/hw/xen/Makefile.objs @@ -1,5 +1,5 @@ # xen backend driver support -common-obj-$(CONFIG_XEN_BACKEND) += xen_backend.o xen_devconfig.o +common-obj-$(CONFIG_XEN_BACKEND) += xen_backend.o xen_devconfig.o xen_pvdev.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen-host-pci-device.o obj-$(CONFIG_XEN_PCI_PASSTHROUGH) += xen_pt.o xen_pt_config_init.o xen_pt_graphics.o xen_pt_msi.o diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c index 69a238817e..d1190041ae 100644 --- a/hw/xen/xen_backend.c +++ b/hw/xen/xen_backend.c @@ -27,15 +27,18 @@ #include "hw/hw.h" #include "hw/sysbus.h" +#include "hw/boards.h" #include "sysemu/char.h" #include "qemu/log.h" +#include "qapi/error.h" #include "hw/xen/xen_backend.h" +#include "hw/xen/xen_pvdev.h" +#include "monitor/qdev.h" #include -#define TYPE_XENSYSDEV "xensysdev" - DeviceState *xen_sysdev; +BusState *xen_sysbus; /* ------------------------------------------------------------- */ @@ -46,129 +49,7 @@ struct xs_handle *xenstore = NULL; const char *xen_protocol; /* private */ -struct xs_dirs { - char *xs_dir; - QTAILQ_ENTRY(xs_dirs) list; -}; -static QTAILQ_HEAD(xs_dirs_head, xs_dirs) xs_cleanup = - QTAILQ_HEAD_INITIALIZER(xs_cleanup); - -static QTAILQ_HEAD(XenDeviceHead, XenDevice) xendevs = QTAILQ_HEAD_INITIALIZER(xendevs); -static int debug = 0; - -/* ------------------------------------------------------------- */ - -static void xenstore_cleanup_dir(char *dir) -{ - struct xs_dirs *d; - - d = g_malloc(sizeof(*d)); - d->xs_dir = dir; - QTAILQ_INSERT_TAIL(&xs_cleanup, d, list); -} - -void xen_config_cleanup(void) -{ - struct xs_dirs *d; - - QTAILQ_FOREACH(d, &xs_cleanup, list) { - xs_rm(xenstore, 0, d->xs_dir); - } -} - -int xenstore_write_str(const char *base, const char *node, const char *val) -{ - char abspath[XEN_BUFSIZE]; - - snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { - return -1; - } - return 0; -} - -char *xenstore_read_str(const char *base, const char *node) -{ - char abspath[XEN_BUFSIZE]; - unsigned int len; - char *str, *ret = NULL; - - snprintf(abspath, sizeof(abspath), "%s/%s", base, node); - str = xs_read(xenstore, 0, abspath, &len); - if (str != NULL) { - /* move to qemu-allocated memory to make sure - * callers can savely g_free() stuff. 
*/ - ret = g_strdup(str); - free(str); - } - return ret; -} - -int xenstore_mkdir(char *path, int p) -{ - struct xs_permissions perms[2] = { - { - .id = 0, /* set owner: dom0 */ - }, { - .id = xen_domid, - .perms = p, - } - }; - - if (!xs_mkdir(xenstore, 0, path)) { - xen_be_printf(NULL, 0, "xs_mkdir %s: failed\n", path); - return -1; - } - xenstore_cleanup_dir(g_strdup(path)); - - if (!xs_set_permissions(xenstore, 0, path, perms, 2)) { - xen_be_printf(NULL, 0, "xs_set_permissions %s: failed\n", path); - return -1; - } - return 0; -} - -int xenstore_write_int(const char *base, const char *node, int ival) -{ - char val[12]; - - snprintf(val, sizeof(val), "%d", ival); - return xenstore_write_str(base, node, val); -} - -int xenstore_write_int64(const char *base, const char *node, int64_t ival) -{ - char val[21]; - - snprintf(val, sizeof(val), "%"PRId64, ival); - return xenstore_write_str(base, node, val); -} - -int xenstore_read_int(const char *base, const char *node, int *ival) -{ - char *val; - int rc = -1; - - val = xenstore_read_str(base, node); - if (val && 1 == sscanf(val, "%d", ival)) { - rc = 0; - } - g_free(val); - return rc; -} - -int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval) -{ - char *val; - int rc = -1; - - val = xenstore_read_str(base, node); - if (val && 1 == sscanf(val, "%"SCNu64, uval)) { - rc = 0; - } - g_free(val); - return rc; -} +static int debug; int xenstore_write_be_str(struct XenDevice *xendev, const char *node, const char *val) { @@ -205,27 +86,14 @@ int xenstore_read_fe_int(struct XenDevice *xendev, const char *node, int *ival) return xenstore_read_int(xendev->fe, node, ival); } -int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node, uint64_t *uval) +int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node, + uint64_t *uval) { return xenstore_read_uint64(xendev->fe, node, uval); } /* ------------------------------------------------------------- */ -const char *xenbus_strstate(enum xenbus_state state) -{ - static const char *const name[] = { - [ XenbusStateUnknown ] = "Unknown", - [ XenbusStateInitialising ] = "Initialising", - [ XenbusStateInitWait ] = "InitWait", - [ XenbusStateInitialised ] = "Initialised", - [ XenbusStateConnected ] = "Connected", - [ XenbusStateClosing ] = "Closing", - [ XenbusStateClosed ] = "Closed", - }; - return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; -} - int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state) { int rc; @@ -234,33 +102,12 @@ int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state) if (rc < 0) { return rc; } - xen_be_printf(xendev, 1, "backend state: %s -> %s\n", + xen_pv_printf(xendev, 1, "backend state: %s -> %s\n", xenbus_strstate(xendev->be_state), xenbus_strstate(state)); xendev->be_state = state; return 0; } -/* ------------------------------------------------------------- */ - -struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev) -{ - struct XenDevice *xendev; - - QTAILQ_FOREACH(xendev, &xendevs, next) { - if (xendev->dom != dom) { - continue; - } - if (xendev->dev != dev) { - continue; - } - if (strcmp(xendev->type, type) != 0) { - continue; - } - return xendev; - } - return NULL; -} - /* * get xen backend device, allocate a new one if it doesn't exist. 
*/ @@ -269,13 +116,19 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, { struct XenDevice *xendev; - xendev = xen_be_find_xendev(type, dom, dev); + xendev = xen_pv_find_xendev(type, dom, dev); if (xendev) { return xendev; } /* init new xendev */ xendev = g_malloc0(ops->size); + object_initialize(&xendev->qdev, ops->size, TYPE_XENBACKEND); + qdev_set_parent_bus(&xendev->qdev, xen_sysbus); + qdev_set_id(&xendev->qdev, g_strdup_printf("xen-%s-%d", type, dev)); + qdev_init_nofail(&xendev->qdev); + object_unref(OBJECT(&xendev->qdev)); + xendev->type = type; xendev->dom = dom; xendev->dev = dev; @@ -291,7 +144,7 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, xendev->evtchndev = xenevtchn_open(NULL, 0); if (xendev->evtchndev == NULL) { - xen_be_printf(NULL, 0, "can't open evtchn device\n"); + xen_pv_printf(NULL, 0, "can't open evtchn device\n"); g_free(xendev); return NULL; } @@ -300,7 +153,7 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, if (ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { xendev->gnttabdev = xengnttab_open(NULL, 0); if (xendev->gnttabdev == NULL) { - xen_be_printf(NULL, 0, "can't open gnttab device\n"); + xen_pv_printf(NULL, 0, "can't open gnttab device\n"); xenevtchn_close(xendev->evtchndev); g_free(xendev); return NULL; @@ -309,7 +162,7 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, xendev->gnttabdev = NULL; } - QTAILQ_INSERT_TAIL(&xendevs, xendev, next); + xen_pv_insert_xendev(xendev); if (xendev->ops->alloc) { xendev->ops->alloc(xendev); @@ -318,32 +171,6 @@ static struct XenDevice *xen_be_get_xendev(const char *type, int dom, int dev, return xendev; } -/* - * release xen backend device. - */ -static void xen_be_del_xendev(struct XenDevice *xendev) -{ - if (xendev->ops->free) { - xendev->ops->free(xendev); - } - - if (xendev->fe) { - char token[XEN_BUFSIZE]; - snprintf(token, sizeof(token), "fe:%p", xendev); - xs_unwatch(xenstore, xendev->fe, token); - g_free(xendev->fe); - } - - if (xendev->evtchndev != NULL) { - xenevtchn_close(xendev->evtchndev); - } - if (xendev->gnttabdev != NULL) { - xengnttab_close(xendev->gnttabdev); - } - - QTAILQ_REMOVE(&xendevs, xendev, next); - g_free(xendev); -} /* * Sync internal data structures on xenstore updates. 
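xen_be_get_xendev() above now also instantiates each backend as a TYPE_XENBACKEND qdev on the new xen-sysbus, but the model for backend authors is unchanged: fill in a XenDevOps and register it. A hedged sketch with made-up names ("vfoo", xen_foo_ops, foo_init); only the helpers and flags shown in this series are assumed:

#include "qemu/osdep.h"
#include "hw/xen/xen_backend.h"
#include "hw/xen/xen_pvdev.h"

static int foo_init(struct XenDevice *xendev)
{
    xen_pv_printf(xendev, 1, "vfoo backend created\n");
    return 0;
}

static struct XenDevOps xen_foo_ops = {
    .size  = sizeof(struct XenDevice),
    .flags = DEVOPS_FLAG_NEED_GNTDEV,   /* have a gnttab handle opened for us */
    .init  = foo_init,
};

/* registered next to the existing backends, e.g. from
 * xen_be_register_common():  xen_be_register("vfoo", &xen_foo_ops); */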
@@ -359,7 +186,7 @@ static void xen_be_backend_changed(struct XenDevice *xendev, const char *node) } if (node) { - xen_be_printf(xendev, 2, "backend update: %s\n", node); + xen_pv_printf(xendev, 2, "backend update: %s\n", node); if (xendev->ops->backend_changed) { xendev->ops->backend_changed(xendev, node); } @@ -375,7 +202,7 @@ static void xen_be_frontend_changed(struct XenDevice *xendev, const char *node) fe_state = XenbusStateUnknown; } if (xendev->fe_state != fe_state) { - xen_be_printf(xendev, 1, "frontend state: %s -> %s\n", + xen_pv_printf(xendev, 1, "frontend state: %s -> %s\n", xenbus_strstate(xendev->fe_state), xenbus_strstate(fe_state)); } @@ -385,12 +212,13 @@ static void xen_be_frontend_changed(struct XenDevice *xendev, const char *node) g_free(xendev->protocol); xendev->protocol = xenstore_read_fe_str(xendev, "protocol"); if (xendev->protocol) { - xen_be_printf(xendev, 1, "frontend protocol: %s\n", xendev->protocol); + xen_pv_printf(xendev, 1, "frontend protocol: %s\n", + xendev->protocol); } } if (node) { - xen_be_printf(xendev, 2, "frontend update: %s\n", node); + xen_pv_printf(xendev, 2, "frontend update: %s\n", node); if (xendev->ops->frontend_changed) { xendev->ops->frontend_changed(xendev, node); } @@ -414,26 +242,26 @@ static int xen_be_try_setup(struct XenDevice *xendev) int be_state; if (xenstore_read_be_int(xendev, "state", &be_state) == -1) { - xen_be_printf(xendev, 0, "reading backend state failed\n"); + xen_pv_printf(xendev, 0, "reading backend state failed\n"); return -1; } if (be_state != XenbusStateInitialising) { - xen_be_printf(xendev, 0, "initial backend state is wrong (%s)\n", + xen_pv_printf(xendev, 0, "initial backend state is wrong (%s)\n", xenbus_strstate(be_state)); return -1; } xendev->fe = xenstore_read_be_str(xendev, "frontend"); if (xendev->fe == NULL) { - xen_be_printf(xendev, 0, "reading frontend path failed\n"); + xen_pv_printf(xendev, 0, "reading frontend path failed\n"); return -1; } /* setup frontend watch */ snprintf(token, sizeof(token), "fe:%p", xendev); if (!xs_watch(xenstore, xendev->fe, token)) { - xen_be_printf(xendev, 0, "watching frontend path (%s) failed\n", + xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n", xendev->fe); return -1; } @@ -457,7 +285,7 @@ static int xen_be_try_init(struct XenDevice *xendev) int rc = 0; if (!xendev->online) { - xen_be_printf(xendev, 1, "not online\n"); + xen_pv_printf(xendev, 1, "not online\n"); return -1; } @@ -465,7 +293,7 @@ static int xen_be_try_init(struct XenDevice *xendev) rc = xendev->ops->init(xendev); } if (rc != 0) { - xen_be_printf(xendev, 1, "init() failed\n"); + xen_pv_printf(xendev, 1, "init() failed\n"); return rc; } @@ -488,9 +316,9 @@ static int xen_be_try_initialise(struct XenDevice *xendev) if (xendev->fe_state != XenbusStateInitialised && xendev->fe_state != XenbusStateConnected) { if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) { - xen_be_printf(xendev, 2, "frontend not ready, ignoring\n"); + xen_pv_printf(xendev, 2, "frontend not ready, ignoring\n"); } else { - xen_be_printf(xendev, 2, "frontend not ready (yet)\n"); + xen_pv_printf(xendev, 2, "frontend not ready (yet)\n"); return -1; } } @@ -499,7 +327,7 @@ static int xen_be_try_initialise(struct XenDevice *xendev) rc = xendev->ops->initialise(xendev); } if (rc != 0) { - xen_be_printf(xendev, 0, "initialise() failed\n"); + xen_pv_printf(xendev, 0, "initialise() failed\n"); return rc; } @@ -520,9 +348,9 @@ static void xen_be_try_connected(struct XenDevice *xendev) if (xendev->fe_state != XenbusStateConnected) { 
if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) { - xen_be_printf(xendev, 2, "frontend not ready, ignoring\n"); + xen_pv_printf(xendev, 2, "frontend not ready, ignoring\n"); } else { - xen_be_printf(xendev, 2, "frontend not ready (yet)\n"); + xen_pv_printf(xendev, 2, "frontend not ready (yet)\n"); return; } } @@ -556,7 +384,7 @@ static int xen_be_try_reset(struct XenDevice *xendev) return -1; } - xen_be_printf(xendev, 1, "device reset (for re-connect)\n"); + xen_pv_printf(xendev, 1, "device reset (for re-connect)\n"); xen_be_set_state(xendev, XenbusStateInitialising); return 0; } @@ -617,7 +445,8 @@ static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops); snprintf(path, sizeof(path), "backend/%s/%d", type, dom); if (!xs_watch(xenstore, path, token)) { - xen_be_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", path); + xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", + path); return -1; } @@ -637,8 +466,8 @@ static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) return 0; } -static void xenstore_update_be(char *watch, char *type, int dom, - struct XenDevOps *ops) +void xenstore_update_be(char *watch, char *type, int dom, + struct XenDevOps *ops) { struct XenDevice *xendev; char path[XEN_BUFSIZE], *bepath; @@ -662,7 +491,7 @@ static void xenstore_update_be(char *watch, char *type, int dom, if (xendev != NULL) { bepath = xs_read(xenstore, 0, xendev->be, &len); if (bepath == NULL) { - xen_be_del_xendev(xendev); + xen_pv_del_xendev(xendev); } else { free(bepath); xen_be_backend_changed(xendev, path); @@ -671,7 +500,7 @@ static void xenstore_update_be(char *watch, char *type, int dom, } } -static void xenstore_update_fe(char *watch, struct XenDevice *xendev) +void xenstore_update_fe(char *watch, struct XenDevice *xendev) { char *node; unsigned int len; @@ -688,56 +517,13 @@ static void xenstore_update_fe(char *watch, struct XenDevice *xendev) xen_be_frontend_changed(xendev, node); xen_be_check_state(xendev); } - -static void xenstore_update(void *unused) -{ - char **vec = NULL; - intptr_t type, ops, ptr; - unsigned int dom, count; - - vec = xs_read_watch(xenstore, &count); - if (vec == NULL) { - goto cleanup; - } - - if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, - &type, &dom, &ops) == 3) { - xenstore_update_be(vec[XS_WATCH_PATH], (void*)type, dom, (void*)ops); - } - if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { - xenstore_update_fe(vec[XS_WATCH_PATH], (void*)ptr); - } - -cleanup: - free(vec); -} - -static void xen_be_evtchn_event(void *opaque) -{ - struct XenDevice *xendev = opaque; - evtchn_port_t port; - - port = xenevtchn_pending(xendev->evtchndev); - if (port != xendev->local_port) { - xen_be_printf(xendev, 0, - "xenevtchn_pending returned %d (expected %d)\n", - port, xendev->local_port); - return; - } - xenevtchn_unmask(xendev->evtchndev, port); - - if (xendev->ops->event) { - xendev->ops->event(xendev); - } -} - /* -------------------------------------------------------------------- */ int xen_be_init(void) { xenstore = xs_daemon_open(); if (!xenstore) { - xen_be_printf(NULL, 0, "can't connect to xenstored\n"); + xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); return -1; } @@ -750,6 +536,8 @@ int xen_be_init(void) xen_sysdev = qdev_create(NULL, TYPE_XENSYSDEV); qdev_init_nofail(xen_sysdev); + xen_sysbus = qbus_create(TYPE_XENSYSBUS, DEVICE(xen_sysdev), "xen-sysbus"); + qbus_set_bus_hotplug_handler(xen_sysbus, 
&error_abort); return 0; @@ -761,6 +549,15 @@ int xen_be_init(void) return -1; } +static void xen_set_dynamic_sysbus(void) +{ + Object *machine = qdev_get_machine(); + ObjectClass *oc = object_get_class(machine); + MachineClass *mc = MACHINE_CLASS(oc); + + mc->has_dynamic_sysbus = true; +} + int xen_be_register(const char *type, struct XenDevOps *ops) { char path[50]; @@ -782,6 +579,8 @@ int xen_be_register(const char *type, struct XenDevOps *ops) void xen_be_register_common(void) { + xen_set_dynamic_sysbus(); + xen_be_register("console", &xen_console_ops); xen_be_register("vkbd", &xen_kbdmouse_ops); xen_be_register("qdisk", &xen_blkdev_ops); @@ -798,70 +597,52 @@ int xen_be_bind_evtchn(struct XenDevice *xendev) xendev->local_port = xenevtchn_bind_interdomain (xendev->evtchndev, xendev->dom, xendev->remote_port); if (xendev->local_port == -1) { - xen_be_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n"); + xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n"); return -1; } - xen_be_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); + xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), - xen_be_evtchn_event, NULL, xendev); + xen_pv_evtchn_event, NULL, xendev); return 0; } -void xen_be_unbind_evtchn(struct XenDevice *xendev) -{ - if (xendev->local_port == -1) { - return; - } - qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL); - xenevtchn_unbind(xendev->evtchndev, xendev->local_port); - xen_be_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); - xendev->local_port = -1; -} -int xen_be_send_notify(struct XenDevice *xendev) +static Property xendev_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static void xendev_class_init(ObjectClass *klass, void *data) { - return xenevtchn_notify(xendev->evtchndev, xendev->local_port); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->props = xendev_properties; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); } -/* - * msg_level: - * 0 == errors (stderr + logfile). - * 1 == informative debug messages (logfile only). - * 2 == noisy debug messages (logfile only). - * 3 == will flood your log (logfile only). - */ -void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...) 
+static const TypeInfo xendev_type_info = { + .name = TYPE_XENBACKEND, + .parent = TYPE_XENSYSDEV, + .class_init = xendev_class_init, + .instance_size = sizeof(struct XenDevice), +}; + +static void xen_sysbus_class_init(ObjectClass *klass, void *data) { - va_list args; + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); - if (xendev) { - if (msg_level > xendev->debug) { - return; - } - qemu_log("xen be: %s: ", xendev->name); - if (msg_level == 0) { - fprintf(stderr, "xen be: %s: ", xendev->name); - } - } else { - if (msg_level > debug) { - return; - } - qemu_log("xen be core: "); - if (msg_level == 0) { - fprintf(stderr, "xen be core: "); - } - } - va_start(args, fmt); - qemu_log_vprintf(fmt, args); - va_end(args); - if (msg_level == 0) { - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - } - qemu_log_flush(); + hc->unplug = qdev_simple_device_unplug_cb; } +static const TypeInfo xensysbus_info = { + .name = TYPE_XENSYSBUS, + .parent = TYPE_BUS, + .class_init = xen_sysbus_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + static int xen_sysdev_init(SysBusDevice *dev) { return 0; @@ -878,6 +659,7 @@ static void xen_sysdev_class_init(ObjectClass *klass, void *data) k->init = xen_sysdev_init; dc->props = xen_sysdev_properties; + dc->bus_type = TYPE_XENSYSBUS; } static const TypeInfo xensysdev_info = { @@ -889,7 +671,9 @@ static const TypeInfo xensysdev_info = { static void xenbe_register_types(void) { + type_register_static(&xensysbus_info); type_register_static(&xensysdev_info); + type_register_static(&xendev_type_info); } -type_init(xenbe_register_types); +type_init(xenbe_register_types) diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c index b7d290df6c..a80e78c0dc 100644 --- a/hw/xen/xen_devconfig.c +++ b/hw/xen/xen_devconfig.c @@ -55,7 +55,7 @@ int xen_config_dev_blk(DriveInfo *disk) const char *filename = qemu_opt_get(disk->opts, "file"); snprintf(device_name, sizeof(device_name), "xvd%c", 'a' + disk->unit); - xen_be_printf(NULL, 1, "config disk %d [%s]: %s\n", + xen_pv_printf(NULL, 1, "config disk %d [%s]: %s\n", disk->unit, device_name, filename); xen_config_dev_dirs("vbd", "qdisk", vdev, fe, be, sizeof(fe)); @@ -83,7 +83,7 @@ int xen_config_dev_nic(NICInfo *nic) snprintf(mac, sizeof(mac), "%02x:%02x:%02x:%02x:%02x:%02x", nic->macaddr.a[0], nic->macaddr.a[1], nic->macaddr.a[2], nic->macaddr.a[3], nic->macaddr.a[4], nic->macaddr.a[5]); - xen_be_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", vlan_id, mac); + xen_pv_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", vlan_id, mac); xen_config_dev_dirs("vif", "qnic", vlan_id, fe, be, sizeof(fe)); /* frontend */ diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c new file mode 100644 index 0000000000..aed783e844 --- /dev/null +++ b/hw/xen/xen_pvdev.c @@ -0,0 +1,318 @@ +/* + * Xen para-virtualization device + * + * (c) 2008 Gerd Hoffmann + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/qdev-core.h" +#include "hw/xen/xen_backend.h" +#include "hw/xen/xen_pvdev.h" + +/* private */ +static int debug; + +struct xs_dirs { + char *xs_dir; + QTAILQ_ENTRY(xs_dirs) list; +}; + +static QTAILQ_HEAD(xs_dirs_head, xs_dirs) xs_cleanup = + QTAILQ_HEAD_INITIALIZER(xs_cleanup); + +static QTAILQ_HEAD(XenDeviceHead, XenDevice) xendevs = + QTAILQ_HEAD_INITIALIZER(xendevs); + +/* ------------------------------------------------------------- */ + +static void xenstore_cleanup_dir(char *dir) +{ + struct xs_dirs *d; + + d = g_malloc(sizeof(*d)); + d->xs_dir = dir; + QTAILQ_INSERT_TAIL(&xs_cleanup, d, list); +} + +void xen_config_cleanup(void) +{ + struct xs_dirs *d; + + QTAILQ_FOREACH(d, &xs_cleanup, list) { + xs_rm(xenstore, 0, d->xs_dir); + } +} + +int xenstore_mkdir(char *path, int p) +{ + struct xs_permissions perms[2] = { + { + .id = 0, /* set owner: dom0 */ + }, { + .id = xen_domid, + .perms = p, + } + }; + + if (!xs_mkdir(xenstore, 0, path)) { + xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path); + return -1; + } + xenstore_cleanup_dir(g_strdup(path)); + + if (!xs_set_permissions(xenstore, 0, path, perms, 2)) { + xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path); + return -1; + } + return 0; +} + +int xenstore_write_str(const char *base, const char *node, const char *val) +{ + char abspath[XEN_BUFSIZE]; + + snprintf(abspath, sizeof(abspath), "%s/%s", base, node); + if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { + return -1; + } + return 0; +} + +char *xenstore_read_str(const char *base, const char *node) +{ + char abspath[XEN_BUFSIZE]; + unsigned int len; + char *str, *ret = NULL; + + snprintf(abspath, sizeof(abspath), "%s/%s", base, node); + str = xs_read(xenstore, 0, abspath, &len); + if (str != NULL) { + /* move to qemu-allocated memory to make sure + * callers can savely g_free() stuff. 
*/ + ret = g_strdup(str); + free(str); + } + return ret; +} + +int xenstore_write_int(const char *base, const char *node, int ival) +{ + char val[12]; + + snprintf(val, sizeof(val), "%d", ival); + return xenstore_write_str(base, node, val); +} + +int xenstore_write_int64(const char *base, const char *node, int64_t ival) +{ + char val[21]; + + snprintf(val, sizeof(val), "%"PRId64, ival); + return xenstore_write_str(base, node, val); +} + +int xenstore_read_int(const char *base, const char *node, int *ival) +{ + char *val; + int rc = -1; + + val = xenstore_read_str(base, node); + if (val && 1 == sscanf(val, "%d", ival)) { + rc = 0; + } + g_free(val); + return rc; +} + +int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval) +{ + char *val; + int rc = -1; + + val = xenstore_read_str(base, node); + if (val && 1 == sscanf(val, "%"SCNu64, uval)) { + rc = 0; + } + g_free(val); + return rc; +} + +void xenstore_update(void *unused) +{ + char **vec = NULL; + intptr_t type, ops, ptr; + unsigned int dom, count; + + vec = xs_read_watch(xenstore, &count); + if (vec == NULL) { + goto cleanup; + } + + if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, + &type, &dom, &ops) == 3) { + xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops); + } + if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { + xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr); + } + +cleanup: + free(vec); +} + +const char *xenbus_strstate(enum xenbus_state state) +{ + static const char *const name[] = { + [XenbusStateUnknown] = "Unknown", + [XenbusStateInitialising] = "Initialising", + [XenbusStateInitWait] = "InitWait", + [XenbusStateInitialised] = "Initialised", + [XenbusStateConnected] = "Connected", + [XenbusStateClosing] = "Closing", + [XenbusStateClosed] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; +} + +/* + * msg_level: + * 0 == errors (stderr + logfile). + * 1 == informative debug messages (logfile only). + * 2 == noisy debug messages (logfile only). + * 3 == will flood your log (logfile only). + */ +void xen_pv_printf(struct XenDevice *xendev, int msg_level, + const char *fmt, ...) 
+{ + va_list args; + + if (xendev) { + if (msg_level > xendev->debug) { + return; + } + qemu_log("xen be: %s: ", xendev->name); + if (msg_level == 0) { + fprintf(stderr, "xen be: %s: ", xendev->name); + } + } else { + if (msg_level > debug) { + return; + } + qemu_log("xen be core: "); + if (msg_level == 0) { + fprintf(stderr, "xen be core: "); + } + } + va_start(args, fmt); + qemu_log_vprintf(fmt, args); + va_end(args); + if (msg_level == 0) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } + qemu_log_flush(); +} + +void xen_pv_evtchn_event(void *opaque) +{ + struct XenDevice *xendev = opaque; + evtchn_port_t port; + + port = xenevtchn_pending(xendev->evtchndev); + if (port != xendev->local_port) { + xen_pv_printf(xendev, 0, + "xenevtchn_pending returned %d (expected %d)\n", + port, xendev->local_port); + return; + } + xenevtchn_unmask(xendev->evtchndev, port); + + if (xendev->ops->event) { + xendev->ops->event(xendev); + } +} + +void xen_pv_unbind_evtchn(struct XenDevice *xendev) +{ + if (xendev->local_port == -1) { + return; + } + qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL); + xenevtchn_unbind(xendev->evtchndev, xendev->local_port); + xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); + xendev->local_port = -1; +} + +int xen_pv_send_notify(struct XenDevice *xendev) +{ + return xenevtchn_notify(xendev->evtchndev, xendev->local_port); +} + +/* ------------------------------------------------------------- */ + +struct XenDevice *xen_pv_find_xendev(const char *type, int dom, int dev) +{ + struct XenDevice *xendev; + + QTAILQ_FOREACH(xendev, &xendevs, next) { + if (xendev->dom != dom) { + continue; + } + if (xendev->dev != dev) { + continue; + } + if (strcmp(xendev->type, type) != 0) { + continue; + } + return xendev; + } + return NULL; +} + +/* + * release xen backend device. 
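The event-channel helpers above keep their old behaviour under the xen_pv_ prefix. A hedged sketch of the usual lifecycle in a backend's callbacks; the foo_* names are illustrative:

#include "qemu/osdep.h"
#include "hw/xen/xen_backend.h"
#include "hw/xen/xen_pvdev.h"

static int foo_initialise(struct XenDevice *xendev)
{
    /* map shared rings here, then bind the interdomain event channel */
    return xen_be_bind_evtchn(xendev);
}

static void foo_event(struct XenDevice *xendev)
{
    /* called via xen_pv_evtchn_event() when the frontend kicks us;
     * drain the ring, then notify the frontend in turn */
    xen_pv_send_notify(xendev);
}

static void foo_disconnect(struct XenDevice *xendev)
{
    xen_pv_unbind_evtchn(xendev);
}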
+ */ +void xen_pv_del_xendev(struct XenDevice *xendev) +{ + if (xendev->ops->free) { + xendev->ops->free(xendev); + } + + if (xendev->fe) { + char token[XEN_BUFSIZE]; + snprintf(token, sizeof(token), "fe:%p", xendev); + xs_unwatch(xenstore, xendev->fe, token); + g_free(xendev->fe); + } + + if (xendev->evtchndev != NULL) { + xenevtchn_close(xendev->evtchndev); + } + if (xendev->gnttabdev != NULL) { + xengnttab_close(xendev->gnttabdev); + } + + QTAILQ_REMOVE(&xendevs, xendev, next); + + qdev_unplug(&xendev->qdev, NULL); +} + +void xen_pv_insert_xendev(struct XenDevice *xendev) +{ + QTAILQ_INSERT_TAIL(&xendevs, xendev, next); +} diff --git a/hw/xenpv/xen_domainbuild.c b/hw/xenpv/xen_domainbuild.c index 5a9f5ac806..457a8976c3 100644 --- a/hw/xenpv/xen_domainbuild.c +++ b/hw/xenpv/xen_domainbuild.c @@ -53,11 +53,7 @@ int xenstore_domain_init1(const char *kernel, const char *ramdisk, char *dom, uuid_string[42], vm[256], path[256]; int i; - snprintf(uuid_string, sizeof(uuid_string), UUID_FMT, - qemu_uuid[0], qemu_uuid[1], qemu_uuid[2], qemu_uuid[3], - qemu_uuid[4], qemu_uuid[5], qemu_uuid[6], qemu_uuid[7], - qemu_uuid[8], qemu_uuid[9], qemu_uuid[10], qemu_uuid[11], - qemu_uuid[12], qemu_uuid[13], qemu_uuid[14], qemu_uuid[15]); + qemu_uuid_unparse(&qemu_uuid, uuid_string); dom = xs_get_domain_path(xenstore, xen_domid); snprintf(vm, sizeof(vm), "/vm/%s", uuid_string); @@ -236,7 +232,7 @@ int xen_domain_build_pv(const char *kernel, const char *ramdisk, unsigned long xenstore_mfn = 0, console_mfn = 0; int rc; - memcpy(uuid, qemu_uuid, sizeof(uuid)); + memcpy(uuid, &qemu_uuid, sizeof(uuid)); rc = xen_domain_create(xen_xc, ssidref, uuid, flags, &xen_domid); if (rc < 0) { fprintf(stderr, "xen: xc_domain_create() failed\n"); diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c index ac75949484..dc6fdcc266 100644 --- a/hw/xtensa/xtfpga.c +++ b/hw/xtensa/xtfpga.c @@ -265,7 +265,7 @@ static void lx_init(const LxBoardDesc *board, MachineState *machine) } if (!serial_hds[0]) { - serial_hds[0] = qemu_chr_new("serial0", "null", NULL); + serial_hds[0] = qemu_chr_new("serial0", "null"); } serial_mm_init(system_io, 0x0d050020, 2, xtensa_get_extint(env, 0), diff --git a/include/block/aio.h b/include/block/aio.h index 173c1ed404..c7ae27c91c 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -18,7 +18,6 @@ #include "qemu/queue.h" #include "qemu/event_notifier.h" #include "qemu/thread.h" -#include "qemu/rfifolock.h" #include "qemu/timer.h" typedef struct BlockAIOCB BlockAIOCB; @@ -54,7 +53,7 @@ struct AioContext { GSource source; /* Protects all fields from multi-threaded access */ - RFifoLock lock; + QemuRecMutex lock; /* The list of registered AIO handlers */ QLIST_HEAD(, AioHandler) aio_handlers; @@ -116,9 +115,6 @@ struct AioContext { bool notified; EventNotifier notifier; - /* Scheduling this BH forces the event loop it iterate */ - QEMUBH *notify_dummy_bh; - /* Thread pool for performing work and receiving completion callbacks */ struct ThreadPool *thread_pool; @@ -180,6 +176,12 @@ void aio_context_acquire(AioContext *ctx); /* Relinquish ownership of the AioContext. */ void aio_context_release(AioContext *ctx); +/** + * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run + * only once and as soon as possible. + */ +void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + /** * aio_bh_new: Allocate a new bottom half structure. 
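aio_bh_schedule_oneshot(), declared in the aio.h hunk above, removes the create/delete boilerplate for fire-once bottom halves. A hedged sketch; the callback and wrapper names are illustrative:

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "block/aio.h"

static void my_deferred_work(void *opaque)
{
    /* Runs exactly once in the context's home thread; the bottom half
     * is freed automatically afterwards. */
}

static void my_defer_to_main_loop(void *opaque)
{
    aio_bh_schedule_oneshot(qemu_get_aio_context(), my_deferred_work, opaque);
}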
* @@ -446,6 +448,24 @@ static inline bool aio_node_check(AioContext *ctx, bool is_external) return !is_external || !atomic_read(&ctx->external_disable_cnt); } +/** + * Return the AioContext whose event loop runs in the current thread. + * + * If called from an IOThread this will be the IOThread's AioContext. If + * called from another thread it will be the main loop AioContext. + */ +AioContext *qemu_get_current_aio_context(void); + +/** + * @ctx: the aio context + * + * Return whether we are running in the I/O thread that manages @ctx. + */ +static inline bool aio_context_in_iothread(AioContext *ctx) +{ + return ctx == qemu_get_current_aio_context(); +} + /** * aio_context_setup: * @ctx: the aio context diff --git a/include/block/block.h b/include/block/block.h index 11c162d594..49bb0b239a 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -7,16 +7,15 @@ #include "qemu/coroutine.h" #include "block/accounting.h" #include "block/dirty-bitmap.h" +#include "block/blockjob.h" #include "qapi/qmp/qobject.h" #include "qapi-types.h" #include "qemu/hbitmap.h" /* block.c */ typedef struct BlockDriver BlockDriver; -typedef struct BlockJob BlockJob; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; -typedef struct BlockJobTxn BlockJobTxn; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -65,9 +64,10 @@ typedef enum { BDRV_REQ_MAY_UNMAP = 0x4, BDRV_REQ_NO_SERIALISING = 0x8, BDRV_REQ_FUA = 0x10, + BDRV_REQ_WRITE_COMPRESSED = 0x20, /* Mask of valid flags */ - BDRV_REQ_MASK = 0x1f, + BDRV_REQ_MASK = 0x3f, } BdrvRequestFlags; typedef struct BlockSizes { @@ -106,6 +106,8 @@ typedef struct HDGeometry { #define BDRV_OPT_CACHE_WB "cache.writeback" #define BDRV_OPT_CACHE_DIRECT "cache.direct" #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" +#define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_DISCARD "discard" #define BDRV_SECTOR_BITS 9 @@ -184,11 +186,6 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MAX, } BlockOpType; -void bdrv_info_print(Monitor *mon, const QObject *data); -void bdrv_info(Monitor *mon, QObject **ret_data); -void bdrv_stats_print(Monitor *mon, const QObject *data); -void bdrv_info_stats(Monitor *mon, QObject **ret_data); - /* disk I/O throttling */ void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -220,7 +217,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, int flags); -int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp); int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); @@ -316,17 +313,11 @@ BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque); -BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, - int64_t offset, int count, - BlockCompletionFunc *cb, void *opaque); void bdrv_aio_cancel(BlockAIOCB *acb); void bdrv_aio_cancel_async(BlockAIOCB *acb); /* sg packet commands */ -int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf); -BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, - unsigned long int req, void *buf, - BlockCompletionFunc *cb, void *opaque); +int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf); /* 
Invalidate any cached metadata used by image formats */ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); @@ -336,11 +327,43 @@ int bdrv_inactivate_all(void); /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain(BlockDriverState *bs); void coroutine_fn bdrv_co_drain(BlockDriverState *bs); +void bdrv_drain_all_begin(void); +void bdrv_drain_all_end(void); void bdrv_drain_all(void); +#define BDRV_POLL_WHILE(bs, cond) ({ \ + bool waited_ = false; \ + BlockDriverState *bs_ = (bs); \ + AioContext *ctx_ = bdrv_get_aio_context(bs_); \ + if (aio_context_in_iothread(ctx_)) { \ + while ((cond)) { \ + aio_poll(ctx_, true); \ + waited_ = true; \ + } \ + } else { \ + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ + /* Ask bdrv_dec_in_flight to wake up the main \ + * QEMU AioContext. Extra I/O threads never take \ + * other I/O threads' AioContexts (see for example \ + * block_job_defer_to_main_loop for how to do it). \ + */ \ + assert(!bs_->wakeup); \ + bs_->wakeup = true; \ + while ((cond)) { \ + aio_context_release(ctx_); \ + aio_poll(qemu_get_aio_context(), true); \ + aio_context_acquire(ctx_); \ + waited_ = true; \ + } \ + bs_->wakeup = false; \ + } \ + waited_; }) + int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count); int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int bdrv_has_zero_init_1(BlockDriverState *bs); @@ -392,15 +415,12 @@ bool bdrv_is_encrypted(BlockDriverState *bs); bool bdrv_key_required(BlockDriverState *bs); int bdrv_set_key(BlockDriverState *bs, const char *key); void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp); -int bdrv_query_missing_keys(void); void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void *opaque); const char *bdrv_get_node_name(const BlockDriverState *bs); const char *bdrv_get_device_name(const BlockDriverState *bs); const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); int bdrv_get_flags(BlockDriverState *bs); -int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors); int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); void bdrv_round_sectors_to_clusters(BlockDriverState *bs, @@ -421,7 +441,6 @@ void bdrv_get_full_backing_filename_from_filename(const char *backed, const char *backing, char *dest, size_t sz, Error **errp); -int bdrv_is_snapshot(BlockDriverState *bs); int path_has_protocol(const char *path); int path_is_absolute(const char *path); diff --git a/include/block/block_backup.h b/include/block/block_backup.h new file mode 100644 index 0000000000..8a759477a3 --- /dev/null +++ b/include/block/block_backup.h @@ -0,0 +1,39 @@ +/* + * QEMU backup + * + * Copyright (c) 2013 Proxmox Server Solutions + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 Intel Corporation + * Copyright (c) 2016 FUJITSU LIMITED + * + * Authors: + * Dietmar Maurer + * Changlong Xie + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef BLOCK_BACKUP_H +#define BLOCK_BACKUP_H + +#include "block/block_int.h" + +typedef struct CowRequest { + int64_t start; + int64_t end; + QLIST_ENTRY(CowRequest) list; + CoQueue wait_queue; /* coroutines blocked on this request */ +} CowRequest; + +void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num, + int nb_sectors); +void backup_cow_request_begin(CowRequest *req, BlockJob *job, + int64_t sector_num, + int nb_sectors); +void backup_cow_request_end(CowRequest *req); + +void backup_do_checkpoint(BlockJob *job, Error **errp); + +#endif diff --git a/include/block/block_int.h b/include/block/block_int.h index 1e939de4fe..83a423c580 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -62,8 +62,6 @@ enum BdrvTrackedRequestType { BDRV_TRACKED_READ, BDRV_TRACKED_WRITE, - BDRV_TRACKED_FLUSH, - BDRV_TRACKED_IOCTL, BDRV_TRACKED_DISCARD, }; @@ -204,8 +202,8 @@ struct BlockDriver { bool has_variable_length; int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); - int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors); + int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); int (*bdrv_snapshot_create)(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); @@ -244,6 +242,8 @@ struct BlockDriver { BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); + int coroutine_fn (*bdrv_co_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf); /* List of options for creating images, terminated by name == NULL */ QemuOptsList *create_opts; @@ -443,7 +443,7 @@ struct BlockDriverState { note this is a reference count */ CoQueue flush_queue; /* Serializing flush queue */ - BdrvTrackedRequest *active_flush_req; /* Flush request in flight */ + bool active_flush_req; /* Flush request in flight? */ unsigned int write_gen; /* Current data generation */ unsigned int flushed_gen; /* Flushed write generation */ @@ -471,9 +471,12 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight serialising requests */ + /* number of in-flight requests; overall and serialising */ + unsigned int in_flight; unsigned int serialising_in_flight; + bool wakeup; + /* Offset after the highest byte written to */ uint64_t wr_highest_offset; @@ -562,15 +565,6 @@ extern BlockDriver bdrv_file; extern BlockDriver bdrv_raw; extern BlockDriver bdrv_qcow2; -/** - * bdrv_setup_io_funcs: - * - * Prepare a #BlockDriver for I/O request processing by populating - * unimplemented coroutine and AIO interfaces with generic wrapper functions - * that fall back to implemented interfaces. - */ -void bdrv_setup_io_funcs(BlockDriver *bdrv); - int coroutine_fn bdrv_co_preadv(BdrvChild *child, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); @@ -641,6 +635,21 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, void (*aio_context_detached)(void *), void *opaque); +/** + * bdrv_wakeup: + * @bs: The BlockDriverState for which an I/O operation has been completed. + * + * Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During + * synchronous I/O on a BlockDriverState that is attached to another + * I/O thread, the main thread lets the I/O thread's event loop run, + * waiting for the I/O operation to complete. 
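Together, BDRV_POLL_WHILE() from the block.h hunk above and bdrv_wakeup() give synchronous wrappers a way to wait for coroutine completion whether or not the BlockDriverState lives in an IOThread. A hedged sketch of that pattern, assuming the coroutine API of this QEMU tree; MySyncData, my_co_flush_entry and my_sync_flush are illustrative:

#include "qemu/osdep.h"
#include "qemu/coroutine.h"
#include "block/block.h"
#include "block/block_int.h"

typedef struct {
    BlockDriverState *bs;
    bool done;
    int ret;
} MySyncData;

static void coroutine_fn my_co_flush_entry(void *opaque)
{
    MySyncData *data = opaque;

    data->ret = bdrv_co_flush(data->bs);
    data->done = true;
    /* usually implied by bdrv_dec_in_flight(); shown here for clarity */
    bdrv_wakeup(data->bs);
}

static int my_sync_flush(BlockDriverState *bs)
{
    MySyncData data = { .bs = bs, .done = false };
    Coroutine *co = qemu_coroutine_create(my_co_flush_entry, &data);

    qemu_coroutine_enter(co);
    BDRV_POLL_WHILE(bs, !data.done);
    return data.ret;
}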
A bdrv_wakeup will wake + * up the main thread if necessary. + * + * Manual calls to bdrv_wakeup are rarely necessary, because + * bdrv_dec_in_flight already calls it. + */ +void bdrv_wakeup(BlockDriverState *bs); + #ifdef _WIN32 int is_windows_drive(const char *filename); #endif @@ -656,8 +665,6 @@ int is_windows_drive(const char *filename); * the new backing file if the job completes. Ignored if @base is %NULL. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. * * Start a streaming operation on @bs. Clusters that are unallocated @@ -669,8 +676,7 @@ int is_windows_drive(const char *filename); */ void stream_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, const char *backing_file_str, - int64_t speed, BlockdevOnError on_error, - BlockCompletionFunc *cb, void *opaque, Error **errp); + int64_t speed, BlockdevOnError on_error, Error **errp); /** * commit_start: @@ -681,34 +687,35 @@ void stream_start(const char *job_id, BlockDriverState *bs, * @base: Block device that will be written into, and become the new top. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. * @backing_file_str: String to use as the backing file in @top's overlay * @errp: Error object. * */ void commit_start(const char *job_id, BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, - BlockdevOnError on_error, BlockCompletionFunc *cb, - void *opaque, const char *backing_file_str, Error **errp); + BlockdevOnError on_error, const char *backing_file_str, + Error **errp); /** * commit_active_start: * @job_id: The id of the newly-created job, or %NULL to use the * device name of @bs. * @bs: Active block device to be committed. * @base: Block device that will be written into, and become the new top. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags * @speed: The maximum speed, in bytes per second, or 0 for unlimited. * @on_error: The action to take upon error. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. + * @auto_complete: Auto complete the job. * */ void commit_active_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *base, int64_t speed, - BlockdevOnError on_error, + BlockDriverState *base, int creation_flags, + int64_t speed, BlockdevOnError on_error, BlockCompletionFunc *cb, - void *opaque, Error **errp); + void *opaque, Error **errp, bool auto_complete); /* * mirror_start: * @job_id: The id of the newly-created job, or %NULL to use the @@ -725,12 +732,10 @@ void commit_active_start(const char *job_id, BlockDriverState *bs, * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. * @unmap: Whether to unmap target where source sectors only contain zeroes. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. * * Start a mirroring operation on @bs. Clusters that are allocated - * in @bs will be written to @bs until the job is cancelled or + * in @bs will be written to @target until the job is cancelled or * manually completed. 
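With the completion callback and opaque pointer dropped from stream_start(), commit_start() and mirror_start(), callers hand errors back through errp and observe progress through the usual block-job events. A hedged sketch of starting a stream job under the new signature; the job id and error policy are arbitrary choices:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "block/block_int.h"

static void my_start_stream(BlockDriverState *bs, BlockDriverState *base)
{
    Error *local_err = NULL;

    stream_start("stream0", bs, base, NULL /* backing_file_str */,
                 0 /* speed: unlimited */, BLOCKDEV_ON_ERROR_REPORT,
                 &local_err);
    if (local_err) {
        error_report_err(local_err);
    }
}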
At the end of a successful mirroring job, * @bs will be switched to read from @target. */ @@ -740,12 +745,10 @@ void mirror_start(const char *job_id, BlockDriverState *bs, MirrorSyncMode mode, BlockMirrorBackingMode backing_mode, BlockdevOnError on_source_error, BlockdevOnError on_target_error, - bool unmap, - BlockCompletionFunc *cb, - void *opaque, Error **errp); + bool unmap, Error **errp); /* - * backup_start: + * backup_job_create: * @job_id: The id of the newly-created job, or %NULL to use the * device name of @bs. * @bs: Block device to operate on. @@ -755,20 +758,25 @@ void mirror_start(const char *job_id, BlockDriverState *bs, * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. * @on_source_error: The action to take upon error reading from the source. * @on_target_error: The action to take upon error writing to the target. + * @creation_flags: Flags that control the behavior of the Job lifetime. + * See @BlockJobCreateFlags * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @txn: Transaction that this job is part of (may be NULL). * - * Start a backup operation on @bs. Clusters in @bs are written to @target + * Create a backup operation on @bs. Clusters in @bs are written to @target * until the job is cancelled or manually completed. */ -void backup_start(const char *job_id, BlockDriverState *bs, - BlockDriverState *target, int64_t speed, - MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap, - BlockdevOnError on_source_error, - BlockdevOnError on_target_error, - BlockCompletionFunc *cb, void *opaque, - BlockJobTxn *txn, Error **errp); +BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + BlockDriverState *target, int64_t speed, + MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, + bool compress, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + int creation_flags, + BlockCompletionFunc *cb, void *opaque, + BlockJobTxn *txn, Error **errp); void hmp_drive_add_node(Monitor *mon, const char *optstr); @@ -792,6 +800,9 @@ bool bdrv_requests_pending(BlockDriverState *bs); void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); +void bdrv_inc_in_flight(BlockDriverState *bs); +void bdrv_dec_in_flight(BlockDriverState *bs); + void blockdev_close_all_bdrv_states(void); #endif /* BLOCK_INT_H */ diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 4ddb4ae2e1..1acb256223 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -28,78 +28,15 @@ #include "block/block.h" -/** - * BlockJobDriver: - * - * A class type for block job driver. - */ -typedef struct BlockJobDriver { - /** Derived BlockJob struct size */ - size_t instance_size; - - /** String describing the operation, part of query-block-jobs QMP API */ - BlockJobType job_type; - - /** Optional callback for job types that support setting a speed limit */ - void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); - - /** Optional callback for job types that need to forward I/O status reset */ - void (*iostatus_reset)(BlockJob *job); - - /** - * Optional callback for job types whose completion must be triggered - * manually. - */ - void (*complete)(BlockJob *job, Error **errp); - - /** - * If the callback is not NULL, it will be invoked when all the jobs - * belonging to the same transaction complete; or upon this job's - * completion if it is not in a transaction. Skipped if NULL. 
- * - * All jobs will complete with a call to either .commit() or .abort() but - * never both. - */ - void (*commit)(BlockJob *job); - - /** - * If the callback is not NULL, it will be invoked when any job in the - * same transaction fails; or upon this job's failure (due to error or - * cancellation) if it is not in a transaction. Skipped if NULL. - * - * All jobs will complete with a call to either .commit() or .abort() but - * never both. - */ - void (*abort)(BlockJob *job); - - /** - * If the callback is not NULL, it will be invoked when the job transitions - * into the paused state. Paused jobs must not perform any asynchronous - * I/O or event loop activity. This callback is used to quiesce jobs. - */ - void coroutine_fn (*pause)(BlockJob *job); - - /** - * If the callback is not NULL, it will be invoked when the job transitions - * out of the paused state. Any asynchronous I/O or event loop activity - * should be restarted from this callback. - */ - void coroutine_fn (*resume)(BlockJob *job); - - /* - * If the callback is not NULL, it will be invoked before the job is - * resumed in a new AioContext. This is the place to move any resources - * besides job->blk to the new AioContext. - */ - void (*attached_aio_context)(BlockJob *job, AioContext *new_context); -} BlockJobDriver; +typedef struct BlockJobDriver BlockJobDriver; +typedef struct BlockJobTxn BlockJobTxn; /** * BlockJob: * * Long-running operation on a BlockDriverState. */ -struct BlockJob { +typedef struct BlockJob { /** The job type, including the job vtable. */ const BlockJobDriver *driver; @@ -107,7 +44,7 @@ struct BlockJob { BlockBackend *blk; /** - * The ID of the block job. + * The ID of the block job. May be NULL for internal jobs. */ char *id; @@ -181,6 +118,9 @@ struct BlockJob { /** Block other operations when block job is running */ Error *blocker; + /** BlockDriverStates that are involved in this block job */ + GSList *nodes; + /** The opaque value that is passed to the completion function. */ void *opaque; @@ -198,7 +138,12 @@ struct BlockJob { /** Non-NULL if this job is part of a transaction */ BlockJobTxn *txn; QLIST_ENTRY(BlockJob) txn_list; -}; +} BlockJob; + +typedef enum BlockJobCreateFlags { + BLOCK_JOB_DEFAULT = 0x00, + BLOCK_JOB_INTERNAL = 0x01, +} BlockJobCreateFlags; /** * block_job_next: @@ -222,74 +167,15 @@ BlockJob *block_job_next(BlockJob *job); BlockJob *block_job_get(const char *id); /** - * block_job_create: - * @job_id: The id of the newly-created job, or %NULL to have one - * generated automatically. - * @job_type: The class object for the newly-created job. - * @bs: The block - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @errp: Error object. - * - * Create a new long-running block device job and return it. The job - * will call @cb asynchronously when the job completes. Note that - * @bs may have been closed at the time the @cb it is called. If - * this is the case, the job may be reported as either cancelled or - * completed. - * - * This function is not part of the public job interface; it should be - * called from a wrapper that is specific to the job type. - */ -void *block_job_create(const char *job_id, const BlockJobDriver *driver, - BlockDriverState *bs, int64_t speed, - BlockCompletionFunc *cb, void *opaque, Error **errp); - -/** - * block_job_sleep_ns: - * @job: The job that calls the function. - * @clock: The clock to sleep on. 
- * @ns: How many nanoseconds to stop for. - * - * Put the job to sleep (assuming that it wasn't canceled) for @ns - * nanoseconds. Canceling the job will interrupt the wait immediately. - */ -void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); - -/** - * block_job_yield: - * @job: The job that calls the function. - * - * Yield the block job coroutine. - */ -void block_job_yield(BlockJob *job); - -/** - * block_job_ref: - * @bs: The block device. + * block_job_add_bdrv: + * @job: A block job + * @bs: A BlockDriverState that is involved in @job * - * Grab a reference to the block job. Should be paired with block_job_unref. + * Add @bs to the list of BlockDriverState that are involved in + * @job. This means that all operations will be blocked on @bs while + * @job exists. */ -void block_job_ref(BlockJob *job); - -/** - * block_job_unref: - * @bs: The block device. - * - * Release reference to the block job and release resources if it is the last - * reference. - */ -void block_job_unref(BlockJob *job); - -/** - * block_job_completed: - * @job: The job being completed. - * @ret: The status code. - * - * Call the completion function that was registered at creation time, and - * free @job. - */ -void block_job_completed(BlockJob *job, int ret); +void block_job_add_bdrv(BlockJob *job, BlockDriverState *bs); /** * block_job_set_speed: @@ -302,6 +188,15 @@ void block_job_completed(BlockJob *job, int ret); */ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); +/** + * block_job_start: + * @job: A job that has not yet been started. + * + * Begins execution of a block job. + * Takes ownership of one reference to the job object. + */ +void block_job_start(BlockJob *job); + /** * block_job_cancel: * @job: The job to be canceled. @@ -319,30 +214,13 @@ void block_job_cancel(BlockJob *job); */ void block_job_complete(BlockJob *job, Error **errp); -/** - * block_job_is_cancelled: - * @job: The job being queried. - * - * Returns whether the job is scheduled for cancellation. - */ -bool block_job_is_cancelled(BlockJob *job); - /** * block_job_query: * @job: The job to get information about. * * Return information about a job. */ -BlockJobInfo *block_job_query(BlockJob *job); - -/** - * block_job_pause_point: - * @job: The job that is ready to pause. - * - * Pause now if block_job_pause() has been called. Block jobs that perform - * lots of I/O must call this between requests so that the job can be paused. - */ -void coroutine_fn block_job_pause_point(BlockJob *job); +BlockJobInfo *block_job_query(BlockJob *job, Error **errp); /** * block_job_pause: @@ -353,45 +231,38 @@ void coroutine_fn block_job_pause_point(BlockJob *job); void block_job_pause(BlockJob *job); /** - * block_job_resume: - * @job: The job to be resumed. - * - * Resume the specified job. Must be paired with a preceding block_job_pause. - */ -void block_job_resume(BlockJob *job); - -/** - * block_job_enter: - * @job: The job to enter. + * block_job_user_pause: + * @job: The job to be paused. * - * Continue the specified job by entering the coroutine. + * Asynchronously pause the specified job. + * Do not allow a resume until a matching call to block_job_user_resume. */ -void block_job_enter(BlockJob *job); +void block_job_user_pause(BlockJob *job); /** - * block_job_event_cancelled: - * @job: The job whose information is requested. + * block_job_paused: + * @job: The job to query. * - * Send a BLOCK_JOB_CANCELLED event for the specified job. + * Returns true if the job is user-paused. 
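As a rough sketch (not part of the patch) of how a caller would pair the user-level pause helpers being introduced in this hunk — the wrapper name is hypothetical and error handling is left to the caller:

    /* Sketch only: pairing block_job_user_pause()/block_job_user_resume()
     * as the comments above describe. example_toggle_user_pause() is a
     * made-up helper, e.g. something a monitor command might call. */
    #include "qemu/osdep.h"
    #include "block/blockjob.h"

    static void example_toggle_user_pause(BlockJob *job)
    {
        if (!block_job_user_paused(job)) {
            /* Asynchronous: the job stops at its next pause point. */
            block_job_user_pause(job);
        } else {
            /* Must match a preceding block_job_user_pause(). */
            block_job_user_resume(job);
        }
    }

The internal block_job_pause()/block_job_resume() pair remains available to QEMU itself, while the user-level variants gate resume on a matching user request.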
*/ -void block_job_event_cancelled(BlockJob *job); +bool block_job_user_paused(BlockJob *job); /** - * block_job_ready: - * @job: The job which is now ready to complete. - * @msg: Error message. Only present on failure. + * block_job_resume: + * @job: The job to be resumed. * - * Send a BLOCK_JOB_COMPLETED event for the specified job. + * Resume the specified job. Must be paired with a preceding block_job_pause. */ -void block_job_event_completed(BlockJob *job, const char *msg); +void block_job_resume(BlockJob *job); /** - * block_job_ready: - * @job: The job which is now ready to complete. + * block_job_user_resume: + * @job: The job to be resumed. * - * Send a BLOCK_JOB_READY event for the specified job. + * Resume the specified job. + * Must be paired with a preceding block_job_user_pause. */ -void block_job_event_ready(BlockJob *job); +void block_job_user_resume(BlockJob *job); /** * block_job_cancel_sync: @@ -438,37 +309,6 @@ int block_job_complete_sync(BlockJob *job, Error **errp); */ void block_job_iostatus_reset(BlockJob *job); -/** - * block_job_error_action: - * @job: The job to signal an error for. - * @on_err: The error action setting. - * @is_read: Whether the operation was a read. - * @error: The error that was reported. - * - * Report an I/O error for a block job and possibly stop the VM. Return the - * action that was selected based on @on_err and @error. - */ -BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, - int is_read, int error); - -typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); - -/** - * block_job_defer_to_main_loop: - * @job: The job - * @fn: The function to run in the main loop - * @opaque: The opaque value that is passed to @fn - * - * Execute a given function in the main loop with the BlockDriverState - * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and - * anything that uses bdrv_drain_all() in the main loop. - * - * The @job AioContext is held while @fn executes. - */ -void block_job_defer_to_main_loop(BlockJob *job, - BlockJobDeferToMainLoopFn *fn, - void *opaque); - /** * block_job_txn_new: * @@ -504,4 +344,12 @@ void block_job_txn_unref(BlockJobTxn *txn); */ void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job); +/** + * block_job_is_internal: + * @job: The job to determine if it is user-visible or not. + * + * Returns true if the job should not be visible to the management layer. + */ +bool block_job_is_internal(BlockJob *job); + #endif diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h new file mode 100644 index 0000000000..82238229c6 --- /dev/null +++ b/include/block/blockjob_int.h @@ -0,0 +1,250 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCKJOB_INT_H +#define BLOCKJOB_INT_H + +#include "block/blockjob.h" +#include "block/block.h" + +/** + * BlockJobDriver: + * + * A class type for block job driver. + */ +struct BlockJobDriver { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + BlockJobType job_type; + + /** Optional callback for job types that support setting a speed limit */ + void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); + + /** Optional callback for job types that need to forward I/O status reset */ + void (*iostatus_reset)(BlockJob *job); + + /** Mandatory: Entrypoint for the Coroutine. */ + CoroutineEntry *start; + + /** + * Optional callback for job types whose completion must be triggered + * manually. + */ + void (*complete)(BlockJob *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked after a call to either + * .commit() or .abort(). Regardless of which callback is invoked after + * completion, .clean() will always be called, even if the job does not + * belong to a transaction group. + */ + void (*clean)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when the job transitions + * into the paused state. Paused jobs must not perform any asynchronous + * I/O or event loop activity. This callback is used to quiesce jobs. + */ + void coroutine_fn (*pause)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when the job transitions + * out of the paused state. Any asynchronous I/O or event loop activity + * should be restarted from this callback. + */ + void coroutine_fn (*resume)(BlockJob *job); + + /* + * If the callback is not NULL, it will be invoked before the job is + * resumed in a new AioContext. This is the place to move any resources + * besides job->blk to the new AioContext. + */ + void (*attached_aio_context)(BlockJob *job, AioContext *new_context); + + /* + * If the callback is not NULL, it will be invoked when the job has to be + * synchronously cancelled or completed; it should drain BlockDriverStates + * as required to ensure progress. + */ + void (*drain)(BlockJob *job); +}; + +/** + * block_job_create: + * @job_id: The id of the newly-created job, or %NULL to have one + * generated automatically. 
+ * @job_type: The class object for the newly-created job. + * @bs: The block + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const char *job_id, const BlockJobDriver *driver, + BlockDriverState *bs, int64_t speed, int flags, + BlockCompletionFunc *cb, void *opaque, Error **errp); + +/** + * block_job_sleep_ns: + * @job: The job that calls the function. + * @clock: The clock to sleep on. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * nanoseconds. Canceling the job will interrupt the wait immediately. + */ +void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); + +/** + * block_job_yield: + * @job: The job that calls the function. + * + * Yield the block job coroutine. + */ +void block_job_yield(BlockJob *job); + +/** + * block_job_ref: + * @bs: The block device. + * + * Grab a reference to the block job. Should be paired with block_job_unref. + */ +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: + * @bs: The block device. + * + * Release reference to the block job and release resources if it is the last + * reference. + */ +void block_job_unref(BlockJob *job); + +/** + * block_job_completed: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ +void block_job_completed(BlockJob *job, int ret); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ +bool block_job_is_cancelled(BlockJob *job); + +/** + * block_job_pause_point: + * @job: The job that is ready to pause. + * + * Pause now if block_job_pause() has been called. Block jobs that perform + * lots of I/O must call this between requests so that the job can be paused. + */ +void coroutine_fn block_job_pause_point(BlockJob *job); + +/** + * block_job_enter: + * @job: The job to enter. + * + * Continue the specified job by entering the coroutine. + */ +void block_job_enter(BlockJob *job); + +/** + * block_job_event_ready: + * @job: The job which is now ready to be completed. + * + * Send a BLOCK_JOB_READY event for the specified job. + */ +void block_job_event_ready(BlockJob *job); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. 
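Because blockjob_int.h now carries both the driver vtable and the create/start entry points, a minimal sketch of the smallest driver it implies may help. Everything prefixed "example_" is hypothetical; a real driver would also set .job_type and usually pass a completion callback:

    /* Sketch only, not part of the patch: instance_size plus the mandatory
     * .start coroutine, created with block_job_create() and launched with
     * block_job_start(). */
    #include "qemu/osdep.h"
    #include "block/blockjob_int.h"

    typedef struct ExampleJob {
        BlockJob common;
    } ExampleJob;

    static void example_job_done(BlockJob *job, void *opaque)
    {
        block_job_completed(job, 0);
    }

    static void coroutine_fn example_job_run(void *opaque)
    {
        ExampleJob *s = opaque;

        while (!block_job_is_cancelled(&s->common)) {
            block_job_pause_point(&s->common);   /* honour pause requests */
            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 100000);
            break;                               /* pretend the work is done */
        }
        /* Finish on the main loop, as block_job_defer_to_main_loop requires. */
        block_job_defer_to_main_loop(&s->common, example_job_done, NULL);
    }

    static const BlockJobDriver example_job_driver = {
        .instance_size = sizeof(ExampleJob),
        .start         = example_job_run,
    };

    static void example_start_job(BlockDriverState *bs, Error **errp)
    {
        ExampleJob *s = block_job_create("example0", &example_job_driver, bs,
                                         0 /* unlimited speed */,
                                         BLOCK_JOB_DEFAULT, NULL, NULL, errp);
        if (s) {
            block_job_start(&s->common);   /* enters example_job_run() */
        }
    }

The split between creation and start is the point of the new block_job_start(): the wrapper can still wire up transactions or extra nodes before the coroutine runs.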
+ */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err, + int is_read, int error); + +typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); + +/** + * block_job_defer_to_main_loop: + * @job: The job + * @fn: The function to run in the main loop + * @opaque: The opaque value that is passed to @fn + * + * Execute a given function in the main loop with the BlockDriverState + * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and + * anything that uses bdrv_drain_all() in the main loop. + * + * The @job AioContext is held while @fn executes. + */ +void block_job_defer_to_main_loop(BlockJob *job, + BlockJobDeferToMainLoopFn *fn, + void *opaque); + +#endif diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index ee3388f90d..9dea14ba03 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -8,6 +8,9 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, uint32_t granularity, const char *name, Error **errp); +void bdrv_create_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int chunk_size); +void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap); int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, Error **errp); @@ -27,8 +30,11 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap); +int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap); DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); @@ -36,9 +42,34 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int64_t nr_sectors); -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); -void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); +void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, int64_t sector, + int nb_sectors); +BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap); +BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap, + uint64_t first_sector); +void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter); +int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter); +void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num); int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); +uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count); +uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + uint64_t count); +void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap, + uint8_t *buf, uint64_t start, + 
uint64_t count, bool finish); +void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap, + uint64_t start, uint64_t count, + bool finish); +void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap); + #endif diff --git a/include/block/nbd.h b/include/block/nbd.h index 1897557a9b..3e373f0498 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori * * Network Block Device @@ -25,52 +26,89 @@ #include "io/channel-socket.h" #include "crypto/tlscreds.h" -/* Note: these are _NOT_ the same as the network representation of an NBD +/* Handshake phase structs - this struct is passed on the wire */ + +struct nbd_option { + uint64_t magic; /* NBD_OPTS_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct nbd_option nbd_option; + +struct nbd_opt_reply { + uint64_t magic; /* NBD_REP_MAGIC */ + uint32_t option; /* NBD_OPT_* */ + uint32_t type; /* NBD_REP_* */ + uint32_t length; +} QEMU_PACKED; +typedef struct nbd_opt_reply nbd_opt_reply; + +/* Transmission phase structs + * + * Note: these are _NOT_ the same as the network representation of an NBD * request and reply! */ -struct nbd_request { +struct NBDRequest { uint64_t handle; uint64_t from; uint32_t len; - uint32_t type; + uint16_t flags; /* NBD_CMD_FLAG_* */ + uint16_t type; /* NBD_CMD_* */ }; +typedef struct NBDRequest NBDRequest; -struct nbd_reply { +struct NBDReply { uint64_t handle; uint32_t error; }; +typedef struct NBDReply NBDReply; +/* Transmission (export) flags: sent from server to client during handshake, + but describe what will happen during transmission */ #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ #define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ #define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ #define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ #define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ -/* New-style global flags. */ -#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +/* New-style handshake (global) flags, sent from server to client, and + control what will happen during handshake phase. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ -/* New-style client flags. */ -#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +/* New-style client flags, sent from client to server to control what happens + during handshake phase. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ +#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */ /* Reply types. */ +#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value)) + #define NBD_REP_ACK (1) /* Data sending finished. */ #define NBD_REP_SERVER (2) /* Export description. */ -#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */ -#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */ -#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. 
*/ -#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */ +#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */ +#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */ +#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */ +#define NBD_REP_ERR_PLATFORM NBD_REP_ERR(4) /* Not compiled in */ +#define NBD_REP_ERR_TLS_REQD NBD_REP_ERR(5) /* TLS required */ +#define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */ -#define NBD_CMD_MASK_COMMAND 0x0000ffff -#define NBD_CMD_FLAG_FUA (1 << 16) +/* Request flags, sent from client to server during transmission phase */ +#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */ +#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */ +/* Supported request types */ enum { NBD_CMD_READ = 0, NBD_CMD_WRITE = 1, NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, - NBD_CMD_TRIM = 4 + NBD_CMD_TRIM = 4, + /* 5 reserved for failed experiment NBD_CMD_CACHE */ + NBD_CMD_WRITE_ZEROES = 6, }; #define NBD_DEFAULT_PORT 10809 @@ -95,16 +133,17 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, QIOChannel **outioc, off_t *size, Error **errp); int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size); -ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request); -ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply); +ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request); +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply); int nbd_client(int fd); int nbd_disconnect(int fd); typedef struct NBDExport NBDExport; typedef struct NBDClient NBDClient; -NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, +NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, uint16_t nbdflags, void (*close)(NBDExport *), + bool writethrough, BlockBackend *on_eject_blk, Error **errp); void nbd_export_close(NBDExport *exp); void nbd_export_get(NBDExport *exp); @@ -114,6 +153,7 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp); NBDExport *nbd_export_find(const char *name); void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_set_description(NBDExport *exp, const char *description); void nbd_export_close_all(void); void nbd_client_new(NBDExport *exp, diff --git a/include/crypto/cipher.h b/include/crypto/cipher.h index 376654dcdd..bec9f412b0 100644 --- a/include/crypto/cipher.h +++ b/include/crypto/cipher.h @@ -85,13 +85,15 @@ struct QCryptoCipher { /** * qcrypto_cipher_supports: * @alg: the cipher algorithm + * @mode: the cipher mode * - * Determine if @alg cipher algorithm is supported by the + * Determine if @alg cipher algorithm in @mode is supported by the * current configured build * * Returns: true if the algorithm is supported, false otherwise */ -bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg); +bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg, + QCryptoCipherMode mode); /** * qcrypto_cipher_get_block_len: @@ -213,16 +215,16 @@ int qcrypto_cipher_decrypt(QCryptoCipher *cipher, /** * qcrypto_cipher_setiv: * @cipher: the cipher object - * @iv: the initialization vector bytes + * @iv: the initialization vector or counter (CTR mode) bytes * @niv: the length of @iv * @errpr: pointer to a NULL-initialized error object * * If the @cipher object is setup to use a mode that requires - * initialization vectors, this sets the initialization vector + * initialization vectors or counter, this sets the @niv * bytes. 
The @iv data should have the same length as the * cipher key used when originally constructing the cipher * object. It is an error to set an initialization vector - * if the cipher mode does not require one. + * or counter if the cipher mode does not require one. * * Returns: 0 on success, -1 on error */ diff --git a/include/crypto/pbkdf.h b/include/crypto/pbkdf.h index e9e4ceca83..ef209b3e03 100644 --- a/include/crypto/pbkdf.h +++ b/include/crypto/pbkdf.h @@ -122,7 +122,7 @@ bool qcrypto_pbkdf2_supports(QCryptoHashAlgorithm hash); int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, const uint8_t *key, size_t nkey, const uint8_t *salt, size_t nsalt, - unsigned int iterations, + uint64_t iterations, uint8_t *out, size_t nout, Error **errp); @@ -133,6 +133,7 @@ int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, * @nkey: the length of @key in bytes * @salt: a random salt * @nsalt: length of @salt in bytes + * @nout: size of desired derived key * @errp: pointer to a NULL-initialized error object * * Time the PBKDF2 algorithm to determine how many @@ -140,13 +141,16 @@ int qcrypto_pbkdf2(QCryptoHashAlgorithm hash, * key from a user password provided in @key in 1 * second of compute time. The result of this can * be used as a the @iterations parameter of a later - * call to qcrypto_pbkdf2(). + * call to qcrypto_pbkdf2(). The value of @nout should + * match that value that will later be provided with + * a call to qcrypto_pbkdf2(). * * Returns: number of iterations in 1 second, -1 on error */ -int qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, - const uint8_t *key, size_t nkey, - const uint8_t *salt, size_t nsalt, - Error **errp); +uint64_t qcrypto_pbkdf2_count_iters(QCryptoHashAlgorithm hash, + const uint8_t *key, size_t nkey, + const uint8_t *salt, size_t nsalt, + size_t nout, + Error **errp); #endif /* QCRYPTO_PBKDF_H */ diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index b6a705982f..e9004e5798 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -31,6 +31,7 @@ #define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ #define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */ +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */ /* some important defines: * @@ -189,6 +190,15 @@ void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val, /* page related stuff */ +#ifdef TARGET_PAGE_BITS_VARY +extern bool target_page_bits_decided; +extern int target_page_bits; +#define TARGET_PAGE_BITS ({ assert(target_page_bits_decided); \ + target_page_bits; }) +#else +#define TARGET_PAGE_BITS_MIN TARGET_PAGE_BITS +#endif + #define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS) #define TARGET_PAGE_MASK ~(TARGET_PAGE_SIZE - 1) #define TARGET_PAGE_ALIGN(addr) (((addr) + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 952bcfeb4c..cffdc130e6 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -23,6 +23,11 @@ typedef struct CPUListState { FILE *file; } CPUListState; +/* The CPU list lock nests outside tb_lock/tb_unlock. 
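The pbkdf.h change widens the iteration count to 64 bits and makes the calibration helper take the derived-key size. A small sketch of the calibrate-then-derive pattern those comments describe; the wrapper name and the SHA-256 choice are illustrative, not taken from the patch:

    /* Sketch only: time the KDF first, then derive with the measured count.
     * @nout must be the same in both calls, per the updated documentation. */
    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "crypto/pbkdf.h"

    static int example_derive_key(const uint8_t *pass, size_t npass,
                                  const uint8_t *salt, size_t nsalt,
                                  uint8_t *out, size_t nout, Error **errp)
    {
        Error *local_err = NULL;
        uint64_t iters;

        /* Calibrate: iterations per ~1s for this hash, input and key size. */
        iters = qcrypto_pbkdf2_count_iters(QCRYPTO_HASH_ALG_SHA256,
                                           pass, npass, salt, nsalt,
                                           nout, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -1;
        }

        /* Derive: same hash, key, salt and output size, now with @iters. */
        return qcrypto_pbkdf2(QCRYPTO_HASH_ALG_SHA256, pass, npass,
                              salt, nsalt, iters, out, nout, errp);
    }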
*/ +void qemu_init_cpu_list(void); +void cpu_list_lock(void); +void cpu_list_unlock(void); + #if !defined(CONFIG_USER_ONLY) enum device_endian { @@ -58,6 +63,7 @@ RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset, void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev); void qemu_ram_unset_idstr(RAMBlock *block); const char *qemu_ram_get_idstr(RAMBlock *rb); +size_t qemu_ram_pagesize(RAMBlock *block); void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, int len, int is_write); diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index b573df53b0..6eb5fe80dc 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -401,7 +401,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, int access_type, int mmu_idx) { #if defined(CONFIG_USER_ONLY) - return g2h(vaddr); + return g2h(addr); #else int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d008296c1b..a8c13cee66 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -56,21 +56,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, uint32_t flags, int cflags); -#if defined(CONFIG_USER_ONLY) -void cpu_list_lock(void); -void cpu_list_unlock(void); -#else -static inline void cpu_list_unlock(void) -{ -} -static inline void cpu_list_lock(void) -{ -} -#endif -void cpu_exec_init(CPUState *cpu, Error **errp); void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); #if !defined(CONFIG_USER_ONLY) void cpu_reloading_memory_map(void); @@ -225,6 +214,8 @@ struct TranslationBlock { #define CF_USE_ICOUNT 0x20000 #define CF_IGNORE_ICOUNT 0x40000 /* Do not generate icount code */ + uint16_t invalid; + void *tc_ptr; /* pointer to the translated code */ uint8_t *tc_search; /* pointer to search data */ /* original tb when cflags has CF_NOCACHE */ @@ -325,6 +316,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb, #endif +/* Called with tb_lock held. */ static inline void tb_add_jump(TranslationBlock *tb, int n, TranslationBlock *tb_next) { @@ -347,13 +339,12 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, tb_next->jmp_list_first = (uintptr_t)tb | n; } -/* GETRA is the true target of the return instruction that we'll execute, - defined here for simplicity of defining the follow-up macros. */ +/* GETPC is the true target of the return instruction that we'll execute. */ #if defined(CONFIG_TCG_INTERPRETER) extern uintptr_t tci_tb_ptr; -# define GETRA() tci_tb_ptr +# define GETPC() tci_tb_ptr #else -# define GETRA() \ +# define GETPC() \ ((uintptr_t)__builtin_extract_return_addr(__builtin_return_address(0))) #endif @@ -366,8 +357,6 @@ extern uintptr_t tci_tb_ptr; smaller than 4 bytes, so we don't worry about special-casing this. 
*/ #define GETPC_ADJ 2 -#define GETPC() (GETRA() - GETPC_ADJ) - #if !defined(CONFIG_USER_ONLY) struct MemoryRegion *iotlb_to_region(CPUState *cpu, @@ -381,6 +370,7 @@ void tlb_fill(CPUState *cpu, target_ulong addr, MMUAccessType access_type, #if defined(CONFIG_USER_ONLY) void mmap_lock(void); void mmap_unlock(void); +bool have_mmap_lock(void); static inline tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) { diff --git a/include/exec/memory.h b/include/exec/memory.h index 3e4d4164cd..9728a2fb1a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -67,6 +67,27 @@ struct IOMMUTLBEntry { IOMMUAccessFlags perm; }; +/* + * Bitmap for different IOMMUNotifier capabilities. Each notifier can + * register with one or multiple IOMMU Notifier capability bit(s). + */ +typedef enum { + IOMMU_NOTIFIER_NONE = 0, + /* Notify cache invalidations */ + IOMMU_NOTIFIER_UNMAP = 0x1, + /* Notify entry changes (newly created entries) */ + IOMMU_NOTIFIER_MAP = 0x2, +} IOMMUNotifierFlag; + +#define IOMMU_NOTIFIER_ALL (IOMMU_NOTIFIER_MAP | IOMMU_NOTIFIER_UNMAP) + +struct IOMMUNotifier { + void (*notify)(struct IOMMUNotifier *notifier, IOMMUTLBEntry *data); + IOMMUNotifierFlag notifier_flags; + QLIST_ENTRY(IOMMUNotifier) node; +}; +typedef struct IOMMUNotifier IOMMUNotifier; + /* New-style MMIO accessors can indicate that the transaction failed. * A zero (MEMTX_OK) response means success; anything else is a failure * of some kind. The memory subsystem will bitwise-OR together results @@ -153,10 +174,10 @@ struct MemoryRegionIOMMUOps { IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write); /* Returns minimum supported page size */ uint64_t (*get_min_page_size)(MemoryRegion *iommu); - /* Called when the first notifier is set */ - void (*notify_started)(MemoryRegion *iommu); - /* Called when the last notifier is removed */ - void (*notify_stopped)(MemoryRegion *iommu); + /* Called when IOMMU Notifier flag changed */ + void (*notify_flag_changed)(MemoryRegion *iommu, + IOMMUNotifierFlag old_flags, + IOMMUNotifierFlag new_flags); }; typedef struct CoalescedMemoryRange CoalescedMemoryRange; @@ -188,7 +209,7 @@ struct MemoryRegion { void (*destructor)(MemoryRegion *mr); uint64_t align; bool terminates; - bool skip_dump; + bool ram_device; bool enabled; bool warning_printed; /* For reservations */ uint8_t vga_logging_count; @@ -201,7 +222,8 @@ struct MemoryRegion { const char *name; unsigned ioeventfd_nb; MemoryRegionIoeventfd *ioeventfds; - NotifierList iommu_notify; + QLIST_HEAD(, IOMMUNotifier) iommu_notify; + IOMMUNotifierFlag iommu_notify_flags; }; /** @@ -233,8 +255,9 @@ struct MemoryListener { hwaddr addr, hwaddr len); /* Lower = earlier (during add), later (during del) */ unsigned priority; - AddressSpace *address_space_filter; + AddressSpace *address_space; QTAILQ_ENTRY(MemoryListener) link; + QTAILQ_ENTRY(MemoryListener) link_as; }; /** @@ -256,7 +279,7 @@ struct AddressSpace { struct AddressSpaceDispatch *dispatch; struct AddressSpaceDispatch *next_dispatch; MemoryListener dispatch_listener; - + QTAILQ_HEAD(memory_listeners_as, MemoryListener) listeners; QTAILQ_ENTRY(AddressSpace) address_spaces_link; }; @@ -425,6 +448,30 @@ void memory_region_init_ram_ptr(MemoryRegion *mr, uint64_t size, void *ptr); +/** + * memory_region_init_ram_device_ptr: Initialize RAM device memory region from + * a user-provided pointer. + * + * A RAM device represents a mapping to a physical device, such as to a PCI + * MMIO BAR of an vfio-pci assigned device. 
The memory region may be mapped + * into the VM address space and access to the region will modify memory + * directly. However, the memory region should not be included in a memory + * dump (device may not be enabled/mapped at the time of the dump), and + * operations incompatible with manipulating MMIO should be avoided. Replaces + * skip_dump flag. + * + * @mr: the #MemoryRegion to be initialized. + * @owner: the object that tracks the region's reference count + * @name: the name of the region. + * @size: size of the region. + * @ptr: memory to be mapped; must contain at least @size bytes. + */ +void memory_region_init_ram_device_ptr(MemoryRegion *mr, + struct Object *owner, + const char *name, + uint64_t size, + void *ptr); + /** * memory_region_init_alias: Initialize a memory region that aliases all or a * part of another memory region. @@ -551,22 +598,13 @@ static inline bool memory_region_is_ram(MemoryRegion *mr) } /** - * memory_region_is_skip_dump: check whether a memory region should not be - * dumped - * - * Returns %true is a memory region should not be dumped(e.g. VFIO BAR MMAP). + * memory_region_is_ram_device: check whether a memory region is a ram device * - * @mr: the memory region being queried - */ -bool memory_region_is_skip_dump(MemoryRegion *mr); - -/** - * memory_region_set_skip_dump: Set skip_dump flag, dump will ignore this memory - * region + * Returns %true is a memory region is a device backed ram region * * @mr: the memory region being queried */ -void memory_region_set_skip_dump(MemoryRegion *mr); +bool memory_region_is_ram_device(MemoryRegion *mr); /** * memory_region_is_romd: check whether a memory region is in ROMD mode @@ -607,6 +645,15 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr); /** * memory_region_notify_iommu: notify a change in an IOMMU translation entry. * + * The notification type will be decided by entry.perm bits: + * + * - For UNMAP (cache invalidation) notifies: set entry.perm to IOMMU_NONE. + * - For MAP (newly added entry) notifies: set entry.perm to the + * permission of the page (which is definitely !IOMMU_NONE). + * + * Note: for any IOMMU implementation, an in-place mapping change + * should be notified with an UNMAP followed by a MAP. + * * @mr: the memory region that was changed * @entry: the new entry in the IOMMU translation table. The entry * replaces all old entries for the same virtual I/O address range. @@ -620,11 +667,12 @@ void memory_region_notify_iommu(MemoryRegion *mr, * IOMMU translation entries. * * @mr: the memory region to observe - * @n: the notifier to be added; the notifier receives a pointer to an - * #IOMMUTLBEntry as the opaque value; the pointer ceases to be - * valid on exit from the notifier. + * @n: the IOMMUNotifier to be added; the notify callback receives a + * pointer to an #IOMMUTLBEntry as the opaque value; the pointer + * ceases to be valid on exit from the notifier. 
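Since the plain Notifier hooks are replaced by the richer IOMMUNotifier above, a brief sketch of how a client fills one in and subscribes to unmap events only; the callback body and the example_* names are invented:

    /* Sketch only, not part of the patch: registering for IOMMU unmap
     * notifications with the new IOMMUNotifier type. */
    #include "qemu/osdep.h"
    #include "exec/memory.h"

    static void example_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *entry)
    {
        /* Only UNMAP events are delivered here (see notifier_flags below),
         * so entry->perm is IOMMU_NONE per the notify contract above. */
        /* ... drop any shadow mapping covering entry->iova ... */
    }

    static IOMMUNotifier example_notifier = {
        .notify         = example_unmap_notify,
        .notifier_flags = IOMMU_NOTIFIER_UNMAP,
    };

    void example_watch_iommu(MemoryRegion *iommu_mr)
    {
        memory_region_register_iommu_notifier(iommu_mr, &example_notifier);
    }

    void example_unwatch_iommu(MemoryRegion *iommu_mr)
    {
        memory_region_unregister_iommu_notifier(iommu_mr, &example_notifier);
    }

A notifier interested in newly created mappings as well would register with IOMMU_NOTIFIER_ALL instead, and the region's notify_flag_changed callback lets the IOMMU model react when the set of requested event types changes.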
*/ -void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n); +void memory_region_register_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n); /** * memory_region_iommu_replay: replay existing IOMMU translations to @@ -636,7 +684,8 @@ void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n); * @is_write: Whether to treat the replay as a translate "write" * through the iommu */ -void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write); +void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, + bool is_write); /** * memory_region_unregister_iommu_notifier: unregister a notifier for @@ -646,7 +695,8 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write); * needs to be called * @n: the notifier to be removed. */ -void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n); +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n); /** * memory_region_name: get a memory region's name @@ -1154,12 +1204,11 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr, hwaddr addr, uint64_t size); /** - * address_space_sync_dirty_bitmap: synchronize the dirty log for all memory + * memory_global_dirty_log_sync: synchronize the dirty log for all memory * - * Synchronizes the dirty page log for an entire address space. - * @as: the address space that contains the memory being synchronized + * Synchronizes the dirty page log for all address spaces. */ -void address_space_sync_dirty_bitmap(AddressSpace *as); +void memory_global_dirty_log_sync(void); /** * memory_region_transaction_begin: Start a transaction. @@ -1431,9 +1480,11 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr); static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write) { if (is_write) { - return memory_region_is_ram(mr) && !mr->readonly; + return memory_region_is_ram(mr) && + !mr->readonly && !memory_region_is_ram_device(mr); } else { - return memory_region_is_ram(mr) || memory_region_is_romd(mr); + return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) || + memory_region_is_romd(mr); } } diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 2a9465da11..54d7108a9e 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -36,6 +36,7 @@ struct RAMBlock { /* RCU-enabled, writes protected by the ramlist lock */ QLIST_ENTRY(RAMBlock) next; int fd; + size_t page_size; }; static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset) diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h index dce95d92d6..c7f17f26e0 100644 --- a/include/exec/tb-context.h +++ b/include/exec/tb-context.h @@ -38,7 +38,7 @@ struct TBContext { QemuMutex tb_lock; /* statistics */ - int tb_flush_count; + unsigned tb_flush_count; int tb_phys_invalidate_count; }; diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index 1bde349b74..14f8383686 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -658,6 +658,21 @@ static inline int floatx80_is_any_nan(floatx80 a) return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); } +/*---------------------------------------------------------------------------- +| Return whether the given value is an invalid floatx80 encoding. +| Invalid floatx80 encodings arise when the integer bit is not set, but +| the exponent is not zero. The only times the integer bit is permitted to +| be zero is in subnormal numbers and the value zero. 
+| This includes what the Intel software developer's manual calls pseudo-NaNs, +| pseudo-infinities and un-normal numbers. It does not include +| pseudo-denormals, which must still be correctly handled as inputs even +| if they are never generated as outputs. +*----------------------------------------------------------------------------*/ +static inline bool floatx80_invalid_encoding(floatx80 a) +{ + return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0; +} + #define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) #define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) #define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) diff --git a/include/glib-compat.h b/include/glib-compat.h index 8d5a7f3801..acf254d2a0 100644 --- a/include/glib-compat.h +++ b/include/glib-compat.h @@ -280,4 +280,52 @@ static inline void g_hash_table_add(GHashTable *hash_table, gpointer key) } while (0) #endif +#if !GLIB_CHECK_VERSION(2, 28, 0) +static inline void g_list_free_full(GList *list, GDestroyNotify free_func) +{ + GList *l; + + for (l = list; l; l = l->next) { + free_func(l->data); + } + + g_list_free(list); +} + +static inline void g_slist_free_full(GSList *list, GDestroyNotify free_func) +{ + GSList *l; + + for (l = list; l; l = l->next) { + free_func(l->data); + } + + g_slist_free(list); +} +#endif + +#if !GLIB_CHECK_VERSION(2, 26, 0) +static inline void g_source_set_name(GSource *source, const char *name) +{ + /* This is just a debugging aid, so leaving it a no-op */ +} +static inline void g_source_set_name_by_id(guint tag, const char *name) +{ + /* This is just a debugging aid, so leaving it a no-op */ +} +#endif + +#if !GLIB_CHECK_VERSION(2, 36, 0) +/* Always fail. This will not include error_report output in the test log, + * sending it instead to stderr. 
+ */ +#define g_test_initialized() (0) +#endif +#if !GLIB_CHECK_VERSION(2, 38, 0) +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS +#error schizophrenic detection of glib subprocess testing +#endif +#define g_test_subprocess() (0) +#endif + #endif diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h index 41c1d95c4c..154f3b82f6 100644 --- a/include/hw/acpi/acpi-defs.h +++ b/include/hw/acpi/acpi-defs.h @@ -294,7 +294,8 @@ typedef struct AcpiMultipleApicTable AcpiMultipleApicTable; #define ACPI_APIC_GENERIC_DISTRIBUTOR 12 #define ACPI_APIC_GENERIC_MSI_FRAME 13 #define ACPI_APIC_GENERIC_REDISTRIBUTOR 14 -#define ACPI_APIC_RESERVED 15 /* 15 and greater are reserved */ +#define ACPI_APIC_GENERIC_TRANSLATOR 15 +#define ACPI_APIC_RESERVED 16 /* 16 and greater are reserved */ /* * MADT sub-structures (Follow MULTIPLE_APIC_DESCRIPTION_TABLE) @@ -342,6 +343,24 @@ struct AcpiMadtLocalNmi { } QEMU_PACKED; typedef struct AcpiMadtLocalNmi AcpiMadtLocalNmi; +struct AcpiMadtProcessorX2Apic { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t x2apic_id; /* Processor's local x2APIC ID */ + uint32_t flags; + uint32_t uid; /* Processor object _UID */ +} QEMU_PACKED; +typedef struct AcpiMadtProcessorX2Apic AcpiMadtProcessorX2Apic; + +struct AcpiMadtLocalX2ApicNmi { + ACPI_SUB_HEADER_DEF + uint16_t flags; /* MPS INTI flags */ + uint32_t uid; /* Processor object _UID */ + uint8_t lint; /* Local APIC LINT# */ + uint8_t reserved[3]; /* Local APIC LINT# */ +} QEMU_PACKED; +typedef struct AcpiMadtLocalX2ApicNmi AcpiMadtLocalX2ApicNmi; + struct AcpiMadtGenericInterrupt { ACPI_SUB_HEADER_DEF uint16_t reserved; @@ -395,6 +414,16 @@ struct AcpiMadtGenericRedistributor { typedef struct AcpiMadtGenericRedistributor AcpiMadtGenericRedistributor; +struct AcpiMadtGenericTranslator { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t translation_id; + uint64_t base_address; + uint32_t reserved2; +} QEMU_PACKED; + +typedef struct AcpiMadtGenericTranslator AcpiMadtGenericTranslator; + /* * Generic Timer Description Table (GTDT) */ @@ -474,6 +503,17 @@ struct AcpiSratProcessorAffinity } QEMU_PACKED; typedef struct AcpiSratProcessorAffinity AcpiSratProcessorAffinity; +struct AcpiSratProcessorX2ApicAffinity { + ACPI_SUB_HEADER_DEF + uint16_t reserved; + uint32_t proximity_domain; + uint32_t x2apic_id; + uint32_t flags; + uint32_t clk_domain; + uint32_t reserved2; +} QEMU_PACKED; +typedef struct AcpiSratProcessorX2ApicAffinity AcpiSratProcessorX2ApicAffinity; + struct AcpiSratMemoryAffinity { ACPI_SUB_HEADER_DEF @@ -579,7 +619,10 @@ struct AcpiDmarDeviceScope { uint16_t reserved; uint8_t enumeration_id; uint8_t bus; - uint16_t path[0]; /* list of dev:func pairs */ + struct { + uint8_t device; + uint8_t function; + } path[0]; } QEMU_PACKED; typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope; @@ -598,4 +641,72 @@ typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit; /* Masks for Flags field above */ #define ACPI_DMAR_INCLUDE_PCI_ALL 1 +/* + * Input Output Remapping Table (IORT) + * Conforms to "IO Remapping Table System Software on ARM Platforms", + * Document number: ARM DEN 0049B, October 2015 + */ + +struct AcpiIortTable { + ACPI_TABLE_HEADER_DEF /* ACPI common table header */ + uint32_t node_count; + uint32_t node_offset; + uint32_t reserved; +} QEMU_PACKED; +typedef struct AcpiIortTable AcpiIortTable; + +/* + * IORT node types + */ + +#define ACPI_IORT_NODE_HEADER_DEF /* Node format common fields */ \ + uint8_t type; \ + uint16_t length; \ + uint8_t revision; \ + uint32_t reserved; \ + uint32_t 
mapping_count; \ + uint32_t mapping_offset; + +/* Values for node Type above */ +enum { + ACPI_IORT_NODE_ITS_GROUP = 0x00, + ACPI_IORT_NODE_NAMED_COMPONENT = 0x01, + ACPI_IORT_NODE_PCI_ROOT_COMPLEX = 0x02, + ACPI_IORT_NODE_SMMU = 0x03, + ACPI_IORT_NODE_SMMU_V3 = 0x04 +}; + +struct AcpiIortIdMapping { + uint32_t input_base; + uint32_t id_count; + uint32_t output_base; + uint32_t output_reference; + uint32_t flags; +} QEMU_PACKED; +typedef struct AcpiIortIdMapping AcpiIortIdMapping; + +struct AcpiIortMemoryAccess { + uint32_t cache_coherency; + uint8_t hints; + uint16_t reserved; + uint8_t memory_flags; +} QEMU_PACKED; +typedef struct AcpiIortMemoryAccess AcpiIortMemoryAccess; + +struct AcpiIortItsGroup { + ACPI_IORT_NODE_HEADER_DEF + uint32_t its_count; + uint32_t identifiers[0]; +} QEMU_PACKED; +typedef struct AcpiIortItsGroup AcpiIortItsGroup; + +struct AcpiIortRC { + ACPI_IORT_NODE_HEADER_DEF + AcpiIortMemoryAccess memory_properties; + uint32_t ats_attribute; + uint32_t pci_segment_number; + AcpiIortIdMapping id_mapping_array[0]; +} QEMU_PACKED; +typedef struct AcpiIortRC AcpiIortRC; + #endif diff --git a/include/hw/acpi/acpi_dev_interface.h b/include/hw/acpi/acpi_dev_interface.h index da4ef7fbd3..901a4ae876 100644 --- a/include/hw/acpi/acpi_dev_interface.h +++ b/include/hw/acpi/acpi_dev_interface.h @@ -10,6 +10,7 @@ typedef enum { ACPI_PCI_HOTPLUG_STATUS = 2, ACPI_CPU_HOTPLUG_STATUS = 4, ACPI_MEMORY_HOTPLUG_STATUS = 8, + ACPI_NVDIMM_HOTPLUG_STATUS = 16, } AcpiEventStatusBits; #define TYPE_ACPI_DEVICE_IF "acpi-device-interface" diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h index e5f087803f..559326cbd5 100644 --- a/include/hw/acpi/aml-build.h +++ b/include/hw/acpi/aml-build.h @@ -367,6 +367,7 @@ Aml *aml_sizeof(Aml *arg); Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target); Aml *aml_object_type(Aml *object); +void build_append_int_noprefix(GArray *table, uint64_t value, int size); void build_header(BIOSLinker *linker, GArray *table_data, AcpiTableHeader *h, const char *sig, int len, uint8_t rev, diff --git a/include/hw/adc/stm32f2xx_adc.h b/include/hw/adc/stm32f2xx_adc.h new file mode 100644 index 0000000000..a72f734eb1 --- /dev/null +++ b/include/hw/adc/stm32f2xx_adc.h @@ -0,0 +1,87 @@ +/* + * STM32F2XX ADC + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef HW_STM32F2XX_ADC_H +#define HW_STM32F2XX_ADC_H + +#define ADC_SR 0x00 +#define ADC_CR1 0x04 +#define ADC_CR2 0x08 +#define ADC_SMPR1 0x0C +#define ADC_SMPR2 0x10 +#define ADC_JOFR1 0x14 +#define ADC_JOFR2 0x18 +#define ADC_JOFR3 0x1C +#define ADC_JOFR4 0x20 +#define ADC_HTR 0x24 +#define ADC_LTR 0x28 +#define ADC_SQR1 0x2C +#define ADC_SQR2 0x30 +#define ADC_SQR3 0x34 +#define ADC_JSQR 0x38 +#define ADC_JDR1 0x3C +#define ADC_JDR2 0x40 +#define ADC_JDR3 0x44 +#define ADC_JDR4 0x48 +#define ADC_DR 0x4C + +#define ADC_CR2_ADON 0x01 +#define ADC_CR2_CONT 0x02 +#define ADC_CR2_ALIGN 0x800 +#define ADC_CR2_SWSTART 0x40000000 + +#define ADC_CR1_RES 0x3000000 + +#define ADC_COMMON_ADDRESS 0x100 + +#define TYPE_STM32F2XX_ADC "stm32f2xx-adc" +#define STM32F2XX_ADC(obj) \ + OBJECT_CHECK(STM32F2XXADCState, (obj), TYPE_STM32F2XX_ADC) + +typedef struct { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t adc_sr; + uint32_t adc_cr1; + uint32_t adc_cr2; + uint32_t adc_smpr1; + uint32_t adc_smpr2; + uint32_t adc_jofr[4]; + uint32_t adc_htr; + uint32_t adc_ltr; + uint32_t adc_sqr1; + uint32_t adc_sqr2; + uint32_t adc_sqr3; + uint32_t adc_jsqr; + uint32_t adc_jdr[4]; + uint32_t adc_dr; + + qemu_irq irq; +} STM32F2XXADCState; + +#endif /* HW_STM32F2XX_ADC_H */ diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h new file mode 100644 index 0000000000..5406b498d7 --- /dev/null +++ b/include/hw/arm/aspeed_soc.h @@ -0,0 +1,65 @@ +/* + * ASPEED SoC family + * + * Andrew Jeffery + * + * Copyright 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See + * the COPYING file in the top-level directory. + */ + +#ifndef ASPEED_SOC_H +#define ASPEED_SOC_H + +#include "hw/arm/arm.h" +#include "hw/intc/aspeed_vic.h" +#include "hw/misc/aspeed_scu.h" +#include "hw/misc/aspeed_sdmc.h" +#include "hw/timer/aspeed_timer.h" +#include "hw/i2c/aspeed_i2c.h" +#include "hw/ssi/aspeed_smc.h" + +#define ASPEED_SPIS_NUM 2 + +typedef struct AspeedSoCState { + /*< private >*/ + DeviceState parent; + + /*< public >*/ + ARMCPU *cpu; + MemoryRegion iomem; + AspeedVICState vic; + AspeedTimerCtrlState timerctrl; + AspeedI2CState i2c; + AspeedSCUState scu; + AspeedSMCState fmc; + AspeedSMCState spi[ASPEED_SPIS_NUM]; + AspeedSDMCState sdmc; +} AspeedSoCState; + +#define TYPE_ASPEED_SOC "aspeed-soc" +#define ASPEED_SOC(obj) OBJECT_CHECK(AspeedSoCState, (obj), TYPE_ASPEED_SOC) + +typedef struct AspeedSoCInfo { + const char *name; + const char *cpu_model; + uint32_t silicon_rev; + hwaddr sdram_base; + int spis_num; + const hwaddr *spi_bases; + const char *fmc_typename; + const char **spi_typename; +} AspeedSoCInfo; + +typedef struct AspeedSoCClass { + DeviceClass parent_class; + AspeedSoCInfo *info; +} AspeedSoCClass; + +#define ASPEED_SOC_CLASS(klass) \ + OBJECT_CLASS_CHECK(AspeedSoCClass, (klass), TYPE_ASPEED_SOC) +#define ASPEED_SOC_GET_CLASS(obj) \ + OBJECT_GET_CLASS(AspeedSoCClass, (obj), TYPE_ASPEED_SOC) + +#endif /* ASPEED_SOC_H */ diff --git a/include/hw/arm/ast2400.h b/include/hw/arm/ast2400.h deleted file mode 100644 index 7833bc716c..0000000000 --- a/include/hw/arm/ast2400.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * ASPEED AST2400 SoC - * - * Andrew Jeffery - * - * Copyright 2016 IBM Corp. - * - * This code is licensed under the GPL version 2 or later. See - * the COPYING file in the top-level directory. 
- */ - -#ifndef AST2400_H -#define AST2400_H - -#include "hw/arm/arm.h" -#include "hw/intc/aspeed_vic.h" -#include "hw/misc/aspeed_scu.h" -#include "hw/timer/aspeed_timer.h" -#include "hw/i2c/aspeed_i2c.h" -#include "hw/ssi/aspeed_smc.h" - -typedef struct AST2400State { - /*< private >*/ - DeviceState parent; - - /*< public >*/ - ARMCPU *cpu; - MemoryRegion iomem; - AspeedVICState vic; - AspeedTimerCtrlState timerctrl; - AspeedI2CState i2c; - AspeedSCUState scu; - AspeedSMCState smc; - AspeedSMCState spi; -} AST2400State; - -#define TYPE_AST2400 "ast2400" -#define AST2400(obj) OBJECT_CHECK(AST2400State, (obj), TYPE_AST2400) - -#define AST2400_SDRAM_BASE 0x40000000 - -#endif /* AST2400_H */ diff --git a/include/hw/arm/pxa.h b/include/hw/arm/pxa.h index dd1a48b0c1..191e068184 100644 --- a/include/hw/arm/pxa.h +++ b/include/hw/arm/pxa.h @@ -83,7 +83,6 @@ typedef struct PXA2xxLCDState PXA2xxLCDState; PXA2xxLCDState *pxa2xx_lcdc_init(MemoryRegion *sysmem, hwaddr base, qemu_irq irq); void pxa2xx_lcd_vsync_notifier(PXA2xxLCDState *s, qemu_irq handler); -void pxa2xx_lcdc_oritentation(void *opaque, int angle); /* pxa2xx_mmci.c */ typedef struct PXA2xxMMCIState PXA2xxMMCIState; diff --git a/include/hw/arm/stm32f205_soc.h b/include/hw/arm/stm32f205_soc.h index 779b5da2dc..133214195b 100644 --- a/include/hw/arm/stm32f205_soc.h +++ b/include/hw/arm/stm32f205_soc.h @@ -28,6 +28,9 @@ #include "hw/misc/stm32f2xx_syscfg.h" #include "hw/timer/stm32f2xx_timer.h" #include "hw/char/stm32f2xx_usart.h" +#include "hw/adc/stm32f2xx_adc.h" +#include "hw/or-irq.h" +#include "hw/ssi/stm32f2xx_spi.h" #define TYPE_STM32F205_SOC "stm32f205-soc" #define STM32F205_SOC(obj) \ @@ -35,6 +38,8 @@ #define STM_NUM_USARTS 6 #define STM_NUM_TIMERS 4 +#define STM_NUM_ADCS 3 +#define STM_NUM_SPIS 3 #define FLASH_BASE_ADDRESS 0x08000000 #define FLASH_SIZE (1024 * 1024) @@ -52,6 +57,10 @@ typedef struct STM32F205State { STM32F2XXSyscfgState syscfg; STM32F2XXUsartState usart[STM_NUM_USARTS]; STM32F2XXTimerState timer[STM_NUM_TIMERS]; + STM32F2XXADCState adc[STM_NUM_ADCS]; + STM32F2XXSPIState spi[STM_NUM_SPIS]; + + qemu_or_irq *adc_irqs; } STM32F205State; #endif diff --git a/include/hw/arm/virt-acpi-build.h b/include/hw/arm/virt-acpi-build.h index e43330ad65..f5ec749b8f 100644 --- a/include/hw/arm/virt-acpi-build.h +++ b/include/hw/arm/virt-acpi-build.h @@ -33,6 +33,7 @@ typedef struct VirtGuestInfo { const int *irqmap; bool use_highmem; int gic_version; + bool no_its; } VirtGuestInfo; diff --git a/include/hw/boards.h b/include/hw/boards.h index c52cc44873..c9696bd168 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -86,6 +86,12 @@ typedef struct { * Returns a @HotpluggableCPUList, which describes CPUs objects which * could be added with -device/device_add. * Caller is responsible for freeing returned list. + * @minimum_page_bits: + * If non-zero, the board promises never to create a CPU with a page size + * smaller than this, so QEMU can use a more efficient larger page + * size than the target architecture's minimum. (Attempting to create + * such a CPU will fail.) Note that changing this is a migration + * compatibility break for the machine. 
*/ struct MachineClass { /*< private >*/ @@ -93,7 +99,7 @@ struct MachineClass { /*< public >*/ const char *family; /* NULL iff @name identifies a standalone machtype */ - const char *name; + char *name; const char *alias; const char *desc; @@ -124,6 +130,7 @@ struct MachineClass { ram_addr_t default_ram_size; bool option_rom_has_mr; bool rom_file_has_mr; + int minimum_page_bits; HotplugHandler *(*get_hotplug_handler)(MachineState *machine, DeviceState *dev); diff --git a/include/hw/bt.h b/include/hw/bt.h index 185e79df2b..2fa22bdab6 100644 --- a/include/hw/bt.h +++ b/include/hw/bt.h @@ -128,7 +128,7 @@ enum { __csrhci_pins, }; qemu_irq *csrhci_pins_get(CharDriverState *chr); -CharDriverState *uart_hci_init(qemu_irq wakeup); +CharDriverState *uart_hci_init(void); /* bt-l2cap.c */ struct bt_l2cap_device_s; @@ -174,8 +174,6 @@ enum bt_l2cap_psm_predef { void bt_l2cap_sdp_init(struct bt_l2cap_device_s *dev); /* bt-hid.c */ -struct bt_device_s *bt_mouse_init(struct bt_scatternet_s *net); -struct bt_device_s *bt_tablet_init(struct bt_scatternet_s *net); struct bt_device_s *bt_keyboard_init(struct bt_scatternet_s *net); /* Link Management Protocol layer defines */ diff --git a/include/hw/char/bcm2835_aux.h b/include/hw/char/bcm2835_aux.h index 42f0ee7a92..6865f154bc 100644 --- a/include/hw/char/bcm2835_aux.h +++ b/include/hw/char/bcm2835_aux.h @@ -22,7 +22,7 @@ typedef struct { /*< public >*/ MemoryRegion iomem; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; uint8_t read_fifo[BCM2835_AUX_RX_FIFO_LEN]; diff --git a/include/hw/char/cadence_uart.h b/include/hw/char/cadence_uart.h index a12773c076..ca75eb5e32 100644 --- a/include/hw/char/cadence_uart.h +++ b/include/hw/char/cadence_uart.h @@ -44,7 +44,7 @@ typedef struct { uint32_t rx_count; uint32_t tx_count; uint64_t char_tx_time; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; QEMUTimer *fifo_trigger_handle; } CadenceUARTState; diff --git a/include/hw/char/digic-uart.h b/include/hw/char/digic-uart.h index 7b3f145372..340c8e1111 100644 --- a/include/hw/char/digic-uart.h +++ b/include/hw/char/digic-uart.h @@ -19,6 +19,7 @@ #define HW_CHAR_DIGIC_UART_H #include "hw/sysbus.h" +#include "sysemu/char.h" #define TYPE_DIGIC_UART "digic-uart" #define DIGIC_UART(obj) \ @@ -37,7 +38,7 @@ typedef struct DigicUartState { /*< public >*/ MemoryRegion regs_region; - CharDriverState *chr; + CharBackend chr; uint32_t reg_rx; uint32_t reg_st; diff --git a/include/hw/char/imx_serial.h b/include/hw/char/imx_serial.h index 6cd75c0ba7..4cc3fbc395 100644 --- a/include/hw/char/imx_serial.h +++ b/include/hw/char/imx_serial.h @@ -19,6 +19,7 @@ #define IMX_SERIAL_H #include "hw/sysbus.h" +#include "sysemu/char.h" #define TYPE_IMX_SERIAL "imx.serial" #define IMX_SERIAL(obj) OBJECT_CHECK(IMXSerialState, (obj), TYPE_IMX_SERIAL) @@ -96,7 +97,7 @@ typedef struct IMXSerialState { uint32_t ucr3; qemu_irq irq; - CharDriverState *chr; + CharBackend chr; } IMXSerialState; #endif diff --git a/include/hw/char/serial.h b/include/hw/char/serial.h index a4fd3d559c..c928d7d907 100644 --- a/include/hw/char/serial.h +++ b/include/hw/char/serial.h @@ -28,8 +28,10 @@ #include "hw/hw.h" #include "sysemu/sysemu.h" +#include "sysemu/char.h" #include "exec/memory.h" #include "qemu/fifo8.h" +#include "sysemu/char.h" #define UART_FIFO_LENGTH 16 /* 16550A Fifo Length */ @@ -52,7 +54,7 @@ struct SerialState { it can be reset while reading iir */ int thr_ipending; qemu_irq irq; - CharDriverState *chr; + CharBackend chr; int last_break_enable; int it_shift; int baudbase; @@ -94,6 
+96,6 @@ SerialState *serial_mm_init(MemoryRegion *address_space, /* serial-isa.c */ #define TYPE_ISA_SERIAL "isa-serial" -void serial_hds_isa_init(ISABus *bus, int n); +void serial_hds_isa_init(ISABus *bus, int from, int to); #endif diff --git a/include/hw/char/stm32f2xx_usart.h b/include/hw/char/stm32f2xx_usart.h index b97f192a45..3267523270 100644 --- a/include/hw/char/stm32f2xx_usart.h +++ b/include/hw/char/stm32f2xx_usart.h @@ -67,7 +67,7 @@ typedef struct { uint32_t usart_cr3; uint32_t usart_gtpr; - CharDriverState *chr; + CharBackend chr; qemu_irq irq; } STM32F2XXUsartState; #endif /* HW_STM32F2XX_USART_H */ diff --git a/include/hw/compat.h b/include/hw/compat.h index 7ee7299c36..8dfc7a38c0 100644 --- a/include/hw/compat.h +++ b/include/hw/compat.h @@ -1,6 +1,29 @@ #ifndef HW_COMPAT_H #define HW_COMPAT_H +#define HW_COMPAT_2_7 \ + {\ + .driver = "virtio-pci",\ + .property = "page-per-vq",\ + .value = "on",\ + },{\ + .driver = "virtio-serial-device",\ + .property = "emergency-write",\ + .value = "off",\ + },{\ + .driver = "ioapic",\ + .property = "version",\ + .value = "0x11",\ + },{\ + .driver = "intel-iommu",\ + .property = "x-buggy-eim",\ + .value = "true",\ + },{\ + .driver = "virtio-pci",\ + .property = "x-ignore-backend-features",\ + .value = "on",\ + }, + #define HW_COMPAT_2_6 \ {\ .driver = "virtio-mmio",\ diff --git a/include/hw/core/generic-loader.h b/include/hw/core/generic-loader.h new file mode 100644 index 0000000000..dd27c42ab0 --- /dev/null +++ b/include/hw/core/generic-loader.h @@ -0,0 +1,46 @@ +/* + * Generic Loader + * + * Copyright (C) 2014 Li Guang + * Written by Li Guang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. 
+ */ + +#ifndef GENERIC_LOADER_H +#define GENERIC_LOADER_H + +#include "elf.h" + +typedef struct GenericLoaderState { + /* */ + DeviceState parent_obj; + + /* */ + CPUState *cpu; + + uint64_t addr; + uint64_t data; + uint8_t data_len; + uint32_t cpu_num; + + char *file; + + bool force_raw; + bool data_be; + bool set_pc; +} GenericLoaderState; + +#define TYPE_GENERIC_LOADER "loader" +#define GENERIC_LOADER(obj) OBJECT_CHECK(GenericLoaderState, (obj), \ + TYPE_GENERIC_LOADER) + +#endif diff --git a/include/hw/dma/xlnx-zynq-devcfg.h b/include/hw/dma/xlnx-zynq-devcfg.h index d40e5c8df6..9f5119a89a 100644 --- a/include/hw/dma/xlnx-zynq-devcfg.h +++ b/include/hw/dma/xlnx-zynq-devcfg.h @@ -34,7 +34,7 @@ #define XLNX_ZYNQ_DEVCFG(obj) \ OBJECT_CHECK(XlnxZynqDevcfg, (obj), TYPE_XLNX_ZYNQ_DEVCFG) -#define XLNX_ZYNQ_DEVCFG_R_MAX 0x118 +#define XLNX_ZYNQ_DEVCFG_R_MAX (0x100 / 4) #define XLNX_ZYNQ_DEVCFG_DMA_CMD_FIFO_LEN 10 diff --git a/include/hw/elf_ops.h b/include/hw/elf_ops.h index f510e7ec2a..25659b93be 100644 --- a/include/hw/elf_ops.h +++ b/include/hw/elf_ops.h @@ -263,7 +263,8 @@ static int glue(load_elf, SZ)(const char *name, int fd, void *translate_opaque, int must_swab, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, - int elf_machine, int clear_lsb, int data_swab) + int elf_machine, int clear_lsb, int data_swab, + AddressSpace *as) { struct elfhdr ehdr; struct elf_phdr *phdr = NULL, *ph; @@ -280,6 +281,11 @@ static int glue(load_elf, SZ)(const char *name, int fd, glue(bswap_ehdr, SZ)(&ehdr); } + if (elf_machine <= EM_NONE) { + /* The caller didn't specify an ARCH, so figure it out from the ELF header */ + elf_machine = ehdr.e_machine; + } + switch (elf_machine) { case EM_PPC64: if (ehdr.e_machine != EM_PPC64) { @@ -400,7 +406,7 @@ static int glue(load_elf, SZ)(const char *name, int fd, snprintf(label, sizeof(label), "phdr #%d: %s", i, name); /* rom_add_elf_program() seizes the ownership of 'data' */ - rom_add_elf_program(label, data, file_size, mem_size, addr); + rom_add_elf_program(label, data, file_size, mem_size, addr, as); total_size += mem_size; if (addr < low) diff --git a/include/hw/hotplug.h b/include/hw/hotplug.h index c0db869f85..1a0516a479 100644 --- a/include/hw/hotplug.h +++ b/include/hw/hotplug.h @@ -83,7 +83,6 @@ void hotplug_handler_pre_plug(HotplugHandler *plug_handler, DeviceState *plugged_dev, Error **errp); - /** * hotplug_handler_unplug_request: * diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 06c4e9f6f9..1209eb483a 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -146,6 +146,10 @@ typedef struct APICCommonClass void (*pre_save)(APICCommonState *s); void (*post_load)(APICCommonState *s); void (*reset)(APICCommonState *s); + /* send_msi emulates an APIC bus and its proper place would be in a new + * device, but it's convenient to have it here for now.
+ */ + void (*send_msi)(MSIMessage *msi); } APICCommonClass; struct APICCommonState { @@ -156,7 +160,8 @@ struct APICCommonState { MemoryRegion io_memory; X86CPU *cpu; uint32_t apicbase; - uint8_t id; + uint8_t id; /* legacy APIC ID */ + uint32_t initial_apic_id; uint8_t version; uint8_t arb_id; uint8_t tpr; @@ -222,4 +227,6 @@ static inline int apic_get_bit(uint32_t *tab, int index) return !!(tab[i] & mask); } +APICCommonClass *apic_get_class(void); + #endif /* QEMU_APIC_INTERNAL_H */ diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index a42dbd745a..405c9d122e 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -123,7 +123,6 @@ enum { union VTD_IR_TableEntry { struct { #ifdef HOST_WORDS_BIGENDIAN - uint32_t dest_id:32; /* Destination ID */ uint32_t __reserved_1:8; /* Reserved 1 */ uint32_t vector:8; /* Interrupt Vector */ uint32_t irte_mode:1; /* IRTE Mode */ @@ -147,9 +146,9 @@ union VTD_IR_TableEntry { uint32_t irte_mode:1; /* IRTE Mode */ uint32_t vector:8; /* Interrupt Vector */ uint32_t __reserved_1:8; /* Reserved 1 */ - uint32_t dest_id:32; /* Destination ID */ #endif - uint16_t source_id:16; /* Source-ID */ + uint32_t dest_id; /* Destination ID */ + uint16_t source_id; /* Source-ID */ #ifdef HOST_WORDS_BIGENDIAN uint64_t __reserved_2:44; /* Reserved 2 */ uint64_t sid_vtype:2; /* Source-ID Validation Type */ @@ -220,7 +219,7 @@ struct VTD_MSIMessage { uint32_t dest:8; uint32_t __addr_head:12; /* 0xfee */ #endif - uint32_t __addr_hi:32; + uint32_t __addr_hi; } QEMU_PACKED; uint64_t msi_addr; }; @@ -239,7 +238,7 @@ struct VTD_MSIMessage { uint16_t level:1; uint16_t trigger_mode:1; #endif - uint16_t __resved1:16; + uint16_t __resved1; } QEMU_PACKED; uint32_t msi_data; }; @@ -289,6 +288,8 @@ struct IntelIOMMUState { dma_addr_t intr_root; /* Interrupt remapping table pointer */ uint32_t intr_size; /* Number of IR table entries */ bool intr_eime; /* Extended interrupt mode enabled */ + OnOffAuto intr_eim; /* Toggle for EIM capability */ + bool buggy_eim; /* Force buggy EIM unless eim=off */ }; /* Find the VTD Address space associated with the given bus pointer, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 74c175c1e5..4b74130559 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -13,7 +13,6 @@ #include "qemu/bitmap.h" #include "sysemu/sysemu.h" #include "hw/pci/pci.h" -#include "hw/boards.h" #include "hw/compat.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" @@ -37,6 +36,7 @@ /** * PCMachineState: * @acpi_dev: link to ACPI PM device that performs ACPI hotplug handling + * @boot_cpus: number of present VCPUs */ struct PCMachineState { /*< private >*/ @@ -53,6 +53,7 @@ struct PCMachineState { ISADevice *rtc; PCIBus *bus; FWCfgState *fw_cfg; + qemu_irq *gsi; /* Configuration options: */ uint64_t max_ram_below_4g; @@ -61,6 +62,8 @@ struct PCMachineState { AcpiNVDIMMState acpi_nvdimm_state; + bool acpi_build_enabled; + /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -68,6 +71,7 @@ struct PCMachineState { bool apic_xrupt_override; unsigned apic_id_limit; CPUArchIdList *possible_cpus; + uint16_t boot_cpus; /* NUMA information: */ uint64_t numa_nodes; @@ -180,8 +184,6 @@ qemu_irq *i8259_init(ISABus *bus, qemu_irq parent_irq); qemu_irq *kvm_i8259_init(ISABus *bus); int pic_read_irq(DeviceState *d); int pic_get_output(DeviceState *d); -void hmp_info_pic(Monitor *mon, const QDict *qdict); -void hmp_info_irq(Monitor *mon, const QDict *qdict); /*
ioapic.c */ @@ -215,12 +217,11 @@ void vmmouse_set_data(const uint32_t *data); /* pckbd.c */ #define I8042_A20_LINE "a20" -void i8042_init(qemu_irq kbd_irq, qemu_irq mouse_irq, uint32_t io_base); void i8042_mm_init(qemu_irq kbd_irq, qemu_irq mouse_irq, MemoryRegion *region, ram_addr_t size, hwaddr mask); void i8042_isa_mouse_fake_event(void *opaque); -void i8042_setup_a20_line(ISADevice *dev, qemu_irq *a20_out); +void i8042_setup_a20_line(ISADevice *dev, qemu_irq a20_out); /* pc.c */ extern int fd_bootchk; @@ -283,7 +284,6 @@ int cmos_get_fd_drive_type(FloppyDriveType fd0); I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, int smm_enabled, DeviceState **piix4_pm); -void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr); /* hpet.c */ extern int no_hpet; @@ -367,6 +367,41 @@ int e820_add_entry(uint64_t, uint64_t, uint32_t); int e820_get_num_entries(void); bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); +#define PC_COMPAT_2_8 \ + +#define PC_COMPAT_2_7 \ + HW_COMPAT_2_7 \ + {\ + .driver = TYPE_X86_CPU,\ + .property = "l3-cache",\ + .value = "off",\ + },\ + {\ + .driver = TYPE_X86_CPU,\ + .property = "full-cpuid-auto-level",\ + .value = "off",\ + },\ + {\ + .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ + .property = "family",\ + .value = "15",\ + },\ + {\ + .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ + .property = "model",\ + .value = "6",\ + },\ + {\ + .driver = "Opteron_G3" "-" TYPE_X86_CPU,\ + .property = "stepping",\ + .value = "1",\ + },\ + {\ + .driver = "isa-pcspk",\ + .property = "migrate",\ + .value = "off",\ + }, + #define PC_COMPAT_2_6 \ HW_COMPAT_2_6 \ {\ @@ -394,7 +429,6 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); }, #define PC_COMPAT_2_5 \ - PC_COMPAT_2_6 \ HW_COMPAT_2_5 /* Helper for setting model-id for CPU models that changed model-id @@ -903,7 +937,6 @@ bool e820_get_entry(int, uint32_t, uint64_t *, uint64_t *); { \ MachineClass *mc = MACHINE_CLASS(oc); \ optsfn(mc); \ - mc->name = namestr; \ mc->init = initfn; \ } \ static const TypeInfo pc_machine_type_##suffix = { \ diff --git a/include/hw/i386/x86-iommu.h b/include/hw/i386/x86-iommu.h index c48e8dd597..0c89d9835b 100644 --- a/include/hw/i386/x86-iommu.h +++ b/include/hw/i386/x86-iommu.h @@ -37,6 +37,12 @@ typedef struct X86IOMMUState X86IOMMUState; typedef struct X86IOMMUClass X86IOMMUClass; +typedef enum IommuType { + TYPE_INTEL, + TYPE_AMD, + TYPE_NONE +} IommuType; + struct X86IOMMUClass { SysBusDeviceClass parent; /* Intel/AMD specific realize() hook */ @@ -67,6 +73,7 @@ typedef struct IEC_Notifier IEC_Notifier; struct X86IOMMUState { SysBusDevice busdev; bool intr_supported; /* Whether vIOMMU supports IR */ + IommuType type; /* IOMMU type - AMD/Intel */ QLIST_HEAD(, IEC_Notifier) iec_notifiers; /* IEC notify list */ }; @@ -76,6 +83,11 @@ struct X86IOMMUState { */ X86IOMMUState *x86_iommu_get_default(void); +/* + * x86_iommu_get_type - get IOMMU type + */ +IommuType x86_iommu_get_type(void); + /** * x86_iommu_iec_register_notifier - register IEC (Interrupt Entry * Cache) notifiers diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 7824bc34ce..88dc11808b 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -480,6 +480,9 @@ struct IDEBus { uint8_t retry_unit; int64_t retry_sector_num; uint32_t retry_nsector; + PortioList portio_list; + PortioList portio2_list; + VMChangeStateEntry *vmstate; }; #define TYPE_IDE_DEVICE "ide-device" diff --git a/include/hw/input/adb-keys.h 
b/include/hw/input/adb-keys.h new file mode 100644 index 0000000000..525fba8a61 --- /dev/null +++ b/include/hw/input/adb-keys.h @@ -0,0 +1,141 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2016 John Arbuckle + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +/* + * adb-keys.h + * + * Provides an enum of all the Macintosh keycodes. + * Additional information: http://www.archive.org/stream/apple-guide-macintosh-family-hardware/Apple_Guide_to_the_Macintosh_Family_Hardware_2e#page/n345/mode/2up + * page 308 + */ + +#ifndef ADB_KEYS_H +#define ADB_KEYS_H + +enum { + ADB_KEY_A = 0x00, + ADB_KEY_B = 0x0b, + ADB_KEY_C = 0x08, + ADB_KEY_D = 0x02, + ADB_KEY_E = 0x0e, + ADB_KEY_F = 0x03, + ADB_KEY_G = 0x05, + ADB_KEY_H = 0x04, + ADB_KEY_I = 0x22, + ADB_KEY_J = 0x26, + ADB_KEY_K = 0x28, + ADB_KEY_L = 0x25, + ADB_KEY_M = 0x2e, + ADB_KEY_N = 0x2d, + ADB_KEY_O = 0x1f, + ADB_KEY_P = 0x23, + ADB_KEY_Q = 0x0c, + ADB_KEY_R = 0x0f, + ADB_KEY_S = 0x01, + ADB_KEY_T = 0x11, + ADB_KEY_U = 0x20, + ADB_KEY_V = 0x09, + ADB_KEY_W = 0x0d, + ADB_KEY_X = 0x07, + ADB_KEY_Y = 0x10, + ADB_KEY_Z = 0x06, + + ADB_KEY_0 = 0x1d, + ADB_KEY_1 = 0x12, + ADB_KEY_2 = 0x13, + ADB_KEY_3 = 0x14, + ADB_KEY_4 = 0x15, + ADB_KEY_5 = 0x17, + ADB_KEY_6 = 0x16, + ADB_KEY_7 = 0x1a, + ADB_KEY_8 = 0x1c, + ADB_KEY_9 = 0x19, + + ADB_KEY_GRAVE_ACCENT = 0x32, + ADB_KEY_MINUS = 0x1b, + ADB_KEY_EQUAL = 0x18, + ADB_KEY_DELETE = 0x33, + ADB_KEY_CAPS_LOCK = 0x39, + ADB_KEY_TAB = 0x30, + ADB_KEY_RETURN = 0x24, + ADB_KEY_LEFT_BRACKET = 0x21, + ADB_KEY_RIGHT_BRACKET = 0x1e, + ADB_KEY_BACKSLASH = 0x2a, + ADB_KEY_SEMICOLON = 0x29, + ADB_KEY_APOSTROPHE = 0x27, + ADB_KEY_COMMA = 0x2b, + ADB_KEY_PERIOD = 0x2f, + ADB_KEY_FORWARD_SLASH = 0x2c, + ADB_KEY_LEFT_SHIFT = 0x38, + ADB_KEY_RIGHT_SHIFT = 0x7b, + ADB_KEY_SPACEBAR = 0x31, + ADB_KEY_LEFT_CONTROL = 0x36, + ADB_KEY_RIGHT_CONTROL = 0x7d, + ADB_KEY_LEFT_OPTION = 0x3a, + ADB_KEY_RIGHT_OPTION = 0x7c, + ADB_KEY_COMMAND = 0x37, + + ADB_KEY_KP_0 = 0x52, + ADB_KEY_KP_1 = 0x53, + ADB_KEY_KP_2 = 0x54, + ADB_KEY_KP_3 = 0x55, + ADB_KEY_KP_4 = 0x56, + ADB_KEY_KP_5 = 0x57, + ADB_KEY_KP_6 = 0x58, + ADB_KEY_KP_7 = 0x59, + ADB_KEY_KP_8 = 0x5b, + ADB_KEY_KP_9 = 0x5c, + ADB_KEY_KP_PERIOD = 0x41, + ADB_KEY_KP_ENTER = 0x4c, + ADB_KEY_KP_PLUS = 0x45, + ADB_KEY_KP_SUBTRACT = 0x4e, + ADB_KEY_KP_MULTIPLY = 0x43, + ADB_KEY_KP_DIVIDE = 0x4b, + ADB_KEY_KP_EQUAL = 0x51, + ADB_KEY_KP_CLEAR = 0x47, + + ADB_KEY_UP = 0x3e, + ADB_KEY_DOWN = 0x3d, + ADB_KEY_LEFT = 0x3b, + ADB_KEY_RIGHT = 0x3c, + + ADB_KEY_HELP = 0x72, + ADB_KEY_HOME = 0x73, + ADB_KEY_PAGE_UP = 0x74, + ADB_KEY_PAGE_DOWN = 0x79, + ADB_KEY_END = 0x77, + ADB_KEY_FORWARD_DELETE = 0x75, + + ADB_KEY_ESC = 0x35, + ADB_KEY_F1 = 0x7a, + ADB_KEY_F2 = 0x78, + ADB_KEY_F3 = 0x63, + ADB_KEY_F4 = 0x76, + ADB_KEY_F5 = 0x60, + ADB_KEY_F6 = 0x61, + ADB_KEY_F7 = 0x62, + ADB_KEY_F8 = 0x64, + ADB_KEY_F9 = 0x65, + ADB_KEY_F10 = 0x6d, + ADB_KEY_F11 = 0x67, + ADB_KEY_F12 = 0x6f, + ADB_KEY_F13 = 0x69, + ADB_KEY_F14 = 0x6b, + ADB_KEY_F15 = 0x71, + + ADB_KEY_VOLUME_UP = 0x48, + ADB_KEY_VOLUME_DOWN = 0x49, + ADB_KEY_VOLUME_MUTE = 0x4a, + ADB_KEY_POWER = 0x7f7f +}; + +/* Could not find the value for this key. 
*/ +/* #define ADB_KEY_EJECT */ + +#endif /* ADB_KEYS_H */ diff --git a/include/hw/intc/arm_gicv3_its_common.h b/include/hw/intc/arm_gicv3_its_common.h new file mode 100644 index 0000000000..1ba18944cf --- /dev/null +++ b/include/hw/intc/arm_gicv3_its_common.h @@ -0,0 +1,78 @@ +/* + * ITS support for ARM GICv3 + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Written by Pavel Fedin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#ifndef QEMU_ARM_GICV3_ITS_COMMON_H +#define QEMU_ARM_GICV3_ITS_COMMON_H + +#include "hw/sysbus.h" +#include "hw/intc/arm_gicv3_common.h" + +#define ITS_CONTROL_SIZE 0x10000 +#define ITS_TRANS_SIZE 0x10000 +#define ITS_SIZE (ITS_CONTROL_SIZE + ITS_TRANS_SIZE) + +struct GICv3ITSState { + SysBusDevice parent_obj; + + MemoryRegion iomem_main; + MemoryRegion iomem_its_cntrl; + MemoryRegion iomem_its_translation; + + GICv3State *gicv3; + + int dev_fd; /* kvm device fd if backed by kvm vgic support */ + uint64_t gits_translater_gpa; + bool translater_gpa_known; + + /* Registers */ + uint32_t ctlr; + uint64_t cbaser; + uint64_t cwriter; + uint64_t creadr; + uint64_t baser[8]; + + Error *migration_blocker; +}; + +typedef struct GICv3ITSState GICv3ITSState; + +void gicv3_its_init_mmio(GICv3ITSState *s, const MemoryRegionOps *ops); + +#define TYPE_ARM_GICV3_ITS_COMMON "arm-gicv3-its-common" +#define ARM_GICV3_ITS_COMMON(obj) \ + OBJECT_CHECK(GICv3ITSState, (obj), TYPE_ARM_GICV3_ITS_COMMON) +#define ARM_GICV3_ITS_COMMON_CLASS(klass) \ + OBJECT_CLASS_CHECK(GICv3ITSCommonClass, (klass), TYPE_ARM_GICV3_ITS_COMMON) +#define ARM_GICV3_ITS_COMMON_GET_CLASS(obj) \ + OBJECT_GET_CLASS(GICv3ITSCommonClass, (obj), TYPE_ARM_GICV3_ITS_COMMON) + +struct GICv3ITSCommonClass { + /*< private >*/ + SysBusDeviceClass parent_class; + /*< public >*/ + + int (*send_msi)(GICv3ITSState *s, uint32_t data, uint16_t devid); + void (*pre_save)(GICv3ITSState *s); + void (*post_load)(GICv3ITSState *s); +}; + +typedef struct GICv3ITSCommonClass GICv3ITSCommonClass; + +#endif diff --git a/include/hw/intc/intc.h b/include/hw/intc/intc.h new file mode 100644 index 0000000000..27d9828943 --- /dev/null +++ b/include/hw/intc/intc.h @@ -0,0 +1,33 @@ +#ifndef INTC_H +#define INTC_H + +#include "qom/object.h" + +#define TYPE_INTERRUPT_STATS_PROVIDER "intctrl" + +#define INTERRUPT_STATS_PROVIDER_CLASS(klass) \ + OBJECT_CLASS_CHECK(InterruptStatsProviderClass, (klass), \ + TYPE_INTERRUPT_STATS_PROVIDER) +#define INTERRUPT_STATS_PROVIDER_GET_CLASS(obj) \ + OBJECT_GET_CLASS(InterruptStatsProviderClass, (obj), \ + TYPE_INTERRUPT_STATS_PROVIDER) +#define INTERRUPT_STATS_PROVIDER(obj) \ + INTERFACE_CHECK(InterruptStatsProvider, (obj), \ + TYPE_INTERRUPT_STATS_PROVIDER) + +typedef struct InterruptStatsProvider { + Object parent; +} InterruptStatsProvider; + +typedef struct InterruptStatsProviderClass { + InterfaceClass parent; + + /* The returned pointer and statistics must remain valid until + * the BQL is next dropped. 
+ */ + bool (*get_statistics)(InterruptStatsProvider *obj, uint64_t **irq_counts, + unsigned int *nb_irqs); + void (*print_info)(InterruptStatsProvider *obj, Monitor *mon); +} InterruptStatsProviderClass; + +#endif diff --git a/include/hw/isa/i8257.h b/include/hw/isa/i8257.h index aa211c0df7..88a2766a3f 100644 --- a/include/hw/isa/i8257.h +++ b/include/hw/isa/i8257.h @@ -36,6 +36,8 @@ typedef struct I8257State { QEMUBH *dma_bh; bool dma_bh_scheduled; int running; + PortioList portio_page; + PortioList portio_pageh; } I8257State; #endif diff --git a/include/hw/isa/isa.h b/include/hw/isa/isa.h index 7693ac5454..c2fdd70cdc 100644 --- a/include/hw/isa/isa.h +++ b/include/hw/isa/isa.h @@ -134,12 +134,15 @@ void isa_register_ioport(ISADevice *dev, MemoryRegion *io, uint16_t start); * device and use the legacy portio routines. * * @dev: the ISADevice against which these are registered; may be NULL. + * @piolist: the PortioList associated with the io ports * @start: the base I/O port against which the portio->offset is applied. * @portio: the ports, sorted by offset. * @opaque: passed into the portio callbacks. * @name: passed into memory_region_init_io. */ -void isa_register_portio_list(ISADevice *dev, uint16_t start, +void isa_register_portio_list(ISADevice *dev, + PortioList *piolist, + uint16_t start, const MemoryRegionPortio *portio, void *opaque, const char *name); diff --git a/include/hw/lm32/lm32_pic.h b/include/hw/lm32/lm32_pic.h index 189fa386f7..e6479b8f63 100644 --- a/include/hw/lm32/lm32_pic.h +++ b/include/hw/lm32/lm32_pic.h @@ -8,7 +8,4 @@ uint32_t lm32_pic_get_im(DeviceState *d); void lm32_pic_set_ip(DeviceState *d, uint32_t ip); void lm32_pic_set_im(DeviceState *d, uint32_t im); -void lm32_hmp_info_pic(Monitor *mon, const QDict *qdict); -void lm32_hmp_info_irq(Monitor *mon, const QDict *qdict); - #endif /* QEMU_HW_LM32_PIC_H */ diff --git a/include/hw/loader.h b/include/hw/loader.h index be706a2cbe..69e5f68403 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -14,8 +14,28 @@ int get_image_size(const char *filename); int load_image(const char *filename, uint8_t *addr); /* deprecated */ ssize_t load_image_size(const char *filename, void *addr, size_t size); + +/**load_image_targphys_as: + * @filename: Path to the image file + * @addr: Address to load the image to + * @max_sz: The maximum size of the image to load + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. + * + * Load a fixed image into memory. + * + * Returns the size of the loaded image on success, -1 otherwise. + */ +int load_image_targphys_as(const char *filename, + hwaddr addr, uint64_t max_sz, AddressSpace *as); + +/** load_image_targphys: + * Same as load_image_targphys_as(), but doesn't allow the caller to specify + * an AddressSpace. + */ int load_image_targphys(const char *filename, hwaddr, uint64_t max_sz); + /** * load_image_mr: load an image into a memory region * @filename: Path to the image file @@ -45,7 +65,7 @@ int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz); #define ELF_LOAD_WRONG_ENDIAN -4 const char *load_elf_strerror(int error); -/** load_elf: +/** load_elf_as: * @filename: Path of ELF file * @translate_fn: optional function to translate load addresses * @translate_opaque: opaque data passed to @translate_fn @@ -59,6 +79,8 @@ const char *load_elf_strerror(int error); * @data_swab: Set to order of byte swapping for data. 
0 for no swap, 1 * for swapping bytes within halfwords, 2 for bytes within * words and 3 for within doublewords. + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. * * Load an ELF file's contents to the emulated system's address space. * Clients may optionally specify a callback to perform address @@ -68,8 +90,19 @@ const char *load_elf_strerror(int error); * load will fail if the target ELF does not match. Some architectures * have some architecture-specific behaviours that come into effect when * their particular values for @elf_machine are set. + * If @elf_machine is EM_NONE then the machine type will be read from the + * ELF header and no checks will be carried out against the machine type. */ +int load_elf_as(const char *filename, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, + uint64_t *highaddr, int big_endian, int elf_machine, + int clear_lsb, int data_swab, AddressSpace *as); +/** load_elf: + * Same as load_elf_as(), but doesn't allow the caller to specify an + * AddressSpace. + */ int load_elf(const char *filename, uint64_t (*translate_fn)(void *, uint64_t), void *translate_opaque, uint64_t *pentry, uint64_t *lowaddr, uint64_t *highaddr, int big_endian, int elf_machine, @@ -89,6 +122,30 @@ void load_elf_hdr(const char *filename, void *hdr, bool *is64, Error **errp); int load_aout(const char *filename, hwaddr addr, int max_sz, int bswap_needed, hwaddr target_page_size); + +/** load_uimage_as: + * @filename: Path of uimage file + * @ep: Populated with program entry point. Ignored if NULL. + * @loadaddr: Populated with the load address. Ignored if NULL. + * @is_linux: Is set to true if the image loaded is Linux. Ignored if NULL. + * @translate_fn: optional function to translate load addresses + * @translate_opaque: opaque data passed to @translate_fn + * @as: The AddressSpace to load the ELF to. The value of address_space_memory + * is used if nothing is supplied here. + * + * Loads a u-boot image into memory. + * + * Returns the size of the loaded image on success, -1 otherwise. + */ +int load_uimage_as(const char *filename, hwaddr *ep, + hwaddr *loadaddr, int *is_linux, + uint64_t (*translate_fn)(void *, uint64_t), + void *translate_opaque, AddressSpace *as); + +/** load_uimage: + * Same as load_uimage_as(), but doesn't allow the caller to specify an + * AddressSpace. 
+ */ int load_uimage(const char *filename, hwaddr *ep, hwaddr *loadaddr, int *is_linux, uint64_t (*translate_fn)(void *, uint64_t), @@ -118,14 +175,14 @@ extern bool rom_file_has_mr; int rom_add_file(const char *file, const char *fw_dir, hwaddr addr, int32_t bootindex, - bool option_rom, MemoryRegion *mr); + bool option_rom, MemoryRegion *mr, AddressSpace *as); MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len, size_t max_len, hwaddr addr, const char *fw_file_name, FWCfgReadCallback fw_callback, - void *callback_opaque); + void *callback_opaque, AddressSpace *as); int rom_add_elf_program(const char *name, void *data, size_t datasize, - size_t romsize, hwaddr addr); + size_t romsize, hwaddr addr, AddressSpace *as); int rom_check_and_register_reset(void); void rom_set_fw(FWCfgState *f); void rom_set_order_override(int order); @@ -135,11 +192,17 @@ void *rom_ptr(hwaddr addr); void hmp_info_roms(Monitor *mon, const QDict *qdict); #define rom_add_file_fixed(_f, _a, _i) \ - rom_add_file(_f, NULL, _a, _i, false, NULL) + rom_add_file(_f, NULL, _a, _i, false, NULL, NULL) #define rom_add_blob_fixed(_f, _b, _l, _a) \ - rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL) + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, NULL) #define rom_add_file_mr(_f, _mr, _i) \ - rom_add_file(_f, NULL, 0, _i, false, _mr) + rom_add_file(_f, NULL, 0, _i, false, _mr, NULL) +#define rom_add_file_as(_f, _as, _i) \ + rom_add_file(_f, NULL, 0, _i, false, NULL, _as) +#define rom_add_file_fixed_as(_f, _a, _i, _as) \ + rom_add_file(_f, NULL, _a, _i, false, NULL, _as) +#define rom_add_blob_fixed_as(_f, _b, _l, _a, _as) \ + rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, _as) #define PC_ROM_MIN_VGA 0xc0000 #define PC_ROM_MIN_OPTION 0xc8000 diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h index 1cfe9e01c4..03e1ff9558 100644 --- a/include/hw/mem/nvdimm.h +++ b/include/hw/mem/nvdimm.h @@ -98,12 +98,28 @@ typedef struct NVDIMMClass NVDIMMClass; #define NVDIMM_ACPI_IO_BASE 0x0a18 #define NVDIMM_ACPI_IO_LEN 4 +/* + * NvdimmFitBuffer: + * @fit: FIT structures for present NVDIMMs. It is updated when + * the NVDIMM device is plugged or unplugged. + * @dirty: It allows OSPM to detect change and restart read in + * progress if there is any. + */ +struct NvdimmFitBuffer { + GArray *fit; + bool dirty; +}; +typedef struct NvdimmFitBuffer NvdimmFitBuffer; + struct AcpiNVDIMMState { /* detect if NVDIMM support is enabled. */ bool is_enabled; /* the data of the fw_cfg file NVDIMM_DSM_MEM_FILE. */ GArray *dsm_mem; + + NvdimmFitBuffer fit_buf; + /* the IO region used by OSPM to transfer control to QEMU. 
*/ MemoryRegion io_mr; }; @@ -112,5 +128,8 @@ typedef struct AcpiNVDIMMState AcpiNVDIMMState; void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io, FWCfgState *fw_cfg, Object *owner); void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, GArray *dsm_dma_arrea); + BIOSLinker *linker, AcpiNVDIMMState *state, + uint32_t ram_slots); +void nvdimm_plug(AcpiNVDIMMState *state); +void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev); #endif diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h index fdfd982288..14ffc43de8 100644 --- a/include/hw/misc/aspeed_scu.h +++ b/include/hw/misc/aspeed_scu.h @@ -33,7 +33,200 @@ typedef struct AspeedSCUState { #define AST2400_A0_SILICON_REV 0x02000303U #define AST2500_A0_SILICON_REV 0x04000303U +#define AST2500_A1_SILICON_REV 0x04010303U extern bool is_supported_silicon_rev(uint32_t silicon_rev); +/* + * Extracted from Aspeed SDK v00.03.21. Fixes and extra definitions + * were added. + * + * Original header file : + * arch/arm/mach-aspeed/include/mach/regs-scu.h + * + * Copyright (C) 2012-2020 ASPEED Technology Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * History : + * 1. 2012/12/29 Ryan Chen Create + */ + +/* Hardware Strapping Register definition (for Aspeed AST2400 SOC) + * + * 31:29 Software defined strapping registers + * 28:27 DRAM size setting (for VGA driver use) + * 26:24 DRAM configuration setting + * 23 Enable 25 MHz reference clock input + * 22 Enable GPIOE pass-through mode + * 21 Enable GPIOD pass-through mode + * 20 Disable LPC to decode SuperIO 0x2E/0x4E address + * 19 Disable ACPI function + * 23,18 Clock source selection + * 17 Enable BMC 2nd boot watchdog timer + * 16 SuperIO configuration address selection + * 15 VGA Class Code selection + * 14 Enable LPC dedicated reset pin function + * 13:12 SPI mode selection + * 11:10 CPU/AHB clock frequency ratio selection + * 9:8 H-PLL default clock frequency selection + * 7 Define MAC#2 interface + * 6 Define MAC#1 interface + * 5 Enable VGA BIOS ROM + * 4 Boot flash memory extended option + * 3:2 VGA memory size selection + * 1:0 BMC CPU boot code selection + */ +#define SCU_AST2400_HW_STRAP_SW_DEFINE(x) ((x) << 29) +#define SCU_AST2400_HW_STRAP_SW_DEFINE_MASK (0x7 << 29) + +#define SCU_AST2400_HW_STRAP_DRAM_SIZE(x) ((x) << 27) +#define SCU_AST2400_HW_STRAP_DRAM_SIZE_MASK (0x3 << 27) +#define DRAM_SIZE_64MB 0 +#define DRAM_SIZE_128MB 1 +#define DRAM_SIZE_256MB 2 +#define DRAM_SIZE_512MB 3 + +#define SCU_AST2400_HW_STRAP_DRAM_CONFIG(x) ((x) << 24) +#define SCU_AST2400_HW_STRAP_DRAM_CONFIG_MASK (0x7 << 24) + +#define SCU_HW_STRAP_GPIOE_PT_EN (0x1 << 22) +#define SCU_HW_STRAP_GPIOD_PT_EN (0x1 << 21) +#define SCU_HW_STRAP_LPC_DEC_SUPER_IO (0x1 << 20) +#define SCU_AST2400_HW_STRAP_ACPI_DIS (0x1 << 19) + +/* bit 23, 18 [1,0] */ +#define SCU_AST2400_HW_STRAP_SET_CLK_SOURCE(x) (((((x) & 0x3) >> 1) << 23) \ + | (((x) & 0x1) << 18)) +#define SCU_AST2400_HW_STRAP_GET_CLK_SOURCE(x) (((((x) >> 23) & 0x1) << 1) \ + | (((x) >> 18) & 0x1)) +#define SCU_AST2400_HW_STRAP_CLK_SOURCE_MASK ((0x1 << 23) | (0x1 << 18)) +#define AST2400_CLK_25M_IN (0x1 << 23) +#define AST2400_CLK_24M_IN 0 +#define AST2400_CLK_48M_IN 1 +#define AST2400_CLK_25M_IN_24M_USB_CKI 2 +#define AST2400_CLK_25M_IN_48M_USB_CKI 3 + +#define SCU_HW_STRAP_2ND_BOOT_WDT (0x1 << 17) +#define 
SCU_HW_STRAP_SUPER_IO_CONFIG (0x1 << 16) +#define SCU_HW_STRAP_VGA_CLASS_CODE (0x1 << 15) +#define SCU_HW_STRAP_LPC_RESET_PIN (0x1 << 14) + +#define SCU_HW_STRAP_SPI_MODE(x) ((x) << 12) +#define SCU_HW_STRAP_SPI_MODE_MASK (0x3 << 12) +#define SCU_HW_STRAP_SPI_DIS 0 +#define SCU_HW_STRAP_SPI_MASTER 1 +#define SCU_HW_STRAP_SPI_M_S_EN 2 +#define SCU_HW_STRAP_SPI_PASS_THROUGH 3 + +#define SCU_AST2400_HW_STRAP_SET_CPU_AHB_RATIO(x) ((x) << 10) +#define SCU_AST2400_HW_STRAP_GET_CPU_AHB_RATIO(x) (((x) >> 10) & 3) +#define SCU_AST2400_HW_STRAP_CPU_AHB_RATIO_MASK (0x3 << 10) +#define AST2400_CPU_AHB_RATIO_1_1 0 +#define AST2400_CPU_AHB_RATIO_2_1 1 +#define AST2400_CPU_AHB_RATIO_4_1 2 +#define AST2400_CPU_AHB_RATIO_3_1 3 + +#define SCU_AST2400_HW_STRAP_GET_H_PLL_CLK(x) (((x) >> 8) & 0x3) +#define SCU_AST2400_HW_STRAP_H_PLL_CLK_MASK (0x3 << 8) +#define AST2400_CPU_384MHZ 0 +#define AST2400_CPU_360MHZ 1 +#define AST2400_CPU_336MHZ 2 +#define AST2400_CPU_408MHZ 3 + +#define SCU_HW_STRAP_MAC1_RGMII (0x1 << 7) +#define SCU_HW_STRAP_MAC0_RGMII (0x1 << 6) +#define SCU_HW_STRAP_VGA_BIOS_ROM (0x1 << 5) +#define SCU_HW_STRAP_SPI_WIDTH (0x1 << 4) + +#define SCU_HW_STRAP_VGA_SIZE_GET(x) (((x) >> 2) & 0x3) +#define SCU_HW_STRAP_VGA_MASK (0x3 << 2) +#define SCU_HW_STRAP_VGA_SIZE_SET(x) ((x) << 2) +#define VGA_8M_DRAM 0 +#define VGA_16M_DRAM 1 +#define VGA_32M_DRAM 2 +#define VGA_64M_DRAM 3 + +#define SCU_AST2400_HW_STRAP_BOOT_MODE(x) (x) +#define AST2400_NOR_BOOT 0 +#define AST2400_NAND_BOOT 1 +#define AST2400_SPI_BOOT 2 +#define AST2400_DIS_BOOT 3 + +/* + * Hardware strapping register definition (for Aspeed AST2500 SoC and + * higher) + * + * 31 Enable SPI Flash Strap Auto Fetch Mode + * 30 Enable GPIO Strap Mode + * 29 Select UART Debug Port + * 28 Reserved (1) + * 27 Enable fast reset mode for ARM ICE debugger + * 26 Enable eSPI flash mode + * 25 Enable eSPI mode + * 24 Select DDR4 SDRAM + * 23 Select 25 MHz reference clock input mode + * 22 Enable GPIOE pass-through mode + * 21 Enable GPIOD pass-through mode + * 20 Disable LPC to decode SuperIO 0x2E/0x4E address + * 19 Enable ACPI function + * 18 Select USBCKI input frequency + * 17 Enable BMC 2nd boot watchdog timer + * 16 SuperIO configuration address selection + * 15 VGA Class Code selection + * 14 Select dedicated LPC reset input + * 13:12 SPI mode selection + * 11:9 AXI/AHB clock frequency ratio selection + * 8 Reserved (0) + * 7 Define MAC#2 interface + * 6 Define MAC#1 interface + * 5 Enable dedicated VGA BIOS ROM + * 4 Reserved (0) + * 3:2 VGA memory size selection + * 1 Reserved (1) + * 0 Disable CPU boot + */ +#define SCU_AST2500_HW_STRAP_SPI_AUTOFETCH_ENABLE (0x1 << 31) +#define SCU_AST2500_HW_STRAP_GPIO_STRAP_ENABLE (0x1 << 30) +#define SCU_AST2500_HW_STRAP_UART_DEBUG (0x1 << 29) +#define UART_DEBUG_UART1 0 +#define UART_DEBUG_UART5 1 +#define SCU_AST2500_HW_STRAP_RESERVED28 (0x1 << 28) + +#define SCU_AST2500_HW_STRAP_FAST_RESET_DBG (0x1 << 27) +#define SCU_AST2500_HW_STRAP_ESPI_FLASH_ENABLE (0x1 << 26) +#define SCU_AST2500_HW_STRAP_ESPI_ENABLE (0x1 << 25) +#define SCU_AST2500_HW_STRAP_DDR4_ENABLE (0x1 << 24) + +#define SCU_AST2500_HW_STRAP_ACPI_ENABLE (0x1 << 19) +#define SCU_AST2500_HW_STRAP_USBCKI_FREQ (0x1 << 18) +#define USBCKI_FREQ_24MHZ 0 +#define USBCKI_FREQ_28MHZ 1 + +#define SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(x) ((x) << 9) +#define SCU_AST2500_HW_STRAP_GET_AXI_AHB_RATIO(x) (((x) >> 9) & 7) +#define SCU_AST2500_HW_STRAP_CPU_AXI_RATIO_MASK (0x7 << 9) +#define AXI_AHB_RATIO_UNDEFINED 0 +#define AXI_AHB_RATIO_2_1 1 +#define 
AXI_AHB_RATIO_3_1 2 +#define AXI_AHB_RATIO_4_1 3 +#define AXI_AHB_RATIO_5_1 4 +#define AXI_AHB_RATIO_6_1 5 +#define AXI_AHB_RATIO_7_1 6 +#define AXI_AHB_RATIO_8_1 7 + +#define SCU_AST2500_HW_STRAP_RESERVED1 (0x1 << 1) +#define SCU_AST2500_HW_STRAP_DIS_BOOT (0x1 << 0) + +#define AST2500_HW_STRAP1_DEFAULTS ( \ + SCU_AST2500_HW_STRAP_RESERVED28 | \ + SCU_HW_STRAP_2ND_BOOT_WDT | \ + SCU_HW_STRAP_VGA_CLASS_CODE | \ + SCU_HW_STRAP_LPC_RESET_PIN | \ + SCU_AST2500_HW_STRAP_SET_AXI_AHB_RATIO(AXI_AHB_RATIO_2_1) | \ + SCU_HW_STRAP_VGA_SIZE_SET(VGA_16M_DRAM) | \ + SCU_AST2500_HW_STRAP_RESERVED1) + #endif /* ASPEED_SCU_H */ diff --git a/include/hw/misc/aspeed_sdmc.h b/include/hw/misc/aspeed_sdmc.h new file mode 100644 index 0000000000..551c8afdf4 --- /dev/null +++ b/include/hw/misc/aspeed_sdmc.h @@ -0,0 +1,33 @@ +/* + * ASPEED SDRAM Memory Controller + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ +#ifndef ASPEED_SDMC_H +#define ASPEED_SDMC_H + +#include "hw/sysbus.h" + +#define TYPE_ASPEED_SDMC "aspeed.sdmc" +#define ASPEED_SDMC(obj) OBJECT_CHECK(AspeedSDMCState, (obj), TYPE_ASPEED_SDMC) + +#define ASPEED_SDMC_NR_REGS (0x8 >> 2) + +typedef struct AspeedSDMCState { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + MemoryRegion iomem; + + uint32_t regs[ASPEED_SDMC_NR_REGS]; + uint32_t silicon_rev; + uint32_t ram_bits; + uint64_t ram_size; + +} AspeedSDMCState; + +#endif /* ASPEED_SDMC_H */ diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h index f2e08e3575..c469ffe69b 100644 --- a/include/hw/net/cadence_gem.h +++ b/include/hw/net/cadence_gem.h @@ -30,7 +30,11 @@ #include "net/net.h" #include "hw/sysbus.h" -#define CADENCE_GEM_MAXREG (0x00000640/4) /* Last valid GEM address */ +#define CADENCE_GEM_MAXREG (0x00000800 / 4) /* Last valid GEM address */ + +#define MAX_PRIORITY_QUEUES 8 +#define MAX_TYPE1_SCREENERS 16 +#define MAX_TYPE2_SCREENERS 16 typedef struct CadenceGEMState { /*< private >*/ @@ -40,7 +44,12 @@ typedef struct CadenceGEMState { MemoryRegion iomem; NICState *nic; NICConf conf; - qemu_irq irq; + qemu_irq irq[MAX_PRIORITY_QUEUES]; + + /* Static properties */ + uint8_t num_priority_queues; + uint8_t num_type1_screeners; + uint8_t num_type2_screeners; /* GEM registers backing store */ uint32_t regs[CADENCE_GEM_MAXREG]; @@ -59,12 +68,12 @@ typedef struct CadenceGEMState { uint8_t phy_loop; /* Are we in phy loopback? */ /* The current DMA descriptor pointers */ - uint32_t rx_desc_addr; - uint32_t tx_desc_addr; + uint32_t rx_desc_addr[MAX_PRIORITY_QUEUES]; + uint32_t tx_desc_addr[MAX_PRIORITY_QUEUES]; uint8_t can_rx_state; /* Debug only */ - unsigned rx_desc[2]; + unsigned rx_desc[MAX_PRIORITY_QUEUES][2]; bool sar_active[4]; } CadenceGEMState; diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h new file mode 100644 index 0000000000..b4f5b2b104 --- /dev/null +++ b/include/hw/nvram/chrp_nvram.h @@ -0,0 +1,54 @@ +/* + * Common Hardware Reference Platform NVRAM functions. + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef CHRP_NVRAM_H +#define CHRP_NVRAM_H + +/* OpenBIOS NVRAM partition */ +typedef struct { + uint8_t signature; + uint8_t checksum; + uint16_t len; /* Big endian, length divided by 16 */ + char name[12]; +} ChrpNvramPartHdr; + +#define CHRP_NVPART_SYSTEM 0x70 +#define CHRP_NVPART_FREE 0x7f + +static inline void +chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) +{ + unsigned int i, sum; + uint8_t *tmpptr; + + /* Length divided by 16 */ + header->len = cpu_to_be16(size >> 4); + + /* Checksum */ + tmpptr = (uint8_t *)header; + sum = *tmpptr; + for (i = 0; i < 14; i++) { + sum += tmpptr[2 + i]; + sum = (sum + ((sum & 0xff00) >> 8)) & 0xff; + } + header->checksum = sum & 0xff; +} + +int chrp_nvram_create_system_partition(uint8_t *data, int min_len); +int chrp_nvram_create_free_partition(uint8_t *data, int len); + +#endif diff --git a/include/hw/nvram/openbios_firmware_abi.h b/include/hw/nvram/sun_nvram.h similarity index 50% rename from include/hw/nvram/openbios_firmware_abi.h rename to include/hw/nvram/sun_nvram.h index 74cfd56180..68eaa60308 100644 --- a/include/hw/nvram/openbios_firmware_abi.h +++ b/include/hw/nvram/sun_nvram.h @@ -1,46 +1,5 @@ -#ifndef OPENBIOS_FIRMWARE_ABI_H -#define OPENBIOS_FIRMWARE_ABI_H - -/* OpenBIOS NVRAM partition */ -struct OpenBIOS_nvpart_v1 { - uint8_t signature; - uint8_t checksum; - uint16_t len; // BE, length divided by 16 - char name[12]; -}; - -#define OPENBIOS_PART_SYSTEM 0x70 -#define OPENBIOS_PART_FREE 0x7f - -static inline void -OpenBIOS_finish_partition(struct OpenBIOS_nvpart_v1 *header, uint32_t size) -{ - unsigned int i, sum; - uint8_t *tmpptr; - - // Length divided by 16 - header->len = cpu_to_be16(size >> 4); - - // Checksum - tmpptr = (uint8_t *)header; - sum = *tmpptr; - for (i = 0; i < 14; i++) { - sum += tmpptr[2 + i]; - sum = (sum + ((sum & 0xff00) >> 8)) & 0xff; - } - header->checksum = sum & 0xff; -} - -static inline uint32_t -OpenBIOS_set_var(uint8_t *nvram, uint32_t addr, const char *str) -{ - uint32_t len; - - len = strlen(str) + 1; - memcpy(&nvram[addr], str, len); - - return addr + len; -} +#ifndef SUN_NVRAM_H +#define SUN_NVRAM_H /* Sun IDPROM structure at the end of NVRAM */ /* from http://www.squirrel.com/squirrel/sun-nvram-hostid.faq.html */ @@ -72,4 +31,4 @@ Sun_init_header(struct Sun_nvram *header, const uint8_t *macaddr, int machine_id header->checksum = tmp; } -#endif /* OPENBIOS_FIRMWARE_ABI_H */ +#endif /* SUN_NVRAM_H */ diff --git a/include/hw/or-irq.h b/include/hw/or-irq.h new file mode 100644 index 0000000000..d400a8120b --- /dev/null +++ b/include/hw/or-irq.h @@ -0,0 +1,44 @@ +/* + * QEMU IRQ/GPIO common code. + * + * Copyright (c) 2016 Alistair Francis . + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define TYPE_OR_IRQ "or-irq" + +#define MAX_OR_LINES 16 + +typedef struct OrIRQState qemu_or_irq; + +#define OR_IRQ(obj) OBJECT_CHECK(qemu_or_irq, (obj), TYPE_OR_IRQ) + +struct OrIRQState { + DeviceState parent_obj; + + qemu_irq out_irq; + qemu_irq *in_irqs; + bool levels[MAX_OR_LINES]; + uint16_t num_lines; +}; diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h index 5adc603d47..092294ed5a 100644 --- a/include/hw/pci-host/spapr.h +++ b/include/hw/pci-host/spapr.h @@ -53,8 +53,10 @@ struct sPAPRPHBState { bool dr_enabled; MemoryRegion memspace, iospace; - hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size; - MemoryRegion memwindow, iowindow, msiwindow; + hwaddr mem_win_addr, mem_win_size, mem64_win_addr, mem64_win_size; + uint64_t mem64_win_pciaddr; + hwaddr io_win_addr, io_win_size; + MemoryRegion mem32window, mem64window, iowindow, msiwindow; uint32_t dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]; hwaddr dma_win_addr, dma_win_size; @@ -75,20 +77,27 @@ struct sPAPRPHBState { bool ddw_enabled; uint64_t page_size_mask; uint64_t dma64_win_addr; -}; -#define SPAPR_PCI_MAX_INDEX 255 + uint32_t numa_node; -#define SPAPR_PCI_BASE_BUID 0x800000020000000ULL + /* Fields for migration compatibility hacks */ + bool pre_2_8_migration; + uint32_t mig_liobn; + hwaddr mig_mem_win_addr, mig_mem_win_size; + hwaddr mig_io_win_addr, mig_io_win_size; +}; #define SPAPR_PCI_MEM_WIN_BUS_OFFSET 0x80000000ULL +#define SPAPR_PCI_MEM32_WIN_SIZE \ + ((1ULL << 32) - SPAPR_PCI_MEM_WIN_BUS_OFFSET) +#define SPAPR_PCI_MEM64_WIN_SIZE 0x10000000000ULL /* 1 TiB */ -#define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL -#define SPAPR_PCI_WINDOW_SPACING 0x1000000000ULL -#define SPAPR_PCI_MMIO_WIN_OFF 0xA0000000 -#define SPAPR_PCI_MMIO_WIN_SIZE (SPAPR_PCI_WINDOW_SPACING - \ - SPAPR_PCI_MEM_WIN_BUS_OFFSET) -#define SPAPR_PCI_IO_WIN_OFF 0x80000000 +/* Without manual configuration, all PCI outbound windows will be + * within this range */ +#define SPAPR_PCI_BASE (1ULL << 45) /* 32 TiB */ +#define SPAPR_PCI_LIMIT (1ULL << 46) /* 64 TiB */ + +#define SPAPR_PCI_2_7_MMIO_WIN_SIZE 0xf80000000 #define SPAPR_PCI_IO_WIN_SIZE 0x10000 #define SPAPR_PCI_MSI_WINDOW 0x40000000000ULL @@ -106,8 +115,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt); -void spapr_pci_msi_init(sPAPRMachineState *spapr, hwaddr addr); - void spapr_pci_rtas_init(void); sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState *spapr, uint64_t buid); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 929ec2fb07..772692f1b2 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -13,9 +13,12 @@ /* PCI bus */ #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) +#define PCI_BUS_NUM(x) (((x) >> 8) & 0xff) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) #define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) +#define PCI_BUS_MAX 256 +#define PCI_DEVFN_MAX 256 #define PCI_SLOT_MAX 32 
#define PCI_FUNC_MAX 8 @@ -79,6 +82,7 @@ #define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004 #define PCI_DEVICE_ID_VIRTIO_RNG 0x1005 #define PCI_DEVICE_ID_VIRTIO_9P 0x1009 +#define PCI_DEVICE_ID_VIRTIO_VSOCK 0x1012 #define PCI_VENDOR_ID_REDHAT 0x1b36 #define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001 diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 847fd7db33..d5891cd30e 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -45,7 +45,6 @@ void pci_bridge_update_mappings(PCIBridge *br); void pci_bridge_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len); void pci_bridge_disable_base_limit(PCIDevice *dev); -void pci_bridge_reset_reg(PCIDevice *dev); void pci_bridge_reset(DeviceState *qdev); void pci_bridge_initfn(PCIDevice *pci_dev, const char *typename); diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index e167bf7520..f7b64db00c 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -53,7 +53,6 @@ struct PCIESlot { }; void pcie_chassis_create(uint8_t chassis_number); -void pcie_main_chassis_create(void); PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot); int pcie_chassis_add_slot(struct PCIESlot *slot); void pcie_chassis_del_slot(PCIESlot *s); diff --git a/include/hw/ppc/fdt.h b/include/hw/ppc/fdt.h new file mode 100644 index 0000000000..0cabb6af04 --- /dev/null +++ b/include/hw/ppc/fdt.h @@ -0,0 +1,29 @@ +/* + * QEMU PowerPC helper routines for the device tree. + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#ifndef PPC_FDT_H +#define PPC_FDT_H + +#include "qemu/error-report.h" +#include "target-ppc/cpu-qom.h" + +#define _FDT(exp) \ + do { \ + int ret = (exp); \ + if (ret < 0) { \ + error_report("error creating device tree: %s: %s", \ + #exp, fdt_strerror(ret)); \ + exit(1); \ + } \ + } while (0) + +size_t ppc_create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, + size_t maxsize); + +#endif /* PPC_FDT_H */ diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h new file mode 100644 index 0000000000..df98a72006 --- /dev/null +++ b/include/hw/ppc/pnv.h @@ -0,0 +1,129 @@ +/* + * QEMU PowerPC PowerNV various definitions + * + * Copyright (c) 2014-2016 BenH, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ +#ifndef _PPC_PNV_H +#define _PPC_PNV_H + +#include "hw/boards.h" +#include "hw/sysbus.h" +#include "hw/ppc/pnv_lpc.h" + +#define TYPE_PNV_CHIP "powernv-chip" +#define PNV_CHIP(obj) OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP) +#define PNV_CHIP_CLASS(klass) \ + OBJECT_CLASS_CHECK(PnvChipClass, (klass), TYPE_PNV_CHIP) +#define PNV_CHIP_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PnvChipClass, (obj), TYPE_PNV_CHIP) + +typedef enum PnvChipType { + PNV_CHIP_POWER8E, /* AKA Murano (default) */ + PNV_CHIP_POWER8, /* AKA Venice */ + PNV_CHIP_POWER8NVL, /* AKA Naples */ + PNV_CHIP_POWER9, /* AKA Nimbus */ +} PnvChipType; + +typedef struct PnvChip { + /*< private >*/ + SysBusDevice parent_obj; + + /*< public >*/ + uint32_t chip_id; + uint64_t ram_start; + uint64_t ram_size; + + uint32_t nr_cores; + uint64_t cores_mask; + void *cores; + + hwaddr xscom_base; + MemoryRegion xscom_mmio; + MemoryRegion xscom; + AddressSpace xscom_as; + + PnvLpcController lpc; +} PnvChip; + +typedef struct PnvChipClass { + /*< private >*/ + SysBusDeviceClass parent_class; + + /*< public >*/ + const char *cpu_model; + PnvChipType chip_type; + uint64_t chip_cfam_id; + uint64_t cores_mask; + + hwaddr xscom_base; + hwaddr xscom_core_base; + + uint32_t (*core_pir)(PnvChip *chip, uint32_t core_id); +} PnvChipClass; + +#define TYPE_PNV_CHIP_POWER8E TYPE_PNV_CHIP "-POWER8E" +#define PNV_CHIP_POWER8E(obj) \ + OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP_POWER8E) + +#define TYPE_PNV_CHIP_POWER8 TYPE_PNV_CHIP "-POWER8" +#define PNV_CHIP_POWER8(obj) \ + OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP_POWER8) + +#define TYPE_PNV_CHIP_POWER8NVL TYPE_PNV_CHIP "-POWER8NVL" +#define PNV_CHIP_POWER8NVL(obj) \ + OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP_POWER8NVL) + +#define TYPE_PNV_CHIP_POWER9 TYPE_PNV_CHIP "-POWER9" +#define PNV_CHIP_POWER9(obj) \ + OBJECT_CHECK(PnvChip, (obj), TYPE_PNV_CHIP_POWER9) + +/* + * This generates a HW chip id depending on an index: + * + * 0x0, 0x1, 0x10, 0x11 + * + * 4 chips should be the maximum + */ +#define PNV_CHIP_HWID(i) ((((i) & 0x3e) << 3) | ((i) & 0x1)) + +#define TYPE_POWERNV_MACHINE MACHINE_TYPE_NAME("powernv") +#define POWERNV_MACHINE(obj) \ + OBJECT_CHECK(PnvMachineState, (obj), TYPE_POWERNV_MACHINE) + +typedef struct PnvMachineState { + /*< private >*/ + MachineState parent_obj; + + uint32_t initrd_base; + long initrd_size; + + uint32_t num_chips; + PnvChip **chips; + + ISABus *isa_bus; +} PnvMachineState; + +#define PNV_FDT_ADDR 0x01000000 +#define PNV_TIMEBASE_FREQ 512000000ULL + +/* + * POWER8 MMIO base addresses + */ +#define PNV_XSCOM_SIZE 0x800000000ull +#define PNV_XSCOM_BASE(chip) \ + (chip->xscom_base + ((uint64_t)(chip)->chip_id) * PNV_XSCOM_SIZE) + +#endif /* _PPC_PNV_H */ diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h new file mode 100644 index 0000000000..2955a41c90 --- /dev/null +++ b/include/hw/ppc/pnv_core.h @@ -0,0 +1,50 @@ +/* + * QEMU PowerPC PowerNV CPU Core model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef _PPC_PNV_CORE_H +#define _PPC_PNV_CORE_H + +#include "hw/cpu/core.h" + +#define TYPE_PNV_CORE "powernv-cpu-core" +#define PNV_CORE(obj) \ + OBJECT_CHECK(PnvCore, (obj), TYPE_PNV_CORE) +#define PNV_CORE_CLASS(klass) \ + OBJECT_CLASS_CHECK(PnvCoreClass, (klass), TYPE_PNV_CORE) +#define PNV_CORE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PnvCoreClass, (obj), TYPE_PNV_CORE) + +typedef struct PnvCore { + /*< private >*/ + CPUCore parent_obj; + + /*< public >*/ + void *threads; + uint32_t pir; + + MemoryRegion xscom_regs; +} PnvCore; + +typedef struct PnvCoreClass { + DeviceClass parent_class; + ObjectClass *cpu_oc; +} PnvCoreClass; + +extern char *pnv_core_typename(const char *model); + +#endif /* _PPC_PNV_CORE_H */ diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h new file mode 100644 index 0000000000..38e5506975 --- /dev/null +++ b/include/hw/ppc/pnv_lpc.h @@ -0,0 +1,67 @@ +/* + * QEMU PowerPC PowerNV LPC controller + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef _PPC_PNV_LPC_H +#define _PPC_PNV_LPC_H + +#define TYPE_PNV_LPC "pnv-lpc" +#define PNV_LPC(obj) \ + OBJECT_CHECK(PnvLpcController, (obj), TYPE_PNV_LPC) + +typedef struct PnvLpcController { + DeviceState parent; + + uint64_t eccb_stat_reg; + uint32_t eccb_data_reg; + + /* OPB bus */ + MemoryRegion opb_mr; + AddressSpace opb_as; + + /* ISA IO and Memory space */ + MemoryRegion isa_io; + MemoryRegion isa_mem; + + /* Windows from OPB to ISA (aliases) */ + MemoryRegion opb_isa_io; + MemoryRegion opb_isa_mem; + MemoryRegion opb_isa_fw; + + /* Registers */ + MemoryRegion lpc_hc_regs; + MemoryRegion opb_master_regs; + + /* OPB Master LS registers */ + uint32_t opb_irq_stat; + uint32_t opb_irq_mask; + uint32_t opb_irq_pol; + uint32_t opb_irq_input; + + /* LPC HC registers */ + uint32_t lpc_hc_fw_seg_idsel; + uint32_t lpc_hc_fw_rd_acc_size; + uint32_t lpc_hc_irqser_ctrl; + uint32_t lpc_hc_irqmask; + uint32_t lpc_hc_irqstat; + uint32_t lpc_hc_error_addr; + + /* XSCOM registers */ + MemoryRegion xscom_regs; +} PnvLpcController; + +#endif /* _PPC_PNV_LPC_H */ diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h new file mode 100644 index 0000000000..0faa1847bf --- /dev/null +++ b/include/hw/ppc/pnv_xscom.h @@ -0,0 +1,75 @@ +/* + * QEMU PowerPC PowerNV XSCOM bus definitions + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ +#ifndef _PPC_PNV_XSCOM_H +#define _PPC_PNV_XSCOM_H + +#include "qom/object.h" + +typedef struct PnvXScomInterface { + Object parent; +} PnvXScomInterface; + +#define TYPE_PNV_XSCOM_INTERFACE "pnv-xscom-interface" +#define PNV_XSCOM_INTERFACE(obj) \ + OBJECT_CHECK(PnvXScomInterface, (obj), TYPE_PNV_XSCOM_INTERFACE) +#define PNV_XSCOM_INTERFACE_CLASS(klass) \ + OBJECT_CLASS_CHECK(PnvXScomInterfaceClass, (klass), \ + TYPE_PNV_XSCOM_INTERFACE) +#define PNV_XSCOM_INTERFACE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(PnvXScomInterfaceClass, (obj), TYPE_PNV_XSCOM_INTERFACE) + +typedef struct PnvXScomInterfaceClass { + InterfaceClass parent; + int (*populate)(PnvXScomInterface *dev, void *fdt, int offset); +} PnvXScomInterfaceClass; + +/* + * Layout of the XSCOM PCB addresses of EX core 1 (POWER 8) + * + * GPIO 0x1100xxxx + * SCOM 0x1101xxxx + * OHA 0x1102xxxx + * CLOCK CTL 0x1103xxxx + * FIR 0x1104xxxx + * THERM 0x1105xxxx + * 0x1106xxxx + * .. + * 0x110Exxxx + * PCB SLAVE 0x110Fxxxx + */ + +#define PNV_XSCOM_EX_CORE_BASE(base, i) (base | (((uint64_t)i) << 24)) +#define PNV_XSCOM_EX_CORE_SIZE 0x100000 + +#define PNV_XSCOM_LPC_BASE 0xb0020 +#define PNV_XSCOM_LPC_SIZE 0x4 + +extern void pnv_xscom_realize(PnvChip *chip, Error **errp); +extern int pnv_xscom_populate(PnvChip *chip, void *fdt, int offset); + +extern void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, + MemoryRegion *mr); +extern void pnv_xscom_region_init(MemoryRegion *mr, + struct Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size); + +#endif /* _PPC_PNV_XSCOM_H */ diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h index 3b01ae8314..66e57a5194 100644 --- a/include/hw/ppc/ppc4xx.h +++ b/include/hw/ppc/ppc4xx.h @@ -55,10 +55,4 @@ void ppc4xx_sdram_init (CPUPPCState *env, qemu_irq irq, int nbanks, #define TYPE_PPC4xx_PCI_HOST_BRIDGE "ppc4xx-pcihost" -PCIBus *ppc4xx_pci_init(CPUPPCState *env, qemu_irq pci_irqs[4], - hwaddr config_space, - hwaddr int_ack, - hwaddr special_cycle, - hwaddr registers); - #endif /* PPC4XX_H */ diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index caf7be919f..bd5bcf70de 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -6,12 +6,14 @@ #include "hw/ppc/xics.h" #include "hw/ppc/spapr_drc.h" #include "hw/mem/pc-dimm.h" +#include "hw/ppc/spapr_ovec.h" struct VIOsPAPRBus; struct sPAPRPHBState; struct sPAPRNVRAM; typedef struct sPAPRConfigureConnectorState sPAPRConfigureConnectorState; typedef struct sPAPREventLogEntry sPAPREventLogEntry; +typedef struct sPAPREventSource sPAPREventSource; #define HPTE64_V_HPTE_DIRTY 0x0000000000000040ULL #define SPAPR_ENTRY_POINT 0x100 @@ -39,6 +41,11 @@ struct sPAPRMachineClass { /*< public >*/ bool dr_lmb_enabled; /* enable dynamic-reconfig/hotplug of LMBs */ bool use_ohci_by_default; /* use USB-OHCI instead of XHCI */ + const char *tcg_default_cpu; /* which (TCG) CPU to simulate by default */ + void (*phb_placement)(sPAPRMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, Error **errp); }; /** @@ -58,17 +65,23 @@ struct 
sPAPRMachineState { uint32_t htab_shift; hwaddr rma_size; int vrma_adjust; - hwaddr fdt_addr, rtas_addr; ssize_t rtas_size; void *rtas_blob; - void *fdt_skel; + long kernel_size; + bool kernel_le; + uint32_t initrd_base; + long initrd_size; uint64_t rtc_offset; /* Now used only during incoming migration */ struct PPCTimebase tb; bool has_graphics; + sPAPROptionVector *ov5; /* QEMU-supported option vectors */ + sPAPROptionVector *ov5_cas; /* negotiated (via CAS) option vectors */ + bool cas_reboot; - uint32_t check_exception_irq; Notifier epow_notifier; QTAILQ_HEAD(, sPAPREventLogEntry) pending_events; + bool use_hotplug_event_source; + sPAPREventSource *event_sources; /* Migration state */ int htab_save_index; @@ -368,9 +381,6 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn); target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode, target_ulong *args); -int spapr_allocate_irq(int hint, bool lsi); -int spapr_allocate_irq_block(int num, bool lsi, bool msi); - /* ibm,set-eeh-option */ #define RTAS_EEH_DISABLE 0 #define RTAS_EEH_ENABLE 1 @@ -525,8 +535,8 @@ void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn); target_ulong spapr_rtas_call(PowerPCCPU *cpu, sPAPRMachineState *sm, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); -int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, - hwaddr rtas_size); +void spapr_dt_rtas_tokens(void *fdt, int rtas); +void spapr_load_rtas(sPAPRMachineState *spapr, void *fdt, hwaddr addr); #define SPAPR_TCE_PAGE_SHIFT 12 #define SPAPR_TCE_PAGE_SIZE (1ULL << SPAPR_TCE_PAGE_SHIFT) @@ -576,10 +586,11 @@ struct sPAPREventLogEntry { }; void spapr_events_init(sPAPRMachineState *sm); -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); +void spapr_dt_events(sPAPRMachineState *sm, void *fdt); int spapr_h_cas_compose_response(sPAPRMachineState *sm, target_ulong addr, target_ulong size, - bool cpu_update, bool memory_update); + bool cpu_update, + sPAPROptionVector *ov5_updates); sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn); void spapr_tce_table_enable(sPAPRTCETable *tcet, uint32_t page_shift, uint64_t bus_offset, @@ -599,6 +610,10 @@ void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type, uint32_t count); void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type, uint32_t count); +void spapr_hotplug_req_add_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index); +void spapr_hotplug_req_remove_by_count_indexed(sPAPRDRConnectorType drc_type, + uint32_t count, uint32_t index); void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp); void *spapr_populate_hotplug_cpu_dt(CPUState *cs, int *fdt_offset, sPAPRMachineState *spapr); diff --git a/include/hw/ppc/spapr_cpu_core.h b/include/hw/ppc/spapr_cpu_core.h index 1c9b3195cc..283969bafb 100644 --- a/include/hw/ppc/spapr_cpu_core.h +++ b/include/hw/ppc/spapr_cpu_core.h @@ -16,6 +16,10 @@ #define TYPE_SPAPR_CPU_CORE "spapr-cpu-core" #define SPAPR_CPU_CORE(obj) \ OBJECT_CHECK(sPAPRCPUCore, (obj), TYPE_SPAPR_CPU_CORE) +#define SPAPR_CPU_CORE_CLASS(klass) \ + OBJECT_CLASS_CHECK(sPAPRCPUCoreClass, (klass), TYPE_SPAPR_CPU_CORE) +#define SPAPR_CPU_CORE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(sPAPRCPUCoreClass, (obj), TYPE_SPAPR_CPU_CORE) typedef struct sPAPRCPUCore { /*< private >*/ @@ -23,9 +27,13 @@ typedef struct sPAPRCPUCore { /*< public >*/ void *threads; - ObjectClass *cpu_class; } sPAPRCPUCore; +typedef struct sPAPRCPUCoreClass { + 
DeviceClass parent_class; + ObjectClass *cpu_class; +} sPAPRCPUCoreClass; + void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); char *spapr_get_cpu_core_type(const char *model); @@ -33,4 +41,5 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); +void spapr_cpu_core_class_init(ObjectClass *oc, void *data); #endif diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h new file mode 100644 index 0000000000..355a34411f --- /dev/null +++ b/include/hw/ppc/spapr_ovec.h @@ -0,0 +1,71 @@ +/* + * QEMU SPAPR Option/Architecture Vector Definitions + * + * Each architecture option is organized/documented by the following + * in LoPAPR 1.1, Table 244: + * + * <vector>: the bit-vector in which the option is located + * <byte>: the byte offset of the vector entry + * <bit>: the bit offset within the vector entry + * + * where each vector entry can be one or more bytes. + * + * Firmware expects a somewhat literal encoding of this bit-vector + * structure, where each entry is stored in little-endian so that the + * byte ordering reflects that of the documentation, but where each bit + * offset is from "left-to-right" in the traditional representation of + * a byte value where the MSB is the left-most bit. Thus, each + * individual byte encodes the option bits in reverse order of the + * documented bit. + * + * These definitions/helpers attempt to abstract away this internal + * representation so that we can define/set/test for individual option + * bits using only the documented values. This is done mainly by relying + * on a bitmap to approximate the documented "bit-vector" structure and + * handling conversions to/from the internal representation under the + * covers. + * + * Copyright IBM Corp. 2016 + * + * Authors: + * Michael Roth + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory.
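+ *
+ * (Worked example, added for illustration rather than taken from this
+ * patch: an option documented as byte 6, bit 5 is expressed here as
+ * OV_BIT(6, 5), i.e. internal bitmap bit (6 - 1) * 8 + 5 = 45; callers
+ * then simply use spapr_ovec_set(ov, OV5_HP_EVT) or
+ * spapr_ovec_test(ov, OV5_HP_EVT) and never deal with the on-the-wire
+ * byte/bit reversal directly.)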
+ */ +#ifndef _SPAPR_OVEC_H +#define _SPAPR_OVEC_H + +#include "cpu.h" +#include "migration/vmstate.h" + +typedef struct sPAPROptionVector sPAPROptionVector; + +#define OV_BIT(byte, bit) ((byte - 1) * BITS_PER_BYTE + bit) + +/* option vector 5 */ +#define OV5_DRCONF_MEMORY OV_BIT(2, 2) +#define OV5_FORM1_AFFINITY OV_BIT(5, 0) +#define OV5_HP_EVT OV_BIT(6, 5) + +/* interfaces */ +sPAPROptionVector *spapr_ovec_new(void); +sPAPROptionVector *spapr_ovec_clone(sPAPROptionVector *ov_orig); +void spapr_ovec_intersect(sPAPROptionVector *ov, + sPAPROptionVector *ov1, + sPAPROptionVector *ov2); +bool spapr_ovec_diff(sPAPROptionVector *ov, + sPAPROptionVector *ov_old, + sPAPROptionVector *ov_new); +void spapr_ovec_cleanup(sPAPROptionVector *ov); +void spapr_ovec_set(sPAPROptionVector *ov, long bitnr); +void spapr_ovec_clear(sPAPROptionVector *ov, long bitnr); +bool spapr_ovec_test(sPAPROptionVector *ov, long bitnr); +sPAPROptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector); +int spapr_ovec_populate_dt(void *fdt, int fdt_offset, + sPAPROptionVector *ov, const char *name); + +/* migration */ +extern const VMStateDescription vmstate_spapr_ovec; + +#endif /* !defined (_SPAPR_OVEC_H) */ diff --git a/include/hw/ppc/spapr_rtas.h b/include/hw/ppc/spapr_rtas.h new file mode 100644 index 0000000000..383611f10f --- /dev/null +++ b/include/hw/ppc/spapr_rtas.h @@ -0,0 +1,10 @@ +#ifndef HW_SPAPR_RTAS_H +#define HW_SPAPR_RTAS_H +/* + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +uint64_t qtest_rtas_call(char *cmd, uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t rets); +#endif /* HW_SPAPR_RTAS_H */ diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h index d4a1e2c8af..14f502240e 100644 --- a/include/hw/ppc/spapr_vio.h +++ b/include/hw/ppc/spapr_vio.h @@ -76,16 +76,12 @@ struct VIOsPAPRDevice { struct VIOsPAPRBus { BusState bus; uint32_t next_reg; - int (*init)(VIOsPAPRDevice *dev); - int (*devnode)(VIOsPAPRDevice *dev, void *fdt, int node_off); }; extern VIOsPAPRBus *spapr_vio_bus_init(void); extern VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg); -extern int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt); -extern int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus); - -extern int spapr_vio_signal(VIOsPAPRDevice *dev, target_ulong mode); +void spapr_dt_vdevice(VIOsPAPRBus *bus, void *fdt); +extern gchar *spapr_vio_stdout_path(VIOsPAPRBus *bus); static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev) { @@ -137,8 +133,6 @@ void spapr_vscsi_create(VIOsPAPRBus *bus); VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus); -void spapr_vio_quiesce(void); - extern const VMStateDescription vmstate_spapr_vio; #define VMSTATE_SPAPR_VIO(_f, _s) \ diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h index 2db9f938d3..3f0c31610a 100644 --- a/include/hw/ppc/xics.h +++ b/include/hw/ppc/xics.h @@ -85,7 +85,7 @@ struct XICSState { uint32_t nr_servers; uint32_t nr_irqs; ICPState *ss; - ICSState *ics; + QLIST_HEAD(, ICSState) ics; }; #define TYPE_ICP "icp" @@ -111,29 +111,39 @@ struct ICPState { DeviceState parent_obj; /*< public >*/ CPUState *cs; + ICSState *xirr_owner; uint32_t xirr; uint8_t pending_priority; uint8_t mfrr; qemu_irq output; bool cap_irq_xics_enabled; + + XICSState *xics; }; -#define TYPE_ICS "ics" -#define ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS) +#define TYPE_ICS_BASE "ics-base" +#define ICS_BASE(obj) OBJECT_CHECK(ICSState, 
(obj), TYPE_ICS_BASE) + +/* Retain ics for sPAPR for migration from existing sPAPR guests */ +#define TYPE_ICS_SIMPLE "ics" +#define ICS_SIMPLE(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_SIMPLE) -#define TYPE_KVM_ICS "icskvm" -#define KVM_ICS(obj) OBJECT_CHECK(ICSState, (obj), TYPE_KVM_ICS) +#define TYPE_ICS_KVM "icskvm" +#define ICS_KVM(obj) OBJECT_CHECK(ICSState, (obj), TYPE_ICS_KVM) -#define ICS_CLASS(klass) \ - OBJECT_CLASS_CHECK(ICSStateClass, (klass), TYPE_ICS) -#define ICS_GET_CLASS(obj) \ - OBJECT_GET_CLASS(ICSStateClass, (obj), TYPE_ICS) +#define ICS_BASE_CLASS(klass) \ + OBJECT_CLASS_CHECK(ICSStateClass, (klass), TYPE_ICS_BASE) +#define ICS_BASE_GET_CLASS(obj) \ + OBJECT_GET_CLASS(ICSStateClass, (obj), TYPE_ICS_BASE) struct ICSStateClass { DeviceClass parent_class; void (*pre_save)(ICSState *s); int (*post_load)(ICSState *s, int version_id); + void (*reject)(ICSState *s, uint32_t irq); + void (*resend)(ICSState *s); + void (*eoi)(ICSState *s, uint32_t irq); }; struct ICSState { @@ -145,11 +155,12 @@ struct ICSState { qemu_irq *qirqs; ICSIRQState *irqs; XICSState *xics; + QLIST_ENTRY(ICSState) list; }; static inline bool ics_valid_irq(ICSState *ics, uint32_t nr) { - return (nr >= ics->offset) + return (ics->offset != 0) && (nr >= ics->offset) && (nr < (ics->offset + ics->nr_irqs)); } @@ -172,29 +183,31 @@ struct ICSIRQState { #define XICS_IRQS_SPAPR 1024 qemu_irq xics_get_qirq(XICSState *icp, int irq); -int xics_spapr_alloc(XICSState *icp, int src, int irq_hint, bool lsi, - Error **errp); -int xics_spapr_alloc_block(XICSState *icp, int src, int num, bool lsi, - bool align, Error **errp); +int xics_spapr_alloc(XICSState *icp, int irq_hint, bool lsi, Error **errp); +int xics_spapr_alloc_block(XICSState *icp, int num, bool lsi, bool align, + Error **errp); void xics_spapr_free(XICSState *icp, int irq, int num); +void spapr_dt_xics(XICSState *xics, void *fdt, uint32_t phandle); void xics_cpu_setup(XICSState *icp, PowerPCCPU *cpu); void xics_cpu_destroy(XICSState *icp, PowerPCCPU *cpu); +void xics_set_nr_servers(XICSState *xics, uint32_t nr_servers, + const char *typename, Error **errp); /* Internal XICS interfaces */ int xics_get_cpu_index_by_dt_id(int cpu_dt_id); -void icp_set_cppr(XICSState *icp, int server, uint8_t cppr); -void icp_set_mfrr(XICSState *icp, int server, uint8_t mfrr); +void icp_set_cppr(ICPState *icp, uint8_t cppr); +void icp_set_mfrr(ICPState *icp, uint8_t mfrr); uint32_t icp_accept(ICPState *ss); uint32_t icp_ipoll(ICPState *ss, uint32_t *mfrr); -void icp_eoi(XICSState *icp, int server, uint32_t xirr); +void icp_eoi(ICPState *icp, uint32_t xirr); -void ics_write_xive(ICSState *ics, int nr, int server, - uint8_t priority, uint8_t saved_priority); +void ics_simple_write_xive(ICSState *ics, int nr, int server, + uint8_t priority, uint8_t saved_priority); void ics_set_irq_type(ICSState *ics, int srcno, bool lsi); -int xics_find_source(XICSState *icp, int irq); +ICSState *xics_find_source(XICSState *icp, int irq); #endif /* XICS_H */ diff --git a/include/hw/ptimer.h b/include/hw/ptimer.h index e397db5bdb..48cccbdb51 100644 --- a/include/hw/ptimer.h +++ b/include/hw/ptimer.h @@ -12,11 +12,54 @@ #include "qemu/timer.h" #include "migration/vmstate.h" +/* The default ptimer policy retains backward compatibility with the legacy + * timers. Custom policies are adjusting the default one. Consider providing + * a correct policy for your timer. 
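+ *
+ * (Illustrative usage, not taken from this patch: a device model that
+ * keeps the legacy behaviour would call, with its own bottom half and
+ * state, ptimer_init(bh, PTIMER_POLICY_DEFAULT), while one that wants
+ * the guest-visible counter to read back its true value would OR in
+ * PTIMER_POLICY_NO_COUNTER_ROUND_DOWN, defined below.)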
+ * + * The rough edges of the default policy: + * - Starting to run with a period = 0 emits error message and stops the + * timer without a trigger. + * + * - Setting period to 0 of the running timer emits error message and + * stops the timer without a trigger. + * + * - Starting to run with counter = 0 or setting it to "0" while timer + * is running causes a trigger and reloads counter with a limit value. + * If limit = 0, ptimer emits error message and stops the timer. + * + * - Counter value of the running timer is one less than the actual value. + * + * - Changing period/frequency of the running timer loses time elapsed + * since the last period, effectively restarting the timer with a + * counter = counter value at the moment of change (.i.e. one less). + */ +#define PTIMER_POLICY_DEFAULT 0 + +/* Periodic timer counter stays with "0" for a one period before wrapping + * around. */ +#define PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD (1 << 0) + +/* Running periodic timer that has counter = limit = 0 would continuously + * re-trigger every period. */ +#define PTIMER_POLICY_CONTINUOUS_TRIGGER (1 << 1) + +/* Starting to run with/setting counter to "0" won't trigger immediately, + * but after a one period for both oneshot and periodic modes. */ +#define PTIMER_POLICY_NO_IMMEDIATE_TRIGGER (1 << 2) + +/* Starting to run with/setting counter to "0" won't re-load counter + * immediately, but after a one period. */ +#define PTIMER_POLICY_NO_IMMEDIATE_RELOAD (1 << 3) + +/* Make counter value of the running timer represent the actual value and + * not the one less. */ +#define PTIMER_POLICY_NO_COUNTER_ROUND_DOWN (1 << 4) + /* ptimer.c */ typedef struct ptimer_state ptimer_state; typedef void (*ptimer_cb)(void *opaque); -ptimer_state *ptimer_init(QEMUBH *bh); +ptimer_state *ptimer_init(QEMUBH *bh, uint8_t policy_mask); void ptimer_set_period(ptimer_state *s, int64_t period); void ptimer_set_freq(ptimer_state *s, uint32_t freq); uint64_t ptimer_get_limit(ptimer_state *s); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 4b4b33bec8..2c973473f7 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -224,7 +224,7 @@ typedef struct BusChild { struct BusState { Object obj; DeviceState *parent; - const char *name; + char *name; HotplugHandler *hotplug_handler; int max_index; bool realized; diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index 2a9d2f90e6..306bbab088 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -146,7 +146,7 @@ extern PropertyInfo qdev_prop_arraylen; DEFINE_PROP(_n, _s, _f, qdev_prop_ptr, void*) #define DEFINE_PROP_CHR(_n, _s, _f) \ - DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharDriverState*) + DEFINE_PROP(_n, _s, _f, qdev_prop_chr, CharBackend) #define DEFINE_PROP_STRING(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*) #define DEFINE_PROP_NETDEV(_n, _s, _f) \ diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h index 1da63e361d..c96c862057 100644 --- a/include/hw/s390x/css.h +++ b/include/hw/s390x/css.h @@ -20,7 +20,7 @@ #define MAX_DEVNO 65535 #define MAX_SCHID 65535 #define MAX_SSID 3 -#define MAX_CSSID 254 /* 255 is reserved */ +#define MAX_CSSID 255 #define MAX_CHPID 255 #define MAX_CIWS 62 diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index a0c1fc8083..6ecae00386 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -36,9 +36,12 @@ typedef struct S390CcwMachineClass { /*< public >*/ bool ri_allowed; + 
bool cpu_model_allowed; } S390CcwMachineClass; /* runtime-instrumentation allowed by the machine */ bool ri_allowed(void); +/* cpu model allowed by the machine */ +bool cpu_model_allowed(void); #endif diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h index ba28d1dd0e..3008a5148a 100644 --- a/include/hw/s390x/sclp.h +++ b/include/hw/s390x/sclp.h @@ -98,11 +98,14 @@ typedef struct SCCBHeader { } QEMU_PACKED SCCBHeader; #define SCCB_DATA_LEN (SCCB_SIZE - sizeof(SCCBHeader)) +#define SCCB_CPU_FEATURE_LEN 6 /* CPU information */ typedef struct CPUEntry { uint8_t address; - uint8_t reserved0[13]; + uint8_t reserved0; + uint8_t features[SCCB_CPU_FEATURE_LEN]; + uint8_t reserved2[6]; uint8_t type; uint8_t reserved1; } QEMU_PACKED CPUEntry; @@ -118,12 +121,18 @@ typedef struct ReadInfo { uint8_t loadparm[8]; /* 24-31 */ uint8_t _reserved3[48 - 32]; /* 32-47 */ uint64_t facilities; /* 48-55 */ - uint8_t _reserved0[100 - 56]; + uint8_t _reserved0[76 - 56]; /* 56-75 */ + uint32_t ibc_val; + uint8_t conf_char[96 - 80]; /* 80-95 */ + uint8_t _reserved4[99 - 96]; /* 96-98 */ + uint8_t mha_pow; uint32_t rnsize2; uint64_t rnmax2; - uint8_t _reserved4[120-112]; /* 112-119 */ + uint8_t _reserved6[116 - 112]; /* 112-115 */ + uint8_t conf_char_ext[120 - 116]; /* 116-119 */ uint16_t highest_cpu; - uint8_t _reserved5[128 - 122]; /* 122-127 */ + uint8_t _reserved5[124 - 122]; /* 122-123 */ + uint32_t hmfai; struct CPUEntry entries[0]; } QEMU_PACKED ReadInfo; diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index 94d7868105..9bad49e917 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -243,7 +243,6 @@ extern const struct SCSISense sense_code_SPACE_ALLOC_FAILED; uint32_t scsi_data_cdb_xfer(uint8_t *buf); uint32_t scsi_cdb_xfer(uint8_t *buf); int scsi_cdb_length(uint8_t *buf); -int scsi_sense_valid(SCSISense sense); int scsi_build_sense(uint8_t *in_buf, int in_len, uint8_t *buf, int len, bool fixed); diff --git a/include/hw/sparc/sun4m.h b/include/hw/sparc/sun4m.h index 9c17425a43..580d87b252 100644 --- a/include/hw/sparc/sun4m.h +++ b/include/hw/sparc/sun4m.h @@ -24,14 +24,6 @@ static inline void sparc_iommu_memory_write(void *opaque, sparc_iommu_memory_rw(opaque, addr, buf, len, 1); } -/* slavio_intctl.c */ -void slavio_pic_info(Monitor *mon, DeviceState *dev); -void slavio_irq_info(Monitor *mon, DeviceState *dev); - -/* sun4m.c */ -void sun4m_hmp_info_pic(Monitor *mon, const QDict *qdict); -void sun4m_hmp_info_irq(Monitor *mon, const QDict *qdict); - /* sparc32_dma.c */ #include "hw/sparc/sparc32_dma.h" diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h index def3b4507e..bdfbcc0ffa 100644 --- a/include/hw/ssi/aspeed_smc.h +++ b/include/hw/ssi/aspeed_smc.h @@ -42,7 +42,8 @@ typedef struct AspeedSMCController { uint8_t conf_enable_w0; uint8_t max_slaves; const AspeedSegments *segments; - uint32_t mapping_window_size; + hwaddr flash_window_base; + uint32_t flash_window_size; } AspeedSMCController; typedef struct AspeedSMCFlash { diff --git a/include/hw/ssi/stm32f2xx_spi.h b/include/hw/ssi/stm32f2xx_spi.h new file mode 100644 index 0000000000..1cd73e4cd4 --- /dev/null +++ b/include/hw/ssi/stm32f2xx_spi.h @@ -0,0 +1,72 @@ +/* + * STM32F2XX SPI + * + * Copyright (c) 2014 Alistair Francis + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HW_STM32F2XX_SPI_H +#define HW_STM32F2XX_SPI_H + +#include "hw/sysbus.h" +#include "hw/hw.h" +#include "hw/ssi/ssi.h" + +#define STM_SPI_CR1 0x00 +#define STM_SPI_CR2 0x04 +#define STM_SPI_SR 0x08 +#define STM_SPI_DR 0x0C +#define STM_SPI_CRCPR 0x10 +#define STM_SPI_RXCRCR 0x14 +#define STM_SPI_TXCRCR 0x18 +#define STM_SPI_I2SCFGR 0x1C +#define STM_SPI_I2SPR 0x20 + +#define STM_SPI_CR1_SPE (1 << 6) +#define STM_SPI_CR1_MSTR (1 << 2) + +#define STM_SPI_SR_RXNE 1 + +#define TYPE_STM32F2XX_SPI "stm32f2xx-spi" +#define STM32F2XX_SPI(obj) \ + OBJECT_CHECK(STM32F2XXSPIState, (obj), TYPE_STM32F2XX_SPI) + +typedef struct { + /* */ + SysBusDevice parent_obj; + + /* */ + MemoryRegion mmio; + + uint32_t spi_cr1; + uint32_t spi_cr2; + uint32_t spi_sr; + uint32_t spi_dr; + uint32_t spi_crcpr; + uint32_t spi_rxcrcr; + uint32_t spi_txcrcr; + uint32_t spi_i2scfgr; + uint32_t spi_i2spr; + + qemu_irq irq; + SSIBus *ssi; +} STM32F2XXSPIState; + +#endif /* HW_STM32F2XX_SPI_H */ diff --git a/include/hw/sysbus.h b/include/hw/sysbus.h index e73a5b21ac..e88bb6dae0 100644 --- a/include/hw/sysbus.h +++ b/include/hw/sysbus.h @@ -75,7 +75,7 @@ struct SysBusDevice { uint32_t pio[QDEV_MAX_PIO]; }; -typedef int FindSysbusDeviceFunc(SysBusDevice *sbdev, void *opaque); +typedef void FindSysbusDeviceFunc(SysBusDevice *sbdev, void *opaque); void sysbus_init_mmio(SysBusDevice *dev, MemoryRegion *memory); MemoryRegion *sysbus_mmio_get_region(SysBusDevice *dev, int n); diff --git a/include/hw/timer/arm_mptimer.h b/include/hw/timer/arm_mptimer.h index b34cba00ce..c46d8d2309 100644 --- a/include/hw/timer/arm_mptimer.h +++ b/include/hw/timer/arm_mptimer.h @@ -27,12 +27,9 @@ /* State of a single timer or watchdog block */ typedef struct { - uint32_t count; - uint32_t load; uint32_t control; uint32_t status; - int64_t tick; - QEMUTimer *timer; + struct ptimer_state *timer; qemu_irq irq; MemoryRegion iomem; } TimerBlock; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 94dfae387a..c582de18c9 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -30,6 +30,9 @@ #include #endif +#define ERR_PREFIX "vfio error: %s: " +#define WARN_PREFIX "vfio warning: %s: " + /*#define DEBUG_VFIO*/ #ifdef DEBUG_VFIO #define DPRINTF(fmt, ...) 
\ @@ -93,7 +96,7 @@ typedef struct VFIOGuestIOMMU { VFIOContainer *container; MemoryRegion *iommu; hwaddr iommu_offset; - Notifier n; + IOMMUNotifier n; QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; } VFIOGuestIOMMU; @@ -152,10 +155,10 @@ void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); void vfio_region_exit(VFIORegion *region); void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); -VFIOGroup *vfio_get_group(int groupid, AddressSpace *as); +VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); void vfio_put_group(VFIOGroup *group); int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev); + VFIODevice *vbasedev, Error **errp); extern const MemoryRegionOps vfio_region_ops; extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index cf7f0b5a69..6e90703cad 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -73,6 +73,9 @@ typedef int (*vhost_migration_done_op)(struct vhost_dev *dev, typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, uint64_t start1, uint64_t size1, uint64_t start2, uint64_t size2); +typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, + uint64_t guest_cid); +typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); typedef struct VhostOps { VhostBackendType backend_type; @@ -102,6 +105,8 @@ typedef struct VhostOps { vhost_requires_shm_log_op vhost_requires_shm_log; vhost_migration_done_op vhost_migration_done; vhost_backend_can_merge_op vhost_backend_can_merge; + vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; + vhost_vsock_set_running_op vhost_vsock_set_running; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost-vsock.h b/include/hw/virtio/vhost-vsock.h new file mode 100644 index 0000000000..7b9205fe3f --- /dev/null +++ b/include/hw/virtio/vhost-vsock.h @@ -0,0 +1,41 @@ +/* + * Vhost vsock virtio device + * + * Copyright 2015 Red Hat, Inc. + * + * Authors: + * Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
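+ *
+ * (Illustrative usage, not part of this patch: the device is normally
+ * exposed to guests through a transport proxy, e.g. something like
+ * "-device vhost-vsock-pci,guest-cid=3", where the guest-cid property
+ * fills in the guest_cid field of VHostVSockConf below.)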
+ */ + +#ifndef _QEMU_VHOST_VSOCK_H +#define _QEMU_VHOST_VSOCK_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" + +#define TYPE_VHOST_VSOCK "vhost-vsock-device" +#define VHOST_VSOCK(obj) \ + OBJECT_CHECK(VHostVSock, (obj), TYPE_VHOST_VSOCK) + +typedef struct { + uint64_t guest_cid; + char *vhostfd; +} VHostVSockConf; + +typedef struct { + /*< private >*/ + VirtIODevice parent; + VHostVSockConf conf; + struct vhost_virtqueue vhost_vqs[2]; + struct vhost_dev vhost_dev; + VirtQueue *event_vq; + QEMUTimer *post_load_timer; + + /*< public >*/ +} VHostVSock; + +#endif /* _QEMU_VHOST_VSOCK_H */ diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index e433089ea9..1fe5aadef5 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -14,11 +14,12 @@ struct vhost_virtqueue { void *avail; void *used; int num; + unsigned long long desc_phys; + unsigned desc_size; + unsigned long long avail_phys; + unsigned avail_size; unsigned long long used_phys; unsigned used_size; - void *ring; - unsigned long long ring_phys; - unsigned ring_size; EventNotifier masked_notifier; }; diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 180bd8db5d..9734b4c446 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -80,14 +80,6 @@ typedef struct MultiReqBuffer { bool is_write; } MultiReqBuffer; -void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, - VirtIOBlockReq *req); -void virtio_blk_free_request(VirtIOBlockReq *req); - -void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb); - -void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb); - void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); #endif diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index f3e5ef3f5b..8a51e2c564 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -53,16 +53,16 @@ typedef struct VirtioBusClass { bool (*query_guest_notifiers)(DeviceState *d); int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assign); void (*vmstate_change)(DeviceState *d, bool running); + /* + * Expose the features the transport layer supports before + * the negotiation takes place. + */ + void (*pre_plugged)(DeviceState *d, Error **errp); /* * transport independent init function. * This is called by virtio-bus just after the device is plugged. */ void (*device_plugged)(DeviceState *d, Error **errp); - /* - * Re-evaluate setup after feature bits have been validated - * by the device backend. - */ - void (*post_plugged)(DeviceState *d, Error **errp); /* * transport independent exit function. * This is called by virtio-bus just before the device is unplugged. @@ -70,21 +70,11 @@ typedef struct VirtioBusClass { void (*device_unplugged)(DeviceState *d); int (*query_nvectors)(DeviceState *d); /* - * ioeventfd handling: if the transport implements ioeventfd_started, - * it must implement the other ioeventfd callbacks as well - */ - /* Returns true if the ioeventfd has been started for the device. */ - bool (*ioeventfd_started)(DeviceState *d); - /* - * Sets the 'ioeventfd started' state after the ioeventfd has been - * started/stopped for the device. err signifies whether an error - * had occurred. + * ioeventfd handling: if the transport implements ioeventfd_assign, + * it must implement ioeventfd_enabled as well. */ - void (*ioeventfd_set_started)(DeviceState *d, bool started, bool err); - /* Returns true if the ioeventfd has been disabled for the device. 
*/ - bool (*ioeventfd_disabled)(DeviceState *d); - /* Sets the 'ioeventfd disabled' state for the device. */ - void (*ioeventfd_set_disabled)(DeviceState *d, bool disabled); + /* Returns true if the ioeventfd is enabled for the device. */ + bool (*ioeventfd_enabled)(DeviceState *d); /* * Assigns/deassigns the ioeventfd backing for the transport on * the device for queue number n. Returns an error value on @@ -102,6 +92,21 @@ typedef struct VirtioBusClass { struct VirtioBusState { BusState parent_obj; + + /* + * Set if ioeventfd has been started. + */ + bool ioeventfd_started; + + /* + * Set if ioeventfd has been grabbed by vhost. When ioeventfd + * is grabbed by vhost, we track its started/stopped state (which + * depends in turn on the virtio status register), but do not + * register a handler for the ioeventfd. When ioeventfd is + * released, if ioeventfd_started is true we finally register + * the handler so that QEMU's device model can use ioeventfd. + */ + int ioeventfd_grabbed; }; void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp); @@ -111,9 +116,6 @@ void virtio_bus_device_unplugged(VirtIODevice *bus); uint16_t virtio_bus_get_vdev_id(VirtioBusState *bus); /* Get the config_len field of the plugged device. */ size_t virtio_bus_get_vdev_config_len(VirtioBusState *bus); -/* Get the features of the plugged device. */ -uint32_t virtio_bus_get_vdev_features(VirtioBusState *bus, - uint32_t requested_features); /* Get bad features of the plugged device. */ uint32_t virtio_bus_get_vdev_bad_features(VirtioBusState *bus); /* Get config of the plugged device. */ @@ -133,10 +135,16 @@ static inline VirtIODevice *virtio_bus_get_device(VirtioBusState *bus) return (VirtIODevice *)qdev; } +/* Return whether the proxy allows ioeventfd. */ +bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus); /* Start the ioeventfd. */ -void virtio_bus_start_ioeventfd(VirtioBusState *bus); +int virtio_bus_start_ioeventfd(VirtioBusState *bus); /* Stop the ioeventfd. */ void virtio_bus_stop_ioeventfd(VirtioBusState *bus); +/* Tell the bus that vhost is grabbing the ioeventfd. */ +int virtio_bus_grab_ioeventfd(VirtioBusState *bus); +/* bus that vhost is not using the ioeventfd anymore. */ +void virtio_bus_release_ioeventfd(VirtioBusState *bus); /* Switch from/to the generic ioeventfd handler */ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); diff --git a/include/hw/virtio/virtio-crypto.h b/include/hw/virtio/virtio-crypto.h new file mode 100644 index 0000000000..a00a0bfaba --- /dev/null +++ b/include/hw/virtio/virtio-crypto.h @@ -0,0 +1,101 @@ +/* + * Virtio crypto Support + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. + */ + +#ifndef _QEMU_VIRTIO_CRYPTO_H +#define _QEMU_VIRTIO_CRYPTO_H + +#include "standard-headers/linux/virtio_crypto.h" +#include "hw/virtio/virtio.h" +#include "sysemu/iothread.h" +#include "sysemu/cryptodev.h" + + +#define DEBUG_VIRTIO_CRYPTO 0 + +#define DPRINTF(fmt, ...) 
\ +do { \ + if (DEBUG_VIRTIO_CRYPTO) { \ + fprintf(stderr, "virtio_crypto: " fmt, ##__VA_ARGS__); \ + } \ +} while (0) + + +#define TYPE_VIRTIO_CRYPTO "virtio-crypto-device" +#define VIRTIO_CRYPTO(obj) \ + OBJECT_CHECK(VirtIOCrypto, (obj), TYPE_VIRTIO_CRYPTO) +#define VIRTIO_CRYPTO_GET_PARENT_CLASS(obj) \ + OBJECT_GET_PARENT_CLASS(obj, TYPE_VIRTIO_CRYPTO) + + +typedef struct VirtIOCryptoConf { + CryptoDevBackend *cryptodev; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +} VirtIOCryptoConf; + +struct VirtIOCrypto; + +typedef struct VirtIOCryptoReq { + VirtQueueElement elem; + /* flags of operation, such as type of algorithm */ + uint32_t flags; + struct virtio_crypto_inhdr *in; + struct iovec *in_iov; /* Head address of dest iovec */ + unsigned int in_num; /* Number of dest iovec */ + size_t in_len; + VirtQueue *vq; + struct VirtIOCrypto *vcrypto; + union { + CryptoDevBackendSymOpInfo *sym_op_info; + } u; +} VirtIOCryptoReq; + +typedef struct VirtIOCryptoQueue { + VirtQueue *dataq; + QEMUBH *dataq_bh; + struct VirtIOCrypto *vcrypto; +} VirtIOCryptoQueue; + +typedef struct VirtIOCrypto { + VirtIODevice parent_obj; + + VirtQueue *ctrl_vq; + VirtIOCryptoQueue *vqs; + VirtIOCryptoConf conf; + CryptoDevBackend *cryptodev; + + uint32_t max_queues; + uint32_t status; + + int multiqueue; + uint32_t curr_queues; + size_t config_size; +} VirtIOCrypto; + +#endif /* _QEMU_VIRTIO_CRYPTO_H */ diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 91ed97cfcd..0ced975c57 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -35,6 +35,7 @@ typedef struct virtio_net_conf uint32_t txtimer; int32_t txburst; char *tx; + uint16_t rx_queue_size; } virtio_net_conf; /* Maximum packet size we can receive from tap device: header + 64k */ diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index a1e0cfb449..73751969ba 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -134,9 +134,8 @@ void virtio_scsi_free_req(VirtIOSCSIReq *req); void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, uint32_t event, uint32_t reason); -void virtio_scsi_set_iothread(VirtIOSCSI *s, IOThread *iothread); -void virtio_scsi_dataplane_start(VirtIOSCSI *s); -void virtio_scsi_dataplane_stop(VirtIOSCSI *s); -void virtio_scsi_dataplane_notify(VirtIODevice *vdev, VirtIOSCSIReq *req); +void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); +int virtio_scsi_dataplane_start(VirtIODevice *s); +void virtio_scsi_dataplane_stop(VirtIODevice *s); #endif /* QEMU_VIRTIO_SCSI_H */ diff --git a/include/hw/virtio/virtio-serial.h b/include/hw/virtio/virtio-serial.h index 730c88d2a7..b19c44727f 100644 --- a/include/hw/virtio/virtio-serial.h +++ b/include/hw/virtio/virtio-serial.h @@ -184,6 +184,8 @@ struct VirtIOSerial { struct VirtIOSerialPostLoad *post_load; virtio_serial_conf serial; + + uint64_t host_features; }; /* Interface to the virtio-serial bus */ diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index d2490c1975..ab0e030cc4 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ 
-87,6 +87,7 @@ struct VirtIODevice VirtQueue *vq; uint16_t device_id; bool vm_running; + bool broken; /* device in invalid state, needs reset */ VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian; @@ -112,6 +113,11 @@ typedef struct VirtioDeviceClass { void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* For transitional devices, this is a bitmap of features + * that are only exposed on the legacy interface but not + * the modern one. + */ + uint64_t legacy_features; /* Test and clear event pending status. * Should be called after unmask to avoid losing events. * If backend does not support masking, @@ -124,8 +130,14 @@ typedef struct VirtioDeviceClass { * must mask in frontend instead. */ void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); + int (*start_ioeventfd)(VirtIODevice *vdev); + void (*stop_ioeventfd)(VirtIODevice *vdev); + /* Saving and loading of a device; trying to deprecate save/load + * use vmsd for new devices. + */ void (*save)(VirtIODevice *vdev, QEMUFile *f); int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id); + const VMStateDescription *vmsd; } VirtioDeviceClass; void virtio_instance_init_common(Object *proxy_obj, void *data, @@ -135,6 +147,8 @@ void virtio_init(VirtIODevice *vdev, const char *name, uint16_t device_id, size_t config_size); void virtio_cleanup(VirtIODevice *vdev); +void virtio_error(VirtIODevice *vdev, const char *fmt, ...) GCC_FMT_ATTR(2, 3); + /* Set the child bus name. */ void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name); @@ -143,17 +157,16 @@ typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *); VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, VirtIOHandleOutput handle_output); -VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size, - VirtIOHandleOutput handle_output); - void virtio_del_queue(VirtIODevice *vdev, int n); -void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num); void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); void virtqueue_flush(VirtQueue *vq, unsigned int count); -void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len); +void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); +void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len); +bool virtqueue_rewind(VirtQueue *vq, unsigned int num); void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx); @@ -168,28 +181,18 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, unsigned max_in_bytes, unsigned max_out_bytes); bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq); +void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq); void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); void virtio_save(VirtIODevice *vdev, QEMUFile *f); -void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size); - -#define VMSTATE_VIRTIO_DEVICE(devname, v, getf, putf) \ - static const VMStateDescription vmstate_virtio_ ## devname = { \ - .name = "virtio-" #devname , \ - .minimum_version_id = v, \ - .version_id = v, \ - .fields = (VMStateField[]) { \ - { \ - .name = "virtio", \ - .info = &(const VMStateInfo) {\ - .name = "virtio", \ - .get = getf, \ - .put = putf, \ - }, \ - .flags = VMS_SINGLE, \ - }, \ - VMSTATE_END_OF_LIST() \ - } \ + +extern const VMStateInfo 
virtio_vmstate_info; + +#define VMSTATE_VIRTIO_DEVICE \ + { \ + .name = "virtio", \ + .info = &virtio_vmstate_info, \ + .flags = VMS_SINGLE, \ } int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); @@ -257,11 +260,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n); -hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n); -hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n); uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx); void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n); @@ -270,13 +271,16 @@ uint16_t virtio_get_queue_index(VirtQueue *vq); EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, bool with_irqfd); +int virtio_device_start_ioeventfd(VirtIODevice *vdev); +void virtio_device_stop_ioeventfd(VirtIODevice *vdev); +int virtio_device_grab_ioeventfd(VirtIODevice *vdev); +void virtio_device_release_ioeventfd(VirtIODevice *vdev); +bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev); EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); -void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign, - bool set_handler); +void virtio_queue_host_notifier_read(EventNotifier *n); void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, void (*fn)(VirtIODevice *, VirtQueue *)); -void virtio_irq(VirtQueue *vq); VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); VirtQueue *virtio_vector_next_queue(VirtQueue *vq); diff --git a/include/hw/xen/xen_backend.h b/include/hw/xen/xen_backend.h index 0df282ab5f..4f4799a610 100644 --- a/include/hw/xen/xen_backend.h +++ b/include/hw/xen/xen_backend.h @@ -2,59 +2,16 @@ #define QEMU_HW_XEN_BACKEND_H #include "hw/xen/xen_common.h" +#include "hw/xen/xen_pvdev.h" #include "sysemu/sysemu.h" #include "net/net.h" -/* ------------------------------------------------------------- */ +#define TYPE_XENSYSDEV "xen-sysdev" +#define TYPE_XENSYSBUS "xen-sysbus" +#define TYPE_XENBACKEND "xen-backend" -#define XEN_BUFSIZE 1024 - -struct XenDevice; - -/* driver uses grant tables -> open gntdev device (xendev->gnttabdev) */ -#define DEVOPS_FLAG_NEED_GNTDEV 1 -/* don't expect frontend doing correct state transitions (aka console quirk) */ -#define DEVOPS_FLAG_IGNORE_STATE 2 - -struct XenDevOps { - size_t size; - uint32_t flags; - void (*alloc)(struct XenDevice *xendev); - int (*init)(struct XenDevice *xendev); - int (*initialise)(struct XenDevice *xendev); - void (*connected)(struct XenDevice *xendev); - void (*event)(struct XenDevice *xendev); - void (*disconnect)(struct XenDevice *xendev); - int (*free)(struct XenDevice *xendev); - void (*backend_changed)(struct XenDevice *xendev, const char *node); - void (*frontend_changed)(struct XenDevice *xendev, const char *node); - int (*backend_register)(void); -}; - -struct XenDevice { - const char *type; - int dom; - int dev; - char name[64]; - int debug; - - enum xenbus_state be_state; - enum xenbus_state fe_state; - int online; - char be[XEN_BUFSIZE]; - char *fe; - 
char *protocol; - int remote_port; - int local_port; - - xenevtchn_handle *evtchndev; - xengnttab_handle *gnttabdev; - - struct XenDevOps *ops; - QTAILQ_ENTRY(XenDevice) next; -}; - -/* ------------------------------------------------------------- */ +#define XENBACKEND_DEVICE(obj) \ + OBJECT_CHECK(XenDevice, (obj), TYPE_XENBACKEND) /* variables */ extern xc_interface *xen_xc; @@ -62,27 +19,22 @@ extern xenforeignmemory_handle *xen_fmem; extern struct xs_handle *xenstore; extern const char *xen_protocol; extern DeviceState *xen_sysdev; +extern BusState *xen_sysbus; -/* xenstore helper functions */ int xenstore_mkdir(char *path, int p); -int xenstore_write_str(const char *base, const char *node, const char *val); -int xenstore_write_int(const char *base, const char *node, int ival); -int xenstore_write_int64(const char *base, const char *node, int64_t ival); -char *xenstore_read_str(const char *base, const char *node); -int xenstore_read_int(const char *base, const char *node, int *ival); - int xenstore_write_be_str(struct XenDevice *xendev, const char *node, const char *val); int xenstore_write_be_int(struct XenDevice *xendev, const char *node, int ival); int xenstore_write_be_int64(struct XenDevice *xendev, const char *node, int64_t ival); char *xenstore_read_be_str(struct XenDevice *xendev, const char *node); int xenstore_read_be_int(struct XenDevice *xendev, const char *node, int *ival); +void xenstore_update_fe(char *watch, struct XenDevice *xendev); +void xenstore_update_be(char *watch, char *type, int dom, + struct XenDevOps *ops); char *xenstore_read_fe_str(struct XenDevice *xendev, const char *node); int xenstore_read_fe_int(struct XenDevice *xendev, const char *node, int *ival); -int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval); -int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node, uint64_t *uval); +int xenstore_read_fe_uint64(struct XenDevice *xendev, const char *node, + uint64_t *uval); -const char *xenbus_strstate(enum xenbus_state state); -struct XenDevice *xen_be_find_xendev(const char *type, int dom, int dev); void xen_be_check_state(struct XenDevice *xendev); /* xen backend driver bits */ @@ -91,10 +43,6 @@ void xen_be_register_common(void); int xen_be_register(const char *type, struct XenDevOps *ops); int xen_be_set_state(struct XenDevice *xendev, enum xenbus_state state); int xen_be_bind_evtchn(struct XenDevice *xendev); -void xen_be_unbind_evtchn(struct XenDevice *xendev); -int xen_be_send_notify(struct XenDevice *xendev); -void xen_be_printf(struct XenDevice *xendev, int msg_level, const char *fmt, ...) 
- GCC_FMT_ATTR(3, 4); /* actual backend drivers */ extern struct XenDevOps xen_console_ops; /* xen_console.c */ diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h index bd39287b8f..8e1580d526 100644 --- a/include/hw/xen/xen_common.h +++ b/include/hw/xen/xen_common.h @@ -424,4 +424,18 @@ static inline int xen_domain_create(xc_interface *xc, uint32_t ssidref, #endif #endif +/* Xen before 4.8 */ + +#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 480 + + +typedef void *xengnttab_grant_copy_segment_t; + +static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count, + xengnttab_grant_copy_segment_t *segs) +{ + return -ENOSYS; +} +#endif + #endif /* QEMU_HW_XEN_COMMON_H */ diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h new file mode 100644 index 0000000000..d473e9b34d --- /dev/null +++ b/include/hw/xen/xen_pvdev.h @@ -0,0 +1,79 @@ +#ifndef QEMU_HW_XEN_PVDEV_H +#define QEMU_HW_XEN_PVDEV_H + +#include "hw/xen/xen_common.h" +/* ------------------------------------------------------------- */ + +#define XEN_BUFSIZE 1024 + +struct XenDevice; + +/* driver uses grant tables -> open gntdev device (xendev->gnttabdev) */ +#define DEVOPS_FLAG_NEED_GNTDEV 1 +/* don't expect frontend doing correct state transitions (aka console quirk) */ +#define DEVOPS_FLAG_IGNORE_STATE 2 + +struct XenDevOps { + size_t size; + uint32_t flags; + void (*alloc)(struct XenDevice *xendev); + int (*init)(struct XenDevice *xendev); + int (*initialise)(struct XenDevice *xendev); + void (*connected)(struct XenDevice *xendev); + void (*event)(struct XenDevice *xendev); + void (*disconnect)(struct XenDevice *xendev); + int (*free)(struct XenDevice *xendev); + void (*backend_changed)(struct XenDevice *xendev, const char *node); + void (*frontend_changed)(struct XenDevice *xendev, const char *node); + int (*backend_register)(void); +}; + +struct XenDevice { + DeviceState qdev; + const char *type; + int dom; + int dev; + char name[64]; + int debug; + + enum xenbus_state be_state; + enum xenbus_state fe_state; + int online; + char be[XEN_BUFSIZE]; + char *fe; + char *protocol; + int remote_port; + int local_port; + + xenevtchn_handle *evtchndev; + xengnttab_handle *gnttabdev; + + struct XenDevOps *ops; + QTAILQ_ENTRY(XenDevice) next; +}; + +/* ------------------------------------------------------------- */ + +/* xenstore helper functions */ +int xenstore_write_str(const char *base, const char *node, const char *val); +int xenstore_write_int(const char *base, const char *node, int ival); +int xenstore_write_int64(const char *base, const char *node, int64_t ival); +char *xenstore_read_str(const char *base, const char *node); +int xenstore_read_int(const char *base, const char *node, int *ival); +int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval); +void xenstore_update(void *unused); + +const char *xenbus_strstate(enum xenbus_state state); + +void xen_pv_evtchn_event(void *opaque); +void xen_pv_insert_xendev(struct XenDevice *xendev); +void xen_pv_del_xendev(struct XenDevice *xendev); +struct XenDevice *xen_pv_find_xendev(const char *type, int dom, int dev); + +void xen_pv_unbind_evtchn(struct XenDevice *xendev); +int xen_pv_send_notify(struct XenDevice *xendev); + +void xen_pv_printf(struct XenDevice *xendev, int msg_level, + const char *fmt, ...) 
GCC_FMT_ATTR(3, 4); + +#endif /* QEMU_HW_XEN_PVDEV_H */ diff --git a/include/io/channel.h b/include/io/channel.h index 752e89f4dc..32a9470794 100644 --- a/include/io/channel.h +++ b/include/io/channel.h @@ -40,9 +40,9 @@ typedef struct QIOChannelClass QIOChannelClass; typedef enum QIOChannelFeature QIOChannelFeature; enum QIOChannelFeature { - QIO_CHANNEL_FEATURE_FD_PASS = (1 << 0), - QIO_CHANNEL_FEATURE_SHUTDOWN = (1 << 1), - QIO_CHANNEL_FEATURE_LISTEN = (1 << 2), + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, }; @@ -79,6 +79,7 @@ typedef gboolean (*QIOChannelFunc)(QIOChannel *ioc, struct QIOChannel { Object parent; unsigned int features; /* bitmask of QIOChannelFeatures */ + char *name; #ifdef _WIN32 HANDLE event; /* For use with GSource on Win32 */ #endif @@ -148,6 +149,28 @@ struct QIOChannelClass { bool qio_channel_has_feature(QIOChannel *ioc, QIOChannelFeature feature); +/** + * qio_channel_set_feature: + * @ioc: the channel object + * @feature: the feature to set support for + * + * Add channel support for the feature named in @feature. + */ +void qio_channel_set_feature(QIOChannel *ioc, + QIOChannelFeature feature); + +/** + * qio_channel_set_name: + * @ioc: the channel object + * @name: the name of the channel + * + * Sets the name of the channel, which serves as an aid + * to debugging. The name is used when creating GSource + * watches for this channel. + */ +void qio_channel_set_name(QIOChannel *ioc, + const char *name); + /** * qio_channel_readv_full: * @ioc: the channel object diff --git a/include/migration/colo.h b/include/migration/colo.h new file mode 100644 index 0000000000..e32eef4763 --- /dev/null +++ b/include/migration/colo.h @@ -0,0 +1,38 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
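+ *
+ * (Illustrative reading of the declarations below, not text from this
+ * patch: migrate_start_colo_process() is the outgoing-side entry point,
+ * colo_process_incoming_thread() is what the destination runs once
+ * migration_incoming_enable_colo() has been requested, and
+ * colo_do_failover() lets one side give up replication and continue
+ * standalone.)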
+ */ + +#ifndef QEMU_COLO_H +#define QEMU_COLO_H + +#include "qemu-common.h" +#include "migration/migration.h" +#include "qemu/coroutine_int.h" +#include "qemu/thread.h" +#include "qemu/main-loop.h" + +bool colo_supported(void); +void colo_info_init(void); + +void migrate_start_colo_process(MigrationState *s); +bool migration_in_colo_state(void); + +/* loadvm */ +bool migration_incoming_enable_colo(void); +void migration_incoming_exit_colo(void); +void *colo_process_incoming_thread(void *opaque); +bool migration_incoming_in_colo_state(void); + +COLOMode get_colo_mode(void); + +/* failover */ +void colo_do_failover(MigrationState *s); +#endif diff --git a/include/migration/cpu.h b/include/migration/cpu.h index f3abbab650..f3d5dfcf61 100644 --- a/include/migration/cpu.h +++ b/include/migration/cpu.h @@ -18,6 +18,8 @@ VMSTATE_UINT64_EQUAL_V(_f, _s, _v) #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) +#define VMSTATE_UINTTL_TEST(_f, _s, _t) \ + VMSTATE_UINT64_TEST(_f, _s, _t) #define vmstate_info_uinttl vmstate_info_uint64 #else #define qemu_put_betl qemu_put_be32 @@ -35,6 +37,8 @@ VMSTATE_UINT32_EQUAL_V(_f, _s, _v) #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) +#define VMSTATE_UINTTL_TEST(_f, _s, _t) \ + VMSTATE_UINT32_TEST(_f, _s, _t) #define vmstate_info_uinttl vmstate_info_uint32 #endif diff --git a/include/migration/failover.h b/include/migration/failover.h new file mode 100644 index 0000000000..ad91ef2381 --- /dev/null +++ b/include/migration/failover.h @@ -0,0 +1,26 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_FAILOVER_H +#define QEMU_FAILOVER_H + +#include "qemu-common.h" +#include "qapi-types.h" + +void failover_init_state(void); +FailoverStatus failover_set_state(FailoverStatus old_state, + FailoverStatus new_state); +FailoverStatus failover_get_state(void); +void failover_request_active(Error **errp); +bool failover_request_is_active(void); + +#endif diff --git a/include/migration/migration.h b/include/migration/migration.h index 3c96623d3d..c309d23370 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -21,6 +21,7 @@ #include "migration/vmstate.h" #include "qapi-types.h" #include "exec/cpu-common.h" +#include "qemu/coroutine_int.h" #define QEMU_VM_FILE_MAGIC 0x5145564d #define QEMU_VM_FILE_VERSION_COMPAT 0x00000002 @@ -107,6 +108,12 @@ struct MigrationIncomingState { QEMUBH *bh; int state; + + bool have_colo_incoming_thread; + QemuThread colo_incoming_thread; + /* The coroutine we should enter (back) after failover */ + Coroutine *migration_incoming_co; + /* See savevm.c */ LoadStateEntry_Head loadvm_handlers; }; @@ -129,7 +136,6 @@ struct MigrationSrcPageRequest { struct MigrationState { - int64_t bandwidth_limit; size_t bytes_xfer; size_t xfer_limit; QemuThread thread; @@ -229,8 +235,6 @@ void migrate_fd_error(MigrationState *s, const Error *error); void migrate_fd_connect(MigrationState *s); -int migrate_fd_close(MigrationState *s); - void add_migration_state_change_notifier(Notifier *notify); void remove_migration_state_change_notifier(Notifier *notify); MigrationState *migrate_init(const MigrationParams *params); @@ -301,6 +305,7 @@ int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); +bool migrate_colo_enabled(void); int64_t xbzrle_cache_resize(int64_t new_size); diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index a714d8ef80..8cc532ec0e 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -9,7 +9,7 @@ extern Monitor *cur_mon; /* flags for monitor_init */ -#define MONITOR_IS_DEFAULT 0x01 +/* 0x01 unused */ #define MONITOR_USE_READLINE 0x02 #define MONITOR_USE_CONTROL 0x04 #define MONITOR_USE_PRETTY 0x08 diff --git a/include/monitor/qdev.h b/include/monitor/qdev.h index 8e504bc66b..0ff3331284 100644 --- a/include/monitor/qdev.h +++ b/include/monitor/qdev.h @@ -12,5 +12,6 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp); int qdev_device_help(QemuOpts *opts); DeviceState *qdev_device_add(QemuOpts *opts, Error **errp); +void qdev_set_id(DeviceState *dev, const char *id); #endif diff --git a/include/net/net.h b/include/net/net.h index e8d9e9e4e9..99b28d5b38 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -138,8 +138,6 @@ NetClientState *qemu_get_queue(NICState *nic); NICState *qemu_get_nic(NetClientState *nc); void *qemu_get_nic_opaque(NetClientState *nc); void qemu_del_net_client(NetClientState *nc); -NetClientState *qemu_find_vlan_client_by_name(Monitor *mon, int vlan_id, - const char *client_str); typedef void (*qemu_nic_foreach)(NICState *nic, void *opaque); void qemu_foreach_nic(qemu_nic_foreach func, void *opaque); int qemu_can_send_packet(NetClientState *nc); diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h index 48c11b66d1..57651ea955 100644 --- a/include/qapi/qmp/dispatch.h +++ b/include/qapi/qmp/dispatch.h @@ -36,6 +36,7 @@ typedef struct QmpCommand void qmp_register_command(const char *name, QmpCommandFunc *fn, QmpCommandOptions options); +void 
qmp_unregister_command(const char *name); QmpCommand *qmp_find_command(const char *name); QObject *qmp_dispatch(QObject *request); void qmp_disable_command(const char *name); diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 71b8eb0416..fe9a4c5c60 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -73,6 +73,7 @@ void qdict_flatten(QDict *qdict); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); void qdict_array_split(QDict *src, QList **dst); int qdict_array_entries(QDict *src, const char *subqdict); +QObject *qdict_crumple(const QDict *src, Error **errp); void qdict_join(QDict *dest, QDict *src, bool overwrite); diff --git a/include/qapi/qmp-input-visitor.h b/include/qapi/qobject-input-visitor.h similarity index 63% rename from include/qapi/qmp-input-visitor.h rename to include/qapi/qobject-input-visitor.h index f3ff5f3e98..cde328da9f 100644 --- a/include/qapi/qmp-input-visitor.h +++ b/include/qapi/qobject-input-visitor.h @@ -11,20 +11,20 @@ * */ -#ifndef QMP_INPUT_VISITOR_H -#define QMP_INPUT_VISITOR_H +#ifndef QOBJECT_INPUT_VISITOR_H +#define QOBJECT_INPUT_VISITOR_H #include "qapi/visitor.h" #include "qapi/qmp/qobject.h" -typedef struct QmpInputVisitor QmpInputVisitor; +typedef struct QObjectInputVisitor QObjectInputVisitor; /* - * Return a new input visitor that converts QMP to QAPI. + * Return a new input visitor that converts a QObject to a QAPI object. * * Set @strict to reject a parse that doesn't consume all keys of a * dictionary; otherwise excess input is ignored. */ -Visitor *qmp_input_visitor_new(QObject *obj, bool strict); +Visitor *qobject_input_visitor_new(QObject *obj, bool strict); #endif diff --git a/include/qapi/qmp-output-visitor.h b/include/qapi/qobject-output-visitor.h similarity index 66% rename from include/qapi/qmp-output-visitor.h rename to include/qapi/qobject-output-visitor.h index 040fdda142..8241877bd7 100644 --- a/include/qapi/qmp-output-visitor.h +++ b/include/qapi/qobject-output-visitor.h @@ -11,20 +11,20 @@ * */ -#ifndef QMP_OUTPUT_VISITOR_H -#define QMP_OUTPUT_VISITOR_H +#ifndef QOBJECT_OUTPUT_VISITOR_H +#define QOBJECT_OUTPUT_VISITOR_H #include "qapi/visitor.h" #include "qapi/qmp/qobject.h" -typedef struct QmpOutputVisitor QmpOutputVisitor; +typedef struct QObjectOutputVisitor QObjectOutputVisitor; /* - * Create a new QMP output visitor. + * Create a new QObject output visitor. * * If everything else succeeds, pass @result to visit_complete() to * collect the result of the visit. */ -Visitor *qmp_output_visitor_new(QObject **result); +Visitor *qobject_output_visitor_new(QObject **result); #endif diff --git a/include/qapi/visitor.h b/include/qapi/visitor.h index 6c77a913db..9bb6cba237 100644 --- a/include/qapi/visitor.h +++ b/include/qapi/visitor.h @@ -25,14 +25,14 @@ * for doing work at each node of a QAPI graph; it can also be used * for a virtual walk, where there is no actual QAPI C struct. 
* - * There are four kinds of visitor classes: input visitors (QMP, + * There are four kinds of visitor classes: input visitors (QObject, * string, and QemuOpts) parse an external representation and build - * the corresponding QAPI graph, output visitors (QMP and string) take + * the corresponding QAPI graph, output visitors (QObject and string) take * a completed QAPI graph and generate an external representation, the * dealloc visitor can take a QAPI graph (possibly partially * constructed) and recursively free its resources, and the clone * visitor performs a deep clone of one QAPI object to another. While - * the dealloc and QMP input/output visitors are general, the string, + * the dealloc and QObject input/output visitors are general, the string, * QemuOpts, and clone visitors have some implementation limitations; * see the documentation for each visitor for more details on what it * supports. Also, see visitor-impl.h for the callback contracts diff --git a/include/qemu-common.h b/include/qemu-common.h index 9a908537ab..f1dc51c580 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -80,6 +80,19 @@ void tcg_exec_init(unsigned long tb_size); bool tcg_enabled(void); void cpu_exec_init_all(void); +void cpu_exec_step_atomic(CPUState *cpu); + +/** + * set_preferred_target_page_bits: + * @bits: number of bits needed to represent an address within the page + * + * Set the preferred target page size (the actual target page + * size may be smaller than any given CPU's preference). + * Returns true on success, false on failure (which can only happen + * if this is called after the system has already finalized its + * choice of page size and the requested page size is smaller than that). + */ +bool set_preferred_target_page_bits(int bits); /** * Sends a (part of) iovec down a socket, yielding when the socket is full, or diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 43b06458f1..878fa0700d 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -72,17 +72,17 @@ * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends(). */ -#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); barrier(); }) -#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); }) -#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); }) +#define smp_mb() ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); }) +#define smp_mb_release() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); }) +#define smp_mb_acquire() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); }) /* Most compilers currently treat consume and acquire the same, but really * no processors except Alpha need a barrier here. Leave it in if * using Thread Sanitizer to avoid warnings, otherwise optimize it away. */ #if defined(__SANITIZE_THREAD__) -#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); }) -#elsif defined(__alpha__) +#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); }) +#elif defined(__alpha__) #define smp_read_barrier_depends() asm volatile("mb":::"memory") #else #define smp_read_barrier_depends() barrier() @@ -92,19 +92,28 @@ /* Weak atomic operations prevent the compiler moving other * loads/stores past the atomic operation load/store. However there is * no explicit memory barrier for the processor. 
+ * + * The C11 memory model says that variables that are accessed from + * different threads should at least be done with __ATOMIC_RELAXED + * primitives or the result is undefined. Generally this has little to + * no effect on the generated code but not using the atomic primitives + * will get flagged by sanitizers as a violation. */ +#define atomic_read__nocheck(ptr) \ + __atomic_load_n(ptr, __ATOMIC_RELAXED) + #define atomic_read(ptr) \ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _val; \ - __atomic_load(ptr, &_val, __ATOMIC_RELAXED); \ - _val; \ + atomic_read__nocheck(ptr); \ }) +#define atomic_set__nocheck(ptr, i) \ + __atomic_store_n(ptr, i, __ATOMIC_RELAXED) + #define atomic_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - __atomic_store(ptr, &_val, __ATOMIC_RELAXED); \ + atomic_set__nocheck(ptr, i); \ } while(0) /* See above: most compilers currently treat consume and acquire the @@ -129,70 +138,46 @@ #define atomic_rcu_set(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \ + __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) -/* atomic_mb_read/set semantics map Java volatile variables. They are - * less expensive on some platforms (notably POWER & ARMv7) than fully - * sequentially consistent operations. - * - * As long as they are used as paired operations they are safe to - * use. See docs/atomic.txt for more discussion. - */ - -#if defined(_ARCH_PPC) -#define atomic_mb_read(ptr) \ - ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _val; \ - __atomic_load(ptr, &_val, __ATOMIC_RELAXED); \ - smp_rmb(); \ - _val; \ - }) - -#define atomic_mb_set(ptr, i) do { \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - smp_wmb(); \ - __atomic_store(ptr, &_val, __ATOMIC_RELAXED); \ - smp_mb(); \ -} while(0) -#else -#define atomic_mb_read(ptr) \ +#define atomic_load_acquire(ptr) \ ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ typeof_strip_qual(*ptr) _val; \ - __atomic_load(ptr, &_val, __ATOMIC_SEQ_CST); \ + __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE); \ _val; \ }) -#define atomic_mb_set(ptr, i) do { \ +#define atomic_store_release(ptr, i) do { \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof(*ptr) _val = (i); \ - __atomic_store(ptr, &_val, __ATOMIC_SEQ_CST); \ + __atomic_store_n(ptr, i, __ATOMIC_RELEASE); \ } while(0) -#endif /* All the remaining operations are fully sequentially consistent */ +#define atomic_xchg__nocheck(ptr, i) ({ \ + __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST); \ +}) + #define atomic_xchg(ptr, i) ({ \ QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _new = (i), _old; \ - __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \ - _old; \ + atomic_xchg__nocheck(ptr, i); \ }) /* Returns the eventual value, failed or not */ -#define atomic_cmpxchg(ptr, old, new) \ - ({ \ - QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ - typeof_strip_qual(*ptr) _old = (old), _new = (new); \ - __atomic_compare_exchange(ptr, &_old, &_new, false, \ +#define atomic_cmpxchg__nocheck(ptr, old, new) ({ \ + typeof_strip_qual(*ptr) _old = (old); \ + __atomic_compare_exchange_n(ptr, &_old, new, false, \ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ _old; \ - }) +}) + +#define atomic_cmpxchg(ptr, old, new) ({ \ + QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \ + 
atomic_cmpxchg__nocheck(ptr, old, new); \ +}) /* Provide shorter names for GCC atomic builtins, return old value */ #define atomic_fetch_inc(ptr) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST) @@ -201,6 +186,15 @@ #define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST) #define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST) #define atomic_fetch_or(ptr, n) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST) + +#define atomic_inc_fetch(ptr) __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST) +#define atomic_dec_fetch(ptr) __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST) +#define atomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_or_fetch(ptr, n) __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST) +#define atomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST) /* And even shorter names that return void. */ #define atomic_inc(ptr) ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)) @@ -209,6 +203,7 @@ #define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)) #define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)) #define atomic_or(ptr, n) ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)) +#define atomic_xor(ptr, n) ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)) #else /* __ATOMIC_RELAXED */ @@ -240,8 +235,8 @@ * here (a compiler barrier only). QEMU doesn't do accesses to write-combining * qemu memory or non-temporal load/stores from C code. */ -#define smp_wmb() barrier() -#define smp_rmb() barrier() +#define smp_mb_release() barrier() +#define smp_mb_acquire() barrier() /* * __sync_lock_test_and_set() is documented to be an acquire barrier only, @@ -250,11 +245,6 @@ */ #define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i)) -/* - * Load/store with Java volatile semantics. - */ -#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i)) - #elif defined(_ARCH_PPC) /* @@ -265,13 +255,15 @@ * smp_mb has the same problem as on x86 for not-very-new GCC * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011). */ -#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) +#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; }) #if defined(__powerpc64__) -#define smp_rmb() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) +#define smp_mb_release() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) +#define smp_mb_acquire() ({ asm volatile("lwsync" ::: "memory"); (void)0; }) #else -#define smp_rmb() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb_release() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb_acquire() ({ asm volatile("sync" ::: "memory"); (void)0; }) #endif -#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) +#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; }) #endif /* _ARCH_PPC */ @@ -279,18 +271,18 @@ * For (host) platforms we don't have explicit barrier definitions * for, we use the gcc __sync_synchronize() primitive to generate a * full barrier. This should be safe on all platforms, though it may - * be overkill for smp_wmb() and smp_rmb(). + * be overkill for smp_mb_acquire() and smp_mb_release(). 
*/ #ifndef smp_mb -#define smp_mb() __sync_synchronize() +#define smp_mb() __sync_synchronize() #endif -#ifndef smp_wmb -#define smp_wmb() __sync_synchronize() +#ifndef smp_mb_acquire +#define smp_mb_acquire() __sync_synchronize() #endif -#ifndef smp_rmb -#define smp_rmb() __sync_synchronize() +#ifndef smp_mb_release +#define smp_mb_release() __sync_synchronize() #endif #ifndef smp_read_barrier_depends @@ -300,8 +292,11 @@ /* These will only be atomic if the processor does the fetch or store * in a single issue memory operation */ -#define atomic_read(ptr) (*(__typeof__(*ptr) volatile*) (ptr)) -#define atomic_set(ptr, i) ((*(__typeof__(*ptr) volatile*) (ptr)) = (i)) +#define atomic_read__nocheck(p) (*(__typeof__(*(p)) volatile*) (p)) +#define atomic_set__nocheck(p, i) ((*(__typeof__(*(p)) volatile*) (p)) = (i)) + +#define atomic_read(ptr) atomic_read__nocheck(ptr) +#define atomic_set(ptr, i) atomic_set__nocheck(ptr,i) /** * atomic_rcu_read - reads a RCU-protected pointer to a local variable @@ -343,41 +338,16 @@ atomic_set(ptr, i); \ } while (0) -/* These have the same semantics as Java volatile variables. - * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html: - * "1. Issue a StoreStore barrier (wmb) before each volatile store." - * 2. Issue a StoreLoad barrier after each volatile store. - * Note that you could instead issue one before each volatile load, but - * this would be slower for typical programs using volatiles in which - * reads greatly outnumber writes. Alternatively, if available, you - * can implement volatile store as an atomic instruction (for example - * XCHG on x86) and omit the barrier. This may be more efficient if - * atomic instructions are cheaper than StoreLoad barriers. - * 3. Issue LoadLoad and LoadStore barriers after each volatile load." - * - * If you prefer to think in terms of "pairing" of memory barriers, - * an atomic_mb_read pairs with an atomic_mb_set. - * - * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq, - * while an atomic_mb_set is a st.rel followed by a memory barrier. - * - * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST - * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough. - * Just always use the barriers manually by the rules above. - */ -#define atomic_mb_read(ptr) ({ \ +#define atomic_load_acquire(ptr) ({ \ typeof(*ptr) _val = atomic_read(ptr); \ - smp_rmb(); \ + smp_mb_acquire(); \ _val; \ }) -#ifndef atomic_mb_set -#define atomic_mb_set(ptr, i) do { \ - smp_wmb(); \ +#define atomic_store_release(ptr, i) do { \ + smp_mb_release(); \ atomic_set(ptr, i); \ - smp_mb(); \ } while (0) -#endif #ifndef atomic_xchg #if defined(__clang__) @@ -387,15 +357,27 @@ #define atomic_xchg(ptr, i) (smp_mb(), __sync_lock_test_and_set(ptr, i)) #endif #endif +#define atomic_xchg__nocheck atomic_xchg /* Provide shorter names for GCC atomic builtins. 
*/ #define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) -#define atomic_fetch_add __sync_fetch_and_add -#define atomic_fetch_sub __sync_fetch_and_sub -#define atomic_fetch_and __sync_fetch_and_and -#define atomic_fetch_or __sync_fetch_and_or -#define atomic_cmpxchg __sync_val_compare_and_swap +#define atomic_fetch_add(ptr, n) __sync_fetch_and_add(ptr, n) +#define atomic_fetch_sub(ptr, n) __sync_fetch_and_sub(ptr, n) +#define atomic_fetch_and(ptr, n) __sync_fetch_and_and(ptr, n) +#define atomic_fetch_or(ptr, n) __sync_fetch_and_or(ptr, n) +#define atomic_fetch_xor(ptr, n) __sync_fetch_and_xor(ptr, n) + +#define atomic_inc_fetch(ptr) __sync_add_and_fetch(ptr, 1) +#define atomic_dec_fetch(ptr) __sync_add_and_fetch(ptr, -1) +#define atomic_add_fetch(ptr, n) __sync_add_and_fetch(ptr, n) +#define atomic_sub_fetch(ptr, n) __sync_sub_and_fetch(ptr, n) +#define atomic_and_fetch(ptr, n) __sync_and_and_fetch(ptr, n) +#define atomic_or_fetch(ptr, n) __sync_or_and_fetch(ptr, n) +#define atomic_xor_fetch(ptr, n) __sync_xor_and_fetch(ptr, n) + +#define atomic_cmpxchg(ptr, old, new) __sync_val_compare_and_swap(ptr, old, new) +#define atomic_cmpxchg__nocheck(ptr, old, new) atomic_cmpxchg(ptr, old, new) /* And even shorter names that return void. */ #define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) @@ -404,6 +386,42 @@ #define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) #define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) #define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n)) +#define atomic_xor(ptr, n) ((void) __sync_fetch_and_xor(ptr, n)) #endif /* __ATOMIC_RELAXED */ + +#ifndef smp_wmb +#define smp_wmb() smp_mb_release() +#endif +#ifndef smp_rmb +#define smp_rmb() smp_mb_acquire() +#endif + +/* This is more efficient than a store plus a fence. */ +#if !defined(__SANITIZE_THREAD__) +#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__) +#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i)) +#endif +#endif + +/* atomic_mb_read/set semantics map Java volatile variables. They are + * less expensive on some platforms (notably POWER) than fully + * sequentially consistent operations. + * + * As long as they are used as paired operations they are safe to + * use. See docs/atomic.txt for more discussion. + */ + +#ifndef atomic_mb_read +#define atomic_mb_read(ptr) \ + atomic_load_acquire(ptr) +#endif + +#ifndef atomic_mb_set +#define atomic_mb_set(ptr, i) do { \ + atomic_store_release(ptr, i); \ + smp_mb(); \ +} while(0) +#endif + #endif /* QEMU_ATOMIC_H */ diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index ec5146f84e..63ea2d0b1e 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -57,11 +57,8 @@ * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit */ -#define BITMAP_LAST_WORD_MASK(nbits) \ - ( \ - ((nbits) % BITS_PER_LONG) ? 
\ - (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ - ) +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) +#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] @@ -75,10 +72,6 @@ int slow_bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); void slow_bitmap_complement(unsigned long *dst, const unsigned long *src, long bits); -void slow_bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, long bits); -void slow_bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, long bits); int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, long bits); void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1, diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 98fb005aba..1881284cb5 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -218,7 +218,7 @@ static inline unsigned long hweight_long(unsigned long w) */ static inline uint8_t rol8(uint8_t word, unsigned int shift) { - return (word << shift) | (word >> (8 - shift)); + return (word << shift) | (word >> ((8 - shift) & 7)); } /** @@ -228,7 +228,7 @@ static inline uint8_t rol8(uint8_t word, unsigned int shift) */ static inline uint8_t ror8(uint8_t word, unsigned int shift) { - return (word >> shift) | (word << (8 - shift)); + return (word >> shift) | (word << ((8 - shift) & 7)); } /** @@ -238,7 +238,7 @@ static inline uint8_t ror8(uint8_t word, unsigned int shift) */ static inline uint16_t rol16(uint16_t word, unsigned int shift) { - return (word << shift) | (word >> (16 - shift)); + return (word << shift) | (word >> ((16 - shift) & 15)); } /** @@ -248,7 +248,7 @@ static inline uint16_t rol16(uint16_t word, unsigned int shift) */ static inline uint16_t ror16(uint16_t word, unsigned int shift) { - return (word >> shift) | (word << (16 - shift)); + return (word >> shift) | (word << ((16 - shift) & 15)); } /** @@ -258,7 +258,7 @@ static inline uint16_t ror16(uint16_t word, unsigned int shift) */ static inline uint32_t rol32(uint32_t word, unsigned int shift) { - return (word << shift) | (word >> (32 - shift)); + return (word << shift) | (word >> ((32 - shift) & 31)); } /** @@ -268,7 +268,7 @@ static inline uint32_t rol32(uint32_t word, unsigned int shift) */ static inline uint32_t ror32(uint32_t word, unsigned int shift) { - return (word >> shift) | (word << (32 - shift)); + return (word >> shift) | (word << ((32 - shift) & 31)); } /** @@ -278,7 +278,7 @@ static inline uint32_t ror32(uint32_t word, unsigned int shift) */ static inline uint64_t rol64(uint64_t word, unsigned int shift) { - return (word << shift) | (word >> (64 - shift)); + return (word << shift) | (word >> ((64 - shift) & 63)); } /** @@ -288,7 +288,7 @@ static inline uint64_t rol64(uint64_t word, unsigned int shift) */ static inline uint64_t ror64(uint64_t word, unsigned int shift) { - return (word >> shift) | (word << (64 - shift)); + return (word >> shift) | (word << ((64 - shift) & 63)); } /** diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 338d3a65b3..157698bfa9 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -1,4 +1,8 @@ -/* public domain */ +/* compiler.h: macros to abstract away compiler specifics + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ #ifndef COMPILER_H #define COMPILER_H diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h index ac8d4c9cc8..e6a60d55fd 100644 --- a/include/qemu/coroutine.h +++ b/include/qemu/coroutine.h @@ -92,6 +92,19 @@ Coroutine *coroutine_fn qemu_coroutine_self(void); */ bool qemu_in_coroutine(void); +/** + * Return true if the coroutine is currently entered + * + * A coroutine is "entered" if it has not yielded from the current + * qemu_coroutine_enter() call used to run it. This does not mean that the + * coroutine is currently executing code since it may have transferred control + * to another coroutine using qemu_coroutine_enter(). + * + * When several coroutines enter each other there may be no way to know which + * ones have already been entered. In such situations this function can be + * used to avoid recursively entering coroutines. + */ +bool qemu_coroutine_entered(Coroutine *co); /** @@ -143,6 +156,7 @@ bool qemu_co_queue_empty(CoQueue *queue); */ typedef struct CoMutex { bool locked; + Coroutine *holder; CoQueue queue; } CoMutex; diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index 581a7f5140..14d4f1d1f2 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -28,6 +28,8 @@ #include "qemu/queue.h" #include "qemu/coroutine.h" +#define COROUTINE_STACK_SIZE (1 << 20) + typedef enum { COROUTINE_YIELD = 1, COROUTINE_TERMINATE = 2, @@ -39,6 +41,7 @@ struct Coroutine { void *entry_arg; Coroutine *caller; QSLIST_ENTRY(Coroutine) pool_next; + size_t locks_held; /* Coroutines that should be woken up when we yield or terminate */ QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup; diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h index 3e4ea236f0..8033929139 100644 --- a/include/qemu/cutils.h +++ b/include/qemu/cutils.h @@ -168,9 +168,8 @@ int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, /* used to print char* safely */ #define STR_OR_NULL(str) ((str) ? (str) : "null") -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len); -size_t buffer_find_nonzero_offset(const void *buf, size_t len); bool buffer_is_zero(const void *buf, size_t len); +bool test_buffer_is_zero_next_accel(void); /* * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h index 499ec8b12a..3001865896 100644 --- a/include/qemu/error-report.h +++ b/include/qemu/error-report.h @@ -32,6 +32,7 @@ void loc_set_file(const char *fname, int lno); void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2); +void error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2); void error_set_progname(const char *argv0); void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0); diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index 8ab721e5aa..eb464759d5 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -145,6 +145,85 @@ void hbitmap_reset_all(HBitmap *hb); */ bool hbitmap_get(const HBitmap *hb, uint64_t item); +/** + * hbitmap_serialization_granularity: + * @hb: HBitmap to operate on. + * + * Granularity of serialization chunks, used by other serialization functions. + * For every chunk: + * 1. Chunk start should be aligned to this granularity. + * 2. 
Chunk size should be aligned too, except for last chunk (for which + * start + count == hb->size) + */ +uint64_t hbitmap_serialization_granularity(const HBitmap *hb); + +/** + * hbitmap_serialization_size: + * @hb: HBitmap to operate on. + * @start: Starting bit + * @count: Number of bits + * + * Return number of bytes hbitmap_(de)serialize_part needs + */ +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count); + +/** + * hbitmap_serialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to store serialized bitmap. + * @start: First bit to store. + * @count: Number of bits to store. + * + * Stores HBitmap data corresponding to given region. The format of saved data + * is linear sequence of bits, so it can be used by hbitmap_deserialize_part + * independently of endianness and size of HBitmap level array elements + */ +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count); + +/** + * hbitmap_deserialize_part + * @hb: HBitmap to operate on. + * @buf: Buffer to restore bitmap data from. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Restores HBitmap data corresponding to given region. The format is the same + * as for hbitmap_serialize_part. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_zeroes + * @hb: HBitmap to operate on. + * @start: First bit to restore. + * @count: Number of bits to restore. + * @finish: Whether to call hbitmap_deserialize_finish automatically. + * + * Fills the bitmap with zeroes. + * + * If @finish is false, caller must call hbitmap_serialize_finish before using + * the bitmap. + */ +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish); + +/** + * hbitmap_deserialize_finish + * @hb: HBitmap to operate on. + * + * Repair HBitmap after calling hbitmap_deserialize_data. Actually, all HBitmap + * layers are restored here. + */ +void hbitmap_deserialize_finish(HBitmap *hb); + /** * hbitmap_free: * @hb: HBitmap to operate on. @@ -178,6 +257,27 @@ void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first); */ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi); +/* hbitmap_create_meta: + * Create a "meta" hbitmap to track dirtiness of the bits in this HBitmap. + * The caller owns the created bitmap and must call hbitmap_free_meta(hb) to + * free it. + * + * Currently, we only guarantee that if a bit in the hbitmap is changed it + * will be reflected in the meta bitmap, but we do not yet guarantee the + * opposite. + * + * @hb: The HBitmap to operate on. + * @chunk_size: How many bits in @hb does one bit in the meta track. + */ +HBitmap *hbitmap_create_meta(HBitmap *hb, int chunk_size); + +/* hbitmap_free_meta: + * Free the meta bitmap of @hb. + * + * @hb: The HBitmap whose meta bitmap should be freed. + */ +void hbitmap_free_meta(HBitmap *hb); + /** * hbitmap_iter_next: * @hbi: HBitmapIter to operate on. 
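
The serialization helpers documented above are meant to be composed chunk by chunk: cut the region on the boundary reported by hbitmap_serialization_granularity(), write each chunk out with hbitmap_serialize_part(), feed it back with hbitmap_deserialize_part(), and repair the destination levels once with hbitmap_deserialize_finish(). A minimal sketch of that flow, assuming @start is aligned to the reported granularity and @count is either a multiple of it or runs to the end of the bitmap, and using the usual g_malloc()/g_free() and MIN() helpers; copy_bitmap_region itself is a hypothetical name, not part of this patch:

#include "qemu/osdep.h"
#include "qemu/hbitmap.h"

static void copy_bitmap_region(const HBitmap *src, HBitmap *dst,
                               uint64_t start, uint64_t count)
{
    uint64_t chunk = hbitmap_serialization_granularity(src);
    uint64_t pos;

    for (pos = start; pos < start + count; pos += chunk) {
        uint64_t n = MIN(chunk, start + count - pos);
        uint64_t len = hbitmap_serialization_size(src, pos, n);
        uint8_t *buf = g_malloc(len);

        hbitmap_serialize_part(src, buf, pos, n);
        /* finish=false: repair all levels only once, after the last chunk */
        hbitmap_deserialize_part(dst, buf, pos, n, false);
        g_free(buf);
    }
    hbitmap_deserialize_finish(dst);
}
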
diff --git a/include/qemu/int128.h b/include/qemu/int128.h index c5988813df..5c9890db8b 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -1,6 +1,149 @@ #ifndef INT128_H #define INT128_H +#ifdef CONFIG_INT128 +#include "qemu/bswap.h" + +typedef __int128_t Int128; + +static inline Int128 int128_make64(uint64_t a) +{ + return a; +} + +static inline Int128 int128_make128(uint64_t lo, uint64_t hi) +{ + return (__uint128_t)hi << 64 | lo; +} + +static inline uint64_t int128_get64(Int128 a) +{ + uint64_t r = a; + assert(r == a); + return r; +} + +static inline uint64_t int128_getlo(Int128 a) +{ + return a; +} + +static inline int64_t int128_gethi(Int128 a) +{ + return a >> 64; +} + +static inline Int128 int128_zero(void) +{ + return 0; +} + +static inline Int128 int128_one(void) +{ + return 1; +} + +static inline Int128 int128_2_64(void) +{ + return (Int128)1 << 64; +} + +static inline Int128 int128_exts64(int64_t a) +{ + return a; +} + +static inline Int128 int128_and(Int128 a, Int128 b) +{ + return a & b; +} + +static inline Int128 int128_rshift(Int128 a, int n) +{ + return a >> n; +} + +static inline Int128 int128_add(Int128 a, Int128 b) +{ + return a + b; +} + +static inline Int128 int128_neg(Int128 a) +{ + return -a; +} + +static inline Int128 int128_sub(Int128 a, Int128 b) +{ + return a - b; +} + +static inline bool int128_nonneg(Int128 a) +{ + return a >= 0; +} + +static inline bool int128_eq(Int128 a, Int128 b) +{ + return a == b; +} + +static inline bool int128_ne(Int128 a, Int128 b) +{ + return a != b; +} + +static inline bool int128_ge(Int128 a, Int128 b) +{ + return a >= b; +} + +static inline bool int128_lt(Int128 a, Int128 b) +{ + return a < b; +} + +static inline bool int128_le(Int128 a, Int128 b) +{ + return a <= b; +} + +static inline bool int128_gt(Int128 a, Int128 b) +{ + return a > b; +} + +static inline bool int128_nz(Int128 a) +{ + return a != 0; +} + +static inline Int128 int128_min(Int128 a, Int128 b) +{ + return a < b ? a : b; +} + +static inline Int128 int128_max(Int128 a, Int128 b) +{ + return a > b ? a : b; +} + +static inline void int128_addto(Int128 *a, Int128 b) +{ + *a += b; +} + +static inline void int128_subfrom(Int128 *a, Int128 b) +{ + *a -= b; +} + +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +} + +#else /* !CONFIG_INT128 */ typedef struct Int128 Int128; @@ -14,12 +157,27 @@ static inline Int128 int128_make64(uint64_t a) return (Int128) { a, 0 }; } +static inline Int128 int128_make128(uint64_t lo, uint64_t hi) +{ + return (Int128) { lo, hi }; +} + static inline uint64_t int128_get64(Int128 a) { assert(!a.hi); return a.lo; } +static inline uint64_t int128_getlo(Int128 a) +{ + return a.lo; +} + +static inline int64_t int128_gethi(Int128 a) +{ + return a.hi; +} + static inline Int128 int128_zero(void) { return int128_make64(0); @@ -53,9 +211,9 @@ static inline Int128 int128_rshift(Int128 a, int n) } h = a.hi >> (n & 63); if (n >= 64) { - return (Int128) { h, h >> 63 }; + return int128_make128(h, h >> 63); } else { - return (Int128) { (a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h }; + return int128_make128((a.lo >> n) | ((uint64_t)a.hi << (64 - n)), h); } } @@ -69,18 +227,18 @@ static inline Int128 int128_add(Int128 a, Int128 b) * * So the carry is lo < a.lo. 
*/ - return (Int128) { lo, (uint64_t)a.hi + b.hi + (lo < a.lo) }; + return int128_make128(lo, (uint64_t)a.hi + b.hi + (lo < a.lo)); } static inline Int128 int128_neg(Int128 a) { uint64_t lo = -a.lo; - return (Int128) { lo, ~(uint64_t)a.hi + !lo }; + return int128_make128(lo, ~(uint64_t)a.hi + !lo); } static inline Int128 int128_sub(Int128 a, Int128 b) { - return (Int128){ a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo) }; + return int128_make128(a.lo - b.lo, (uint64_t)a.hi - b.hi - (a.lo < b.lo)); } static inline bool int128_nonneg(Int128 a) @@ -143,4 +301,5 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a = int128_sub(*a, b); } -#endif +#endif /* CONFIG_INT128 */ +#endif /* INT128_H */ diff --git a/include/qemu/jhash.h b/include/qemu/jhash.h new file mode 100644 index 0000000000..7222242615 --- /dev/null +++ b/include/qemu/jhash.h @@ -0,0 +1,59 @@ +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are my fault. + * Jozsef + */ + +#ifndef QEMU_JHASH_H__ +#define QEMU_JHASH_H__ + +#include "qemu/bitops.h" + +/* + * hashtable relation copy from linux kernel jhash + */ + +/* __jhash_mix -- mix 3 32-bit values reversibly. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +/* An arbitrary initial parameter */ +#define JHASH_INITVAL 0xdeadbeef + +#endif /* QEMU_JHASH_H__ */ diff --git a/include/qemu/log.h b/include/qemu/log.h index 6a9bd0177a..74c7fd66ef 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -57,6 +57,22 @@ static inline bool qemu_loglevel_mask(int mask) return (qemu_loglevel & mask) != 0; } +/* Lock output for a series of related logs. Since this is not needed + * for a single qemu_log / qemu_log_mask / qemu_log_mask_and_addr, we + * assume that qemu_loglevel_mask has already been tested, and that + * qemu_loglevel is never set when qemu_logfile is unset. 
+ */ + +static inline void qemu_log_lock(void) +{ + qemu_flockfile(qemu_logfile); +} + +static inline void qemu_log_unlock(void) +{ + qemu_funlockfile(qemu_logfile); +} + /* Logging functions: */ /* main logging function diff --git a/include/qemu/module.h b/include/qemu/module.h index 2370708445..877cca7d7b 100644 --- a/include/qemu/module.h +++ b/include/qemu/module.h @@ -44,6 +44,7 @@ typedef enum { MODULE_INIT_OPTS, MODULE_INIT_QAPI, MODULE_INIT_QOM, + MODULE_INIT_TRACE, MODULE_INIT_MAX } module_init_type; @@ -51,10 +52,14 @@ typedef enum { #define opts_init(function) module_init(function, MODULE_INIT_OPTS) #define qapi_init(function) module_init(function, MODULE_INIT_QAPI) #define type_init(function) module_init(function, MODULE_INIT_QOM) +#define trace_init(function) module_init(function, MODULE_INIT_TRACE) + +#define block_module_load_one(lib) module_load_one("block-", lib) void register_module_init(void (*fn)(void), module_init_type type); void register_dso_module_init(void (*fn)(void), module_init_type type); void module_call_init(module_init_type type); +void module_load_one(const char *prefix, const char *lib_name); #endif diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9e9fa61546..689f253ea7 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -128,6 +128,9 @@ extern int daemon(int, int); #if !defined(EMEDIUMTYPE) #define EMEDIUMTYPE 4098 #endif +#if !defined(ESHUTDOWN) +#define ESHUTDOWN 4099 +#endif #ifndef TIME_MAX #define TIME_MAX LONG_MAX #endif @@ -141,6 +144,14 @@ extern int daemon(int, int); # error Unknown pointer size #endif +/* Mac OSX has a bug that incorrectly defines SIZE_MAX with + * the wrong type. Our replacement isn't usable in preprocessor + * expressions, but it is sufficient for our needs. */ +#if defined(HAVE_BROKEN_SIZE_MAX) && HAVE_BROKEN_SIZE_MAX +#undef SIZE_MAX +#define SIZE_MAX ((size_t)-1) +#endif + #ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #endif @@ -387,6 +398,16 @@ void os_mem_prealloc(int fd, char *area, size_t sz, Error **errp); int qemu_read_password(char *buf, int buf_size); +/** + * qemu_get_pid_name: + * @pid: pid of a process + * + * For given @pid fetch its name. Caller is responsible for + * freeing the string when no longer needed. + * Returns allocated string on success, NULL on failure. + */ +char *qemu_get_pid_name(pid_t pid); + /** * qemu_fork: * diff --git a/include/qemu/queue.h b/include/qemu/queue.h index c2b6c8149d..342073fb4d 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -407,6 +407,7 @@ struct { \ else \ (head)->tqh_last = (elm)->field.tqe_prev; \ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ + (elm)->field.tqe_prev = NULL; \ } while (/*CONSTCOND*/0) #define QTAILQ_FOREACH(var, head, field) \ @@ -430,6 +431,7 @@ struct { \ #define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL) #define QTAILQ_FIRST(head) ((head)->tqh_first) #define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define QTAILQ_IN_USE(elm, field) ((elm)->field.tqe_prev != NULL) #define QTAILQ_LAST(head, headname) \ (*(((struct headname *)((head)->tqh_last))->tqh_last)) diff --git a/include/qemu/rfifolock.h b/include/qemu/rfifolock.h deleted file mode 100644 index b23ab538a6..0000000000 --- a/include/qemu/rfifolock.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Recursive FIFO lock - * - * Copyright Red Hat, Inc. 2013 - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef QEMU_RFIFOLOCK_H -#define QEMU_RFIFOLOCK_H - -#include "qemu/thread.h" - -/* Recursive FIFO lock - * - * This lock provides more features than a plain mutex: - * - * 1. Fairness - enforces FIFO order. - * 2. Nesting - can be taken recursively. - * 3. Contention callback - optional, called when thread must wait. - * - * The recursive FIFO lock is heavyweight so prefer other synchronization - * primitives if you do not need its features. - */ -typedef struct { - QemuMutex lock; /* protects all fields */ - - /* FIFO order */ - unsigned int head; /* active ticket number */ - unsigned int tail; /* waiting ticket number */ - QemuCond cond; /* used to wait for our ticket number */ - - /* Nesting */ - QemuThread owner_thread; /* thread that currently has ownership */ - unsigned int nesting; /* amount of nesting levels */ - - /* Contention callback */ - void (*cb)(void *); /* called when thread must wait, with ->lock - * held so it may not recursively lock/unlock - */ - void *cb_opaque; -} RFifoLock; - -void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque); -void rfifolock_destroy(RFifoLock *r); -void rfifolock_lock(RFifoLock *r); -void rfifolock_unlock(RFifoLock *r); - -#endif /* QEMU_RFIFOLOCK_H */ diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h index 2e2be4c4f0..8dee11d101 100644 --- a/include/qemu/seqlock.h +++ b/include/qemu/seqlock.h @@ -31,7 +31,7 @@ static inline void seqlock_init(QemuSeqLock *sl) /* Lock out other writers and update the count. */ static inline void seqlock_write_begin(QemuSeqLock *sl) { - ++sl->sequence; + atomic_set(&sl->sequence, sl->sequence + 1); /* Write sequence before updating other fields. */ smp_wmb(); @@ -42,7 +42,7 @@ static inline void seqlock_write_end(QemuSeqLock *sl) /* Write other fields before finalizing sequence. 
*/ smp_wmb(); - ++sl->sequence; + atomic_set(&sl->sequence, sl->sequence + 1); } static inline unsigned seqlock_read_begin(QemuSeqLock *sl) diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h index 9eb24707df..5589e6842b 100644 --- a/include/qemu/sockets.h +++ b/include/qemu/sockets.h @@ -34,6 +34,8 @@ typedef void NonBlockingConnectHandler(int fd, Error *err, void *opaque); InetSocketAddress *inet_parse(const char *str, Error **errp); int inet_connect(const char *str, Error **errp); +int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque); NetworkAddressFamily inet_netfamily(int family); diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index aa03567e5e..09d1e15728 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -4,6 +4,12 @@ #include #include +typedef QemuMutex QemuRecMutex; +#define qemu_rec_mutex_destroy qemu_mutex_destroy +#define qemu_rec_mutex_lock qemu_mutex_lock +#define qemu_rec_mutex_try_lock qemu_mutex_try_lock +#define qemu_rec_mutex_unlock qemu_mutex_unlock + struct QemuMutex { pthread_mutex_t lock; }; diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h index c7ce8dcd45..5fb6541ae9 100644 --- a/include/qemu/thread-win32.h +++ b/include/qemu/thread-win32.h @@ -8,6 +8,16 @@ struct QemuMutex { LONG owner; }; +typedef struct QemuRecMutex QemuRecMutex; +struct QemuRecMutex { + CRITICAL_SECTION lock; +}; + +void qemu_rec_mutex_destroy(QemuRecMutex *mutex); +void qemu_rec_mutex_lock(QemuRecMutex *mutex); +int qemu_rec_mutex_trylock(QemuRecMutex *mutex); +void qemu_rec_mutex_unlock(QemuRecMutex *mutex); + struct QemuCond { LONG waiters, target; HANDLE sema; diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 31237e93ee..e8e665f020 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -25,6 +25,9 @@ void qemu_mutex_lock(QemuMutex *mutex); int qemu_mutex_trylock(QemuMutex *mutex); void qemu_mutex_unlock(QemuMutex *mutex); +/* Prototypes for other functions are in thread-posix.h/thread-win32.h. */ +void qemu_rec_mutex_init(QemuRecMutex *mutex); + void qemu_cond_init(QemuCond *cond); void qemu_cond_destroy(QemuCond *cond); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index aa6409a636..d2b806f5f0 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -22,23 +22,20 @@ * @QEMU_CLOCK_REALTIME: Real time clock * * The real time clock should be used only for stuff which does not - * change the virtual machine state, as it is run even if the virtual - * machine is stopped. The real time clock has a frequency of 1000 - * Hz. + * change the virtual machine state, as it runs even if the virtual + * machine is stopped. * * @QEMU_CLOCK_VIRTUAL: virtual clock * - * The virtual clock is only run during the emulation. It is stopped - * when the virtual machine is stopped. Virtual timers use a high - * precision clock, usually cpu cycles (use ticks_per_sec). + * The virtual clock only runs during the emulation. It stops + * when the virtual machine is stopped. * * @QEMU_CLOCK_HOST: host clock * - * The host clock should be use for device models that emulate accurate + * The host clock should be used for device models that emulate accurate * real time sources. It will continue to run when the virtual machine * is suspended, and it will reflect system time changes the host may - * undergo (e.g. due to NTP). The host clock has the same precision as - * the virtual clock. + * undergo (e.g. due to NTP). 
* * @QEMU_CLOCK_VIRTUAL_RT: realtime clock used for icount warp * @@ -76,10 +73,6 @@ struct QEMUTimer { extern QEMUTimerListGroup main_loop_tlg; -/* - * QEMUClockType - */ - /* * qemu_clock_get_ns; * @type: the clock type @@ -179,7 +172,7 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type); * qemu_clock_get_main_loop_timerlist: * @type: the clock type * - * Return the default timer list assocatiated with a clock. + * Return the default timer list associated with a clock. * * Returns: the default timer list */ @@ -431,6 +424,7 @@ void timer_init_tl(QEMUTimer *ts, /** * timer_init: + * @ts: the timer to be initialised * @type: the clock to associate with the timer * @scale: the scale value for the timer * @cb: the callback to call when the timer expires @@ -450,6 +444,7 @@ static inline void timer_init(QEMUTimer *ts, QEMUClockType type, int scale, /** * timer_init_ns: + * @ts: the timer to be initialised * @type: the clock to associate with the timer * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback @@ -468,6 +463,7 @@ static inline void timer_init_ns(QEMUTimer *ts, QEMUClockType type, /** * timer_init_us: + * @ts: the timer to be initialised * @type: the clock to associate with the timer * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback @@ -486,6 +482,7 @@ static inline void timer_init_us(QEMUTimer *ts, QEMUClockType type, /** * timer_init_ms: + * @ts: the timer to be initialised * @type: the clock to associate with the timer * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback @@ -509,7 +506,7 @@ static inline void timer_init_ms(QEMUTimer *ts, QEMUClockType type, * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Creeate a new timer and associate it with @timer_list. + * Create a new timer and associate it with @timer_list. * The memory is allocated by the function. * * This is not the preferred interface unless you know you @@ -534,7 +531,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, * @cb: the callback to be called when the timer expires * @opaque: the opaque pointer to be passed to the callback * - * Creeate a new timer and associate it with the default + * Create a new timer and associate it with the default * timer list for the clock type @type. 
* * Returns: a pointer to the timer @@ -547,8 +544,8 @@ static inline QEMUTimer *timer_new(QEMUClockType type, int scale, /** * timer_new_ns: - * @clock: the clock to associate with the timer - * @callback: the callback to call when the timer expires + * @type: the clock type to associate with the timer + * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * * Create a new timer with nanosecond scale on the default timer list @@ -564,8 +561,8 @@ static inline QEMUTimer *timer_new_ns(QEMUClockType type, QEMUTimerCB *cb, /** * timer_new_us: - * @clock: the clock to associate with the timer - * @callback: the callback to call when the timer expires + * @type: the clock type to associate with the timer + * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * * Create a new timer with microsecond scale on the default timer list @@ -581,8 +578,8 @@ static inline QEMUTimer *timer_new_us(QEMUClockType type, QEMUTimerCB *cb, /** * timer_new_ms: - * @clock: the clock to associate with the timer - * @callback: the callback to call when the timer expires + * @type: the clock type to associate with the timer + * @cb: the callback to call when the timer expires * @opaque: the opaque pointer to pass to the callback * * Create a new timer with millisecond scale on the default timer list @@ -691,6 +688,7 @@ bool timer_pending(QEMUTimer *ts); /** * timer_expired: * @ts: the timer + * @current_time: the current time * * Determines whether a timer has expired. * @@ -797,7 +795,7 @@ static inline int64_t get_max_clock_jump(void) * Low level clock functions */ -/* real time host monotonic timer */ +/* get host real time in nanosecond */ static inline int64_t get_clock_realtime(void) { struct timeval tv; diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h index b113fcf156..1b8c30a7a0 100644 --- a/include/qemu/typedefs.h +++ b/include/qemu/typedefs.h @@ -11,6 +11,7 @@ typedef struct AioContext AioContext; typedef struct AllwinnerAHCIState AllwinnerAHCIState; typedef struct AudioState AudioState; typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +typedef struct BdrvDirtyBitmapIter BdrvDirtyBitmapIter; typedef struct BlockBackend BlockBackend; typedef struct BlockBackendRootState BlockBackendRootState; typedef struct BlockDriverState BlockDriverState; diff --git a/include/qemu/uri.h b/include/qemu/uri.h index de99b3bd4b..d201c61260 100644 --- a/include/qemu/uri.h +++ b/include/qemu/uri.h @@ -102,8 +102,6 @@ typedef struct QueryParams { } QueryParams; struct QueryParams *query_params_new (int init_alloc); -int query_param_append (QueryParams *ps, const char *name, const char *value); -extern char *query_param_to_string (const QueryParams *ps); extern QueryParams *query_params_parse (const char *query); extern void query_params_free (QueryParams *ps); diff --git a/include/qemu/uuid.h b/include/qemu/uuid.h new file mode 100644 index 0000000000..afe4840296 --- /dev/null +++ b/include/qemu/uuid.h @@ -0,0 +1,59 @@ +/* + * QEMU UUID functions + * + * Copyright 2016 Red Hat, Inc. + * + * Authors: + * Fam Zheng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef QEMU_UUID_H +#define QEMU_UUID_H + +#include "qemu-common.h" + +/* Version 4 UUID (pseudo random numbers), RFC4122 4.4. 
*/ + +typedef struct { + union { + unsigned char data[16]; + struct { + /* Generated in BE endian, can be swapped with qemu_uuid_bswap. */ + uint32_t time_low; + uint16_t time_mid; + uint16_t time_high_and_version; + uint8_t clock_seq_and_reserved; + uint8_t clock_seq_low; + uint8_t node[6]; + } fields; + }; +} QemuUUID; + +#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-" \ + "%02hhx%02hhx-%02hhx%02hhx-" \ + "%02hhx%02hhx-" \ + "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" + +#define UUID_FMT_LEN 36 + +#define UUID_NONE "00000000-0000-0000-0000-000000000000" + +void qemu_uuid_generate(QemuUUID *out); + +int qemu_uuid_is_null(const QemuUUID *uu); + +void qemu_uuid_unparse(const QemuUUID *uuid, char *out); + +char *qemu_uuid_unparse_strdup(const QemuUUID *uuid); + +int qemu_uuid_parse(const char *str, QemuUUID *uuid); + +void qemu_uuid_bswap(QemuUUID *uuid); + +#endif diff --git a/include/qom/cpu.h b/include/qom/cpu.h index ce0c406f27..3f79a8e955 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -27,7 +27,6 @@ #include "qemu/bitmap.h" #include "qemu/queue.h" #include "qemu/thread.h" -#include "trace/generated-events.h" typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, void *opaque); @@ -232,13 +231,26 @@ struct kvm_run; #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) /* work queue */ -struct qemu_work_item { - struct qemu_work_item *next; - void (*func)(void *data); - void *data; - int done; - bool free; -}; + +/* The union type allows passing of 64 bit target pointers on 32 bit + * hosts in a single parameter + */ +typedef union { + int host_int; + unsigned long host_ulong; + void *host_ptr; + vaddr target_ptr; +} run_on_cpu_data; + +#define RUN_ON_CPU_HOST_PTR(p) ((run_on_cpu_data){.host_ptr = (p)}) +#define RUN_ON_CPU_HOST_INT(i) ((run_on_cpu_data){.host_int = (i)}) +#define RUN_ON_CPU_HOST_ULONG(ul) ((run_on_cpu_data){.host_ulong = (ul)}) +#define RUN_ON_CPU_TARGET_PTR(v) ((run_on_cpu_data){.target_ptr = (v)}) +#define RUN_ON_CPU_NULL RUN_ON_CPU_HOST_PTR(NULL) + +typedef void (*run_on_cpu_func)(CPUState *cpu, run_on_cpu_data data); + +struct qemu_work_item; /** * CPUState: @@ -247,7 +259,9 @@ struct qemu_work_item { * @nr_threads: Number of threads within this CPU. * @numa_node: NUMA node this CPU is belonging to. * @host_tid: Host thread ID. - * @running: #true if CPU is currently running (usermode). + * @running: #true if CPU is currently running (lockless). + * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end; + * valid under cpu_list_lock. * @created: Indicates whether the CPU thread has been successfully created. * @interrupt_request: Indicates a pending interrupt request. * @halted: Nonzero if the CPU is in suspended state. @@ -257,7 +271,6 @@ struct qemu_work_item { * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU * @tcg_exit_req: Set to force TCG to stop executing linked TBs for this * CPU and return to its top level loop. - * @tb_flushed: Indicates the translation buffer has been flushed. * @singlestep_enabled: Flags for single-stepping. * @icount_extra: Instructions until next timer event. * @icount_decr: Number of cycles left, with interrupt flag in high bit. 
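
The run_on_cpu_data union and the RUN_ON_CPU_* wrappers above let a single data parameter carry either host values or full-width target addresses, even on 32-bit hosts; a worker with the run_on_cpu_func signature is expected to read back the same member the caller packed. A minimal sketch of that pairing against the run_on_cpu/async_run_on_cpu prototypes updated further down in this patch (flush_one_page and example_kick are hypothetical names):

#include "qemu/osdep.h"
#include "qom/cpu.h"

/* Worker: unpack the member that the caller wrapped. */
static void flush_one_page(CPUState *cpu, run_on_cpu_data data)
{
    vaddr addr = data.target_ptr;

    /* ... operate on @cpu at guest address @addr ... */
    (void)cpu;
    (void)addr;
}

/* Caller: wrap the argument with the macro matching the member read above. */
static void example_kick(CPUState *cpu, vaddr addr)
{
    async_run_on_cpu(cpu, flush_one_page, RUN_ON_CPU_TARGET_PTR(addr));
}
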
@@ -301,7 +314,7 @@ struct CPUState { #endif int thread_id; uint32_t host_tid; - bool running; + bool running, has_waiter; struct QemuCond *halt_cond; bool thread_kicked; bool created; @@ -310,7 +323,6 @@ struct CPUState { bool unplug; bool crash_occurred; bool exit_request; - bool tb_flushed; uint32_t interrupt_request; int singlestep_enabled; int64_t icount_extra; @@ -325,7 +337,10 @@ struct CPUState { MemoryRegion *memory; void *env_ptr; /* CPUArchState */ + + /* Writes protected by tb_lock, reads not thread-safe */ struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; + struct GDBRegisterState *gdb_regs; int gdb_num_regs; int gdb_num_g_regs; @@ -350,8 +365,12 @@ struct CPUState { struct KVMState *kvm_state; struct kvm_run *kvm_run; - /* Used for events with 'vcpu' and *without* the 'disabled' properties */ - DECLARE_BITMAP(trace_dstate, TRACE_VCPU_EVENT_COUNT); + /* + * Used for events with 'vcpu' and *without* the 'disabled' properties. + * Dynamically allocated based on bitmap requried to hold up to + * trace_get_vcpu_event_count() entries. + */ + unsigned long *trace_dstate; /* TODO Move common fields from CPUArchState here. */ int cpu_index; /* used by alpha TCG */ @@ -542,6 +561,18 @@ static inline int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs) } #endif +/** + * cpu_list_add: + * @cpu: The CPU to be added to the list of CPUs. + */ +void cpu_list_add(CPUState *cpu); + +/** + * cpu_list_remove: + * @cpu: The CPU to be removed from the list of CPUs. + */ +void cpu_list_remove(CPUState *cpu); + /** * cpu_reset: * @cpu: The CPU whose state is to be reset. @@ -615,6 +646,18 @@ void qemu_cpu_kick(CPUState *cpu); */ bool cpu_is_stopped(CPUState *cpu); +/** + * do_run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * @mutex: Mutex to release while waiting for @func to run. + * + * Used internally in the implementation of run_on_cpu. + */ +void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data, + QemuMutex *mutex); + /** * run_on_cpu: * @cpu: The vCPU to run on. @@ -623,7 +666,7 @@ bool cpu_is_stopped(CPUState *cpu); * * Schedules the function @func for execution on the vCPU @cpu. */ -void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data); +void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data); /** * async_run_on_cpu: @@ -633,7 +676,21 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data); * * Schedules the function @func for execution on the vCPU @cpu asynchronously. */ -void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data); +void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data); + +/** + * async_safe_run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * + * Schedules the function @func for execution on the vCPU @cpu asynchronously, + * while all other vCPUs are sleeping. + * + * Unlike run_on_cpu and async_run_on_cpu, the function is run outside the + * BQL. + */ +void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data); /** * qemu_get_cpu: @@ -793,6 +850,49 @@ void cpu_remove(CPUState *cpu); */ void cpu_remove_sync(CPUState *cpu); +/** + * process_queued_cpu_work() - process all items on CPU work queue + * @cpu: The CPU which work queue to process. + */ +void process_queued_cpu_work(CPUState *cpu); + +/** + * cpu_exec_start: + * @cpu: The CPU for the current thread. 
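To illustrate the new run_on_cpu_func signature and the RUN_ON_CPU_* wrappers added to include/qom/cpu.h above, a hedged sketch of scheduling work on a vCPU; the helper and the flag it sets are hypothetical.

    #include "qemu/osdep.h"
    #include "qom/cpu.h"

    /* Hypothetical work item: runs on @cpu's thread with the packed data. */
    static void example_set_flag(CPUState *cpu, run_on_cpu_data data)
    {
        bool *flag = data.host_ptr;

        *flag = true;
    }

    static void example_schedule(CPUState *cpu, bool *flag)
    {
        /* Synchronous: returns only after example_set_flag has run on @cpu. */
        run_on_cpu(cpu, example_set_flag, RUN_ON_CPU_HOST_PTR(flag));

        /* Asynchronous variant using the same data-packing convention. */
        async_run_on_cpu(cpu, example_set_flag, RUN_ON_CPU_HOST_PTR(flag));
    }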
+ * + * Record that a CPU has started execution and can be interrupted with + * cpu_exit. + */ +void cpu_exec_start(CPUState *cpu); + +/** + * cpu_exec_end: + * @cpu: The CPU for the current thread. + * + * Record that a CPU has stopped execution and exclusive sections + * can be executed without interrupting it. + */ +void cpu_exec_end(CPUState *cpu); + +/** + * start_exclusive: + * + * Wait for a concurrent exclusive section to end, and then start + * a section of work that is run while other CPUs are not running + * between cpu_exec_start and cpu_exec_end. CPUs that are running + * cpu_exec are exited immediately. CPUs that call cpu_exec_start + * during the exclusive section go to sleep until this CPU calls + * end_exclusive. + */ +void start_exclusive(void); + +/** + * end_exclusive: + * + * Concludes an exclusive execution section started by start_exclusive. + */ +void end_exclusive(void); + /** * qemu_init_vcpu: * @cpu: The vCPU to initialize. @@ -867,7 +967,9 @@ AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx); void QEMU_NORETURN cpu_abort(CPUState *cpu, const char *fmt, ...) GCC_FMT_ATTR(2, 3); -void cpu_exec_exit(CPUState *cpu); +void cpu_exec_initfn(CPUState *cpu); +void cpu_exec_realizefn(CPUState *cpu, Error **errp); +void cpu_exec_unrealizefn(CPUState *cpu); #ifdef CONFIG_SOFTMMU extern const struct VMStateDescription vmstate_cpu_common; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 354f0decf1..5c10f7e25d 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -611,6 +611,37 @@ #define KEY_KBDINPUTASSIST_ACCEPT 0x264 #define KEY_KBDINPUTASSIST_CANCEL 0x265 +/* Diagonal movement keys */ +#define KEY_RIGHT_UP 0x266 +#define KEY_RIGHT_DOWN 0x267 +#define KEY_LEFT_UP 0x268 +#define KEY_LEFT_DOWN 0x269 + +#define KEY_ROOT_MENU 0x26a /* Show Device's Root Menu */ +/* Show Top Menu of the Media (e.g. DVD) */ +#define KEY_MEDIA_TOP_MENU 0x26b +#define KEY_NUMERIC_11 0x26c +#define KEY_NUMERIC_12 0x26d +/* + * Toggle Audio Description: refers to an audio service that helps blind and + * visually impaired consumers understand the action in a program. Note: in + * some countries this is referred to as "Video Description". + */ +#define KEY_AUDIO_DESC 0x26e +#define KEY_3D_MODE 0x26f +#define KEY_NEXT_FAVORITE 0x270 +#define KEY_STOP_RECORD 0x271 +#define KEY_PAUSE_RECORD 0x272 +#define KEY_VOD 0x273 /* Video on Demand */ +#define KEY_UNMUTE 0x274 +#define KEY_FASTREVERSE 0x275 +#define KEY_SLOWREVERSE 0x276 +/* + * Control a data application associated with the currently viewed channel, + * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.) 
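Still on include/qom/cpu.h: a rough sketch of how the execution-section and exclusive-section primitives documented above are meant to pair up, mirroring the doc comments rather than any particular caller in the tree.

    #include "qemu/osdep.h"
    #include "qom/cpu.h"

    /* vCPU thread side: bracket the region where the CPU can be interrupted. */
    static void example_vcpu_step(CPUState *cpu)
    {
        cpu_exec_start(cpu);
        /* ... run guest code, e.g. via cpu_exec(cpu) ... */
        cpu_exec_end(cpu);
    }

    /* Elsewhere: run work while every vCPU is outside cpu_exec_start/end. */
    static void example_exclusive_work(void)
    {
        start_exclusive();
        /* ... touch state that must not race with running vCPUs ... */
        end_exclusive();
    }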
+ */ +#define KEY_DATA 0x275 + #define BTN_TRIGGER_HAPPY 0x2c0 #define BTN_TRIGGER_HAPPY1 0x2c0 #define BTN_TRIGGER_HAPPY2 0x2c1 @@ -749,6 +780,7 @@ #define SW_ROTATE_LOCK 0x0c /* set = rotate locked/disabled */ #define SW_LINEIN_INSERT 0x0d /* set = inserted */ #define SW_MUTE_DEVICE 0x0e /* set = device disabled */ +#define SW_PEN_INSERTED 0x0f /* set = pen inserted */ #define SW_MAX_ 0x0f #define SW_CNT (SW_MAX_+1) diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h index a52b2025ba..7361a16b50 100644 --- a/include/standard-headers/linux/input.h +++ b/include/standard-headers/linux/input.h @@ -244,6 +244,7 @@ struct input_mask { #define BUS_ATARI 0x1B #define BUS_SPI 0x1C #define BUS_RMI 0x1D +#define BUS_CEC 0x1E /* * MT_TOOL types diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index b30d0cb0c1..b777069699 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -49,7 +49,7 @@ * transport being used (eg. virtio_ring), the rest are per-device feature * bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 33 +#define VIRTIO_TRANSPORT_F_END 34 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -63,4 +63,12 @@ /* v1.0 compliant. */ #define VIRTIO_F_VERSION_1 32 +/* + * If clear - device has the IOMMU bypass quirk feature. + * If set - use platform tools to detect the IOMMU. + * + * Note the reverse polarity (compared to most other features), + * this is for compatibility with legacy systems. + */ +#define VIRTIO_F_IOMMU_PLATFORM 33 #endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/standard-headers/linux/virtio_crypto.h b/include/standard-headers/linux/virtio_crypto.h new file mode 100644 index 0000000000..82275a84d8 --- /dev/null +++ b/include/standard-headers/linux/virtio_crypto.h @@ -0,0 +1,429 @@ +#ifndef _LINUX_VIRTIO_CRYPTO_H +#define _LINUX_VIRTIO_CRYPTO_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_config.h" +#include "standard-headers/linux/virtio_types.h" + + +#define VIRTIO_CRYPTO_SERVICE_CIPHER 0 +#define VIRTIO_CRYPTO_SERVICE_HASH 1 +#define VIRTIO_CRYPTO_SERVICE_MAC 2 +#define VIRTIO_CRYPTO_SERVICE_AEAD 3 + +#define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) + +struct virtio_crypto_ctrl_header { +#define VIRTIO_CRYPTO_CIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x02) +#define VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x03) +#define VIRTIO_CRYPTO_HASH_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x02) +#define VIRTIO_CRYPTO_HASH_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x03) +#define VIRTIO_CRYPTO_MAC_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x02) +#define VIRTIO_CRYPTO_MAC_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x03) +#define VIRTIO_CRYPTO_AEAD_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) +#define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) + __virtio32 opcode; + __virtio32 algo; + __virtio32 flag; + /* data virtqueue id */ + __virtio32 queue_id; +}; + +struct virtio_crypto_cipher_session_para { +#define VIRTIO_CRYPTO_NO_CIPHER 0 +#define VIRTIO_CRYPTO_CIPHER_ARC4 1 +#define VIRTIO_CRYPTO_CIPHER_AES_ECB 2 +#define VIRTIO_CRYPTO_CIPHER_AES_CBC 3 +#define VIRTIO_CRYPTO_CIPHER_AES_CTR 4 +#define VIRTIO_CRYPTO_CIPHER_DES_ECB 5 +#define VIRTIO_CRYPTO_CIPHER_DES_CBC 6 +#define VIRTIO_CRYPTO_CIPHER_3DES_ECB 7 +#define VIRTIO_CRYPTO_CIPHER_3DES_CBC 8 +#define VIRTIO_CRYPTO_CIPHER_3DES_CTR 9 +#define VIRTIO_CRYPTO_CIPHER_KASUMI_F8 10 +#define VIRTIO_CRYPTO_CIPHER_SNOW3G_UEA2 11 +#define VIRTIO_CRYPTO_CIPHER_AES_F8 12 +#define VIRTIO_CRYPTO_CIPHER_AES_XTS 13 +#define VIRTIO_CRYPTO_CIPHER_ZUC_EEA3 14 + __virtio32 algo; + /* length of key */ + __virtio32 keylen; + +#define VIRTIO_CRYPTO_OP_ENCRYPT 1 +#define VIRTIO_CRYPTO_OP_DECRYPT 2 + /* encrypt or decrypt */ + __virtio32 op; + __virtio32 padding; +}; + +struct virtio_crypto_session_input { + /* Device-writable part */ + __virtio64 session_id; + __virtio32 status; + __virtio32 padding; +}; + +struct virtio_crypto_cipher_session_req { + struct virtio_crypto_cipher_session_para para; +}; + +struct virtio_crypto_hash_session_para { +#define VIRTIO_CRYPTO_NO_HASH 0 +#define VIRTIO_CRYPTO_HASH_MD5 1 +#define VIRTIO_CRYPTO_HASH_SHA1 2 +#define VIRTIO_CRYPTO_HASH_SHA_224 3 +#define VIRTIO_CRYPTO_HASH_SHA_256 4 +#define VIRTIO_CRYPTO_HASH_SHA_384 5 +#define VIRTIO_CRYPTO_HASH_SHA_512 6 +#define VIRTIO_CRYPTO_HASH_SHA3_224 7 +#define VIRTIO_CRYPTO_HASH_SHA3_256 8 +#define VIRTIO_CRYPTO_HASH_SHA3_384 9 +#define VIRTIO_CRYPTO_HASH_SHA3_512 10 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE128 11 +#define VIRTIO_CRYPTO_HASH_SHA3_SHAKE256 12 + __virtio32 algo; + /* hash result 
length */ + __virtio32 hash_result_len; +}; + +struct virtio_crypto_hash_create_session_req { + struct virtio_crypto_hash_session_para para; +}; + +struct virtio_crypto_mac_session_para { +#define VIRTIO_CRYPTO_NO_MAC 0 +#define VIRTIO_CRYPTO_MAC_HMAC_MD5 1 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA1 2 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_224 3 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_256 4 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_384 5 +#define VIRTIO_CRYPTO_MAC_HMAC_SHA_512 6 +#define VIRTIO_CRYPTO_MAC_CMAC_3DES 25 +#define VIRTIO_CRYPTO_MAC_CMAC_AES 26 +#define VIRTIO_CRYPTO_MAC_KASUMI_F9 27 +#define VIRTIO_CRYPTO_MAC_SNOW3G_UIA2 28 +#define VIRTIO_CRYPTO_MAC_GMAC_AES 41 +#define VIRTIO_CRYPTO_MAC_GMAC_TWOFISH 42 +#define VIRTIO_CRYPTO_MAC_CBCMAC_AES 49 +#define VIRTIO_CRYPTO_MAC_CBCMAC_KASUMI_F9 50 +#define VIRTIO_CRYPTO_MAC_XCBC_AES 53 + __virtio32 algo; + /* hash result length */ + __virtio32 hash_result_len; + /* length of authenticated key */ + __virtio32 auth_key_len; + __virtio32 padding; +}; + +struct virtio_crypto_mac_create_session_req { + struct virtio_crypto_mac_session_para para; +}; + +struct virtio_crypto_aead_session_para { +#define VIRTIO_CRYPTO_NO_AEAD 0 +#define VIRTIO_CRYPTO_AEAD_GCM 1 +#define VIRTIO_CRYPTO_AEAD_CCM 2 +#define VIRTIO_CRYPTO_AEAD_CHACHA20_POLY1305 3 + __virtio32 algo; + /* length of key */ + __virtio32 key_len; + /* digest result length */ + __virtio32 digest_result_len; + /* length of the additional authenticated data (AAD) in bytes */ + __virtio32 aad_len; + /* encrypt or decrypt, See above VIRTIO_CRYPTO_OP_* */ + __virtio32 op; + __virtio32 padding; +}; + +struct virtio_crypto_aead_create_session_req { + struct virtio_crypto_aead_session_para para; +}; + +struct virtio_crypto_alg_chain_session_para { +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 +#define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 + __virtio32 alg_chain_order; +/* Plain hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_PLAIN 1 +/* Authenticated hash (mac) */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_AUTH 2 +/* Nested hash */ +#define VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED 3 + __virtio32 hash_mode; + struct virtio_crypto_cipher_session_para cipher_param; + union { + struct virtio_crypto_hash_session_para hash_param; + struct virtio_crypto_mac_session_para mac_param; + } u; + /* length of the additional authenticated data (AAD) in bytes */ + __virtio32 aad_len; + __virtio32 padding; +}; + +struct virtio_crypto_alg_chain_session_req { + struct virtio_crypto_alg_chain_session_para para; +}; + +struct virtio_crypto_sym_create_session_req { + union { + struct virtio_crypto_cipher_session_req cipher; + struct virtio_crypto_alg_chain_session_req chain; + } u; + + /* Device-readable part */ + +/* No operation */ +#define VIRTIO_CRYPTO_SYM_OP_NONE 0 +/* Cipher only operation on the data */ +#define VIRTIO_CRYPTO_SYM_OP_CIPHER 1 +/* Chain any cipher with any hash or mac operation. 
The order + depends on the value of alg_chain_order param */ +#define VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING 2 + __virtio32 op_type; + __virtio32 padding; +}; + +struct virtio_crypto_destroy_session_req { + /* Device-readable part */ + __virtio64 session_id; +}; + +/* The request of the control viritqueue's packet */ +struct virtio_crypto_op_ctrl_req { + struct virtio_crypto_ctrl_header header; + + union { + struct virtio_crypto_sym_create_session_req sym_create_session; + struct virtio_crypto_hash_create_session_req hash_create_session; + struct virtio_crypto_mac_create_session_req mac_create_session; + struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_destroy_session_req destroy_session; + } u; +}; + +struct virtio_crypto_op_header { +#define VIRTIO_CRYPTO_CIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00) +#define VIRTIO_CRYPTO_CIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x01) +#define VIRTIO_CRYPTO_HASH \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_HASH, 0x00) +#define VIRTIO_CRYPTO_MAC \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_MAC, 0x00) +#define VIRTIO_CRYPTO_AEAD_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) +#define VIRTIO_CRYPTO_AEAD_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) + __virtio32 opcode; + /* algo should be service-specific algorithms */ + __virtio32 algo; + /* session_id should be service-specific algorithms */ + __virtio64 session_id; + /* control flag to control the request */ + __virtio32 flag; + __virtio32 padding; +}; + +struct virtio_crypto_cipher_para { + /* + * Byte Length of valid IV/Counter + * + * - For block ciphers in CBC or F8 mode, or for Kasumi in F8 mode, or for + * SNOW3G in UEA2 mode, this is the length of the IV (which + * must be the same as the block length of the cipher). + * - For block ciphers in CTR mode, this is the length of the counter + * (which must be the same as the block length of the cipher). + * - For AES-XTS, this is the 128bit tweak, i, from IEEE Std 1619-2007. + * + * The IV/Counter will be updated after every partial cryptographic + * operation. + */ + __virtio32 iv_len; + /* length of source data */ + __virtio32 src_data_len; + /* length of dst data */ + __virtio32 dst_data_len; + __virtio32 padding; +}; + +struct virtio_crypto_hash_para { + /* length of source data */ + __virtio32 src_data_len; + /* hash result length */ + __virtio32 hash_result_len; +}; + +struct virtio_crypto_mac_para { + struct virtio_crypto_hash_para hash; +}; + +struct virtio_crypto_aead_para { + /* + * Byte Length of valid IV data pointed to by the below iv_addr + * parameter. + * + * - For GCM mode, this is either 12 (for 96-bit IVs) or 16, in which + * case iv_addr points to J0. + * - For CCM mode, this is the length of the nonce, which can be in the + * range 7 to 13 inclusive. 
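As a quick illustration of the request layout above, a sketch of filling struct virtio_crypto_op_header for a cipher-encrypt request via the VIRTIO_CRYPTO_OPCODE() encoding; endian conversion is omitted and the session id is a placeholder.

    #include "qemu/osdep.h"
    #include "standard-headers/linux/virtio_crypto.h"

    static void example_fill_encrypt_header(struct virtio_crypto_op_header *hdr,
                                            uint64_t session_id)
    {
        /* Same value as the VIRTIO_CRYPTO_CIPHER_ENCRYPT constant above. */
        hdr->opcode = VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_CIPHER, 0x00);
        hdr->algo = VIRTIO_CRYPTO_CIPHER_AES_CBC;   /* service-specific algo */
        hdr->session_id = session_id;               /* from session creation */
        hdr->flag = 0;
    }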
+ */ + __virtio32 iv_len; + /* length of additional auth data */ + __virtio32 aad_len; + /* length of source data */ + __virtio32 src_data_len; + /* length of dst data */ + __virtio32 dst_data_len; +}; + +struct virtio_crypto_cipher_data_req { + /* Device-readable part */ + struct virtio_crypto_cipher_para para; +}; + +struct virtio_crypto_hash_data_req { + /* Device-readable part */ + struct virtio_crypto_hash_para para; +}; + +struct virtio_crypto_mac_data_req { + /* Device-readable part */ + struct virtio_crypto_mac_para para; +}; + +struct virtio_crypto_alg_chain_data_para { + __virtio32 iv_len; + /* Length of source data */ + __virtio32 src_data_len; + /* Length of destination data */ + __virtio32 dst_data_len; + /* Starting point for cipher processing in source data */ + __virtio32 cipher_start_src_offset; + /* Length of the source data that the cipher will be computed on */ + __virtio32 len_to_cipher; + /* Starting point for hash processing in source data */ + __virtio32 hash_start_src_offset; + /* Length of the source data that the hash will be computed on */ + __virtio32 len_to_hash; + /* Length of the additional auth data */ + __virtio32 aad_len; + /* Length of the hash result */ + __virtio32 hash_result_len; + __virtio32 reserved; +}; + +struct virtio_crypto_alg_chain_data_req { + /* Device-readable part */ + struct virtio_crypto_alg_chain_data_para para; +}; + +struct virtio_crypto_sym_data_req { + union { + struct virtio_crypto_cipher_data_req cipher; + struct virtio_crypto_alg_chain_data_req chain; + } u; + + /* See above VIRTIO_CRYPTO_SYM_OP_* */ + __virtio32 op_type; + __virtio32 padding; +}; + +struct virtio_crypto_aead_data_req { + /* Device-readable part */ + struct virtio_crypto_aead_para para; +}; + +/* The request of the data viritqueue's packet */ +struct virtio_crypto_op_data_req { + struct virtio_crypto_op_header header; + + union { + struct virtio_crypto_sym_data_req sym_req; + struct virtio_crypto_hash_data_req hash_req; + struct virtio_crypto_mac_data_req mac_req; + struct virtio_crypto_aead_data_req aead_req; + } u; +}; + +#define VIRTIO_CRYPTO_OK 0 +#define VIRTIO_CRYPTO_ERR 1 +#define VIRTIO_CRYPTO_BADMSG 2 +#define VIRTIO_CRYPTO_NOTSUPP 3 +#define VIRTIO_CRYPTO_INVSESS 4 /* Invaild session id */ + +/* The accelerator hardware is ready */ +#define VIRTIO_CRYPTO_S_HW_READY (1 << 0) +#define VIRTIO_CRYPTO_S_STARTED (1 << 1) + +struct virtio_crypto_config { + /* See VIRTIO_CRYPTO_* above */ + __virtio32 status; + + /* + * Maximum number of data queue legal values are between 1 and 0x8000 + */ + __virtio32 max_dataqueues; + + /* Specifies the services mask which the devcie support, + see VIRTIO_CRYPTO_SERVICE_* above */ + __virtio32 crypto_services; + + /* Detailed algorithms mask */ + __virtio32 cipher_algo_l; + __virtio32 cipher_algo_h; + __virtio32 hash_algo; + __virtio32 mac_algo_l; + __virtio32 mac_algo_h; + __virtio32 aead_algo; + + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + + __virtio32 reserve; + + /* The maximum size of per request's content */ + __virtio64 max_size; +}; + +struct virtio_crypto_inhdr { + /* See VIRTIO_CRYPTO_* above */ + uint8_t status; +}; + +#endif /* _LINUX_VIRTIO_CRYPTO_H */ diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h index 77925f587b..fe74e422d4 100644 --- a/include/standard-headers/linux/virtio_ids.h +++ b/include/standard-headers/linux/virtio_ids.h @@ -41,5 +41,6 @@ #define 
VIRTIO_ID_CAIF 12 /* Virtio caif */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ - +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index a78f33e775..30ff24940d 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -35,6 +35,7 @@ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ #define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ +#define VIRTIO_NET_F_MTU 3 /* Initial MTU advice */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -73,6 +74,8 @@ struct virtio_net_config { * Legal values are between 1 and 0x8000 */ uint16_t max_virtqueue_pairs; + /* Default maximum transmit unit advice */ + uint16_t mtu; } QEMU_PACKED; /* diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h new file mode 100644 index 0000000000..be443211ce --- /dev/null +++ b/include/standard-headers/linux/virtio_vsock.h @@ -0,0 +1,94 @@ +/* + * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * Copyright (C) Red Hat, Inc., 2013-2015 + * Copyright (C) Asias He , 2013 + * Copyright (C) Stefan Hajnoczi , 2015 + */ + +#ifndef _LINUX_VIRTIO_VSOCK_H +#define _LINUX_VIRTIO_VSOCK_H + +#include "standard-headers/linux/types.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_config.h" + +struct virtio_vsock_config { + uint64_t guest_cid; +} QEMU_PACKED; + +enum virtio_vsock_event_id { + VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0, +}; + +struct virtio_vsock_event { + uint32_t id; +} QEMU_PACKED; + +struct virtio_vsock_hdr { + uint64_t src_cid; + uint64_t dst_cid; + uint32_t src_port; + uint32_t dst_port; + uint32_t len; + uint16_t type; /* enum virtio_vsock_type */ + uint16_t op; /* enum virtio_vsock_op */ + uint32_t flags; + uint32_t buf_alloc; + uint32_t fwd_cnt; +} QEMU_PACKED; + +enum virtio_vsock_type { + VIRTIO_VSOCK_TYPE_STREAM = 1, +}; + +enum virtio_vsock_op { + VIRTIO_VSOCK_OP_INVALID = 0, + + /* Connect operations */ + VIRTIO_VSOCK_OP_REQUEST = 1, + VIRTIO_VSOCK_OP_RESPONSE = 2, + VIRTIO_VSOCK_OP_RST = 3, + VIRTIO_VSOCK_OP_SHUTDOWN = 4, + + /* To send payload */ + VIRTIO_VSOCK_OP_RW = 5, + + /* Tell the peer our credit info */ + VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, + /* Request the peer to send the credit info to us */ + VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, +}; + +/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ +enum virtio_vsock_shutdown { + VIRTIO_VSOCK_SHUTDOWN_RCV = 1, + VIRTIO_VSOCK_SHUTDOWN_SEND = 2, +}; + +#endif /* _LINUX_VIRTIO_VSOCK_H */ diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h index d690dfabdf..1c9dad1b72 100644 --- a/include/sysemu/arch_init.h +++ b/include/sysemu/arch_init.h @@ -35,5 +35,14 @@ int kvm_available(void); int xen_available(void); CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp); +CpuModelExpansionInfo *arch_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *mode, + Error **errp); +CpuModelCompareInfo *arch_query_cpu_model_comparison(CpuModelInfo *modela, + CpuModelInfo *modelb, + Error **errp); +CpuModelBaselineInfo *arch_query_cpu_model_baseline(CpuModelInfo *modela, + CpuModelInfo *modelb, + Error **errp); #endif diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 2da4905d18..6444e41d39 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -98,6 +98,7 @@ BlockDriverState *blk_bs(BlockBackend *blk); void blk_remove_bs(BlockBackend *blk); void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs); bool bdrv_has_blk(BlockDriverState *bs); +bool bdrv_is_root_node(BlockDriverState *bs); void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow); void blk_iostatus_enable(BlockBackend *blk); @@ -106,10 +107,12 @@ BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk); void blk_iostatus_disable(BlockBackend *blk); void blk_iostatus_reset(BlockBackend *blk); void blk_iostatus_set_err(BlockBackend *blk, int error); -int blk_attach_dev(BlockBackend *blk, void *dev); -void blk_attach_dev_nofail(BlockBackend *blk, void *dev); +int blk_attach_dev(BlockBackend *blk, DeviceState *dev); +void blk_attach_dev_legacy(BlockBackend *blk, void *dev); void blk_detach_dev(BlockBackend *blk, void *dev); void *blk_get_attached_dev(BlockBackend *blk); +BlockBackend *blk_by_dev(void *dev); +BlockBackend *blk_by_qdev_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf, int 
count); @@ -143,13 +146,13 @@ BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int count, BlockCompletionFunc *cb, void *opaque); void blk_aio_cancel(BlockAIOCB *acb); void blk_aio_cancel_async(BlockAIOCB *acb); +int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf); int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf); BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count); int blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); -int blk_flush_all(void); int blk_commit_all(void); void blk_drain(BlockBackend *blk); void blk_drain_all(void); @@ -196,15 +199,15 @@ void blk_io_unplug(BlockBackend *blk); BlockAcctStats *blk_get_stats(BlockBackend *blk); BlockBackendRootState *blk_get_root_state(BlockBackend *blk); void blk_update_root_state(BlockBackend *blk); -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, BlockCompletionFunc *cb, void *opaque); int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset, int count, BdrvRequestFlags flags); -int blk_write_compressed(BlockBackend *blk, int64_t sector_num, - const uint8_t *buf, int nb_sectors); +int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf, + int count); int blk_truncate(BlockBackend *blk, int64_t offset); int blk_pdiscard(BlockBackend *blk, int64_t offset, int count); int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, diff --git a/include/sysemu/char.h b/include/sysemu/char.h index ee7e55468f..0a149428cf 100644 --- a/include/sysemu/char.h +++ b/include/sysemu/char.h @@ -9,15 +9,17 @@ #include "qapi/qmp/qobject.h" #include "qapi/qmp/qstring.h" #include "qemu/main-loop.h" +#include "qemu/bitmap.h" /* character device */ -#define CHR_EVENT_BREAK 0 /* serial break char */ -#define CHR_EVENT_FOCUS 1 /* focus to this terminal (modal input needed) */ -#define CHR_EVENT_OPENED 2 /* new connection established */ -#define CHR_EVENT_MUX_IN 3 /* mux-focus was set to this terminal */ -#define CHR_EVENT_MUX_OUT 4 /* mux-focus will move on */ -#define CHR_EVENT_CLOSED 5 /* connection closed */ +typedef enum { + CHR_EVENT_BREAK, /* serial break char */ + CHR_EVENT_OPENED, /* new connection established */ + CHR_EVENT_MUX_IN, /* mux-focus was set to this terminal */ + CHR_EVENT_MUX_OUT, /* mux-focus will move on */ + CHR_EVENT_CLOSED /* connection closed */ +} QEMUChrEvent; #define CHR_IOCTL_SERIAL_SET_PARAMS 1 @@ -58,42 +60,59 @@ struct ParallelIOArg { typedef void IOEventHandler(void *opaque, int event); +typedef enum { + /* Whether the chardev peer is able to close and + * reopen the data channel, thus requiring support + * for qemu_chr_wait_connected() to wait for a + * valid connection */ + QEMU_CHAR_FEATURE_RECONNECTABLE, + /* Whether it is possible to send/recv file descriptors + * over the data channel */ + QEMU_CHAR_FEATURE_FD_PASS, + + QEMU_CHAR_FEATURE_LAST, +} CharDriverFeature; + +/* This is the backend as seen by frontend, the actual backend is + * CharDriverState */ +typedef struct CharBackend { + CharDriverState *chr; + IOEventHandler *chr_event; + IOCanReadHandler *chr_can_read; + IOReadHandler *chr_read; + void *opaque; + int tag; + int fe_open; +} CharBackend; + struct CharDriverState { QemuMutex 
chr_write_lock; - void (*init)(struct CharDriverState *s); int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len); int (*chr_sync_read)(struct CharDriverState *s, const uint8_t *buf, int len); GSource *(*chr_add_watch)(struct CharDriverState *s, GIOCondition cond); - void (*chr_update_read_handler)(struct CharDriverState *s); + void (*chr_update_read_handler)(struct CharDriverState *s, + GMainContext *context); int (*chr_ioctl)(struct CharDriverState *s, int cmd, void *arg); int (*get_msgfds)(struct CharDriverState *s, int* fds, int num); int (*set_msgfds)(struct CharDriverState *s, int *fds, int num); int (*chr_add_client)(struct CharDriverState *chr, int fd); int (*chr_wait_connected)(struct CharDriverState *chr, Error **errp); - IOEventHandler *chr_event; - IOCanReadHandler *chr_can_read; - IOReadHandler *chr_read; - void *handler_opaque; - void (*chr_close)(struct CharDriverState *chr); + void (*chr_free)(struct CharDriverState *chr); void (*chr_disconnect)(struct CharDriverState *chr); void (*chr_accept_input)(struct CharDriverState *chr); void (*chr_set_echo)(struct CharDriverState *chr, bool echo); void (*chr_set_fe_open)(struct CharDriverState *chr, int fe_open); - void (*chr_fe_event)(struct CharDriverState *chr, int event); + CharBackend *be; void *opaque; char *label; char *filename; int logfd; int be_open; - int fe_open; - int explicit_fe_open; - int explicit_be_open; - int avail_connections; int is_mux; guint fd_in_tag; - QemuOpts *opts; bool replay; + DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST); QTAILQ_ENTRY(CharDriverState) next; }; @@ -114,13 +133,11 @@ CharDriverState *qemu_chr_alloc(ChardevCommon *backend, Error **errp); * Create a new character backend from a QemuOpts list. * * @opts see qemu-config.c for a list of valid options - * @init not sure.. * * Returns: a new character backend */ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, - void (*init)(struct CharDriverState *s), - Error **errp); + Error **errp); /** * @qemu_chr_parse_common: @@ -139,18 +156,19 @@ void qemu_chr_parse_common(QemuOpts *opts, ChardevCommon *backend); * * @label the name of the backend * @filename the URI - * @init not sure.. * * Returns: a new character backend */ -CharDriverState *qemu_chr_new(const char *label, const char *filename, - void (*init)(struct CharDriverState *s)); +CharDriverState *qemu_chr_new(const char *label, const char *filename); + + /** - * @qemu_chr_disconnect: + * @qemu_chr_fe_disconnect: * * Close a fd accpeted by character backend. + * Without associated CharDriver, do nothing. */ -void qemu_chr_disconnect(CharDriverState *chr); +void qemu_chr_fe_disconnect(CharBackend *be); /** * @qemu_chr_cleanup: @@ -160,11 +178,12 @@ void qemu_chr_disconnect(CharDriverState *chr); void qemu_chr_cleanup(void); /** - * @qemu_chr_wait_connected: + * @qemu_chr_fe_wait_connected: * - * Wait for characted backend to be connected. + * Wait for characted backend to be connected, return < 0 on error or + * if no assicated CharDriver. */ -int qemu_chr_wait_connected(CharDriverState *chr, Error **errp); +int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp); /** * @qemu_chr_new_noreplay: @@ -175,12 +194,10 @@ int qemu_chr_wait_connected(CharDriverState *chr, Error **errp); * * @label the name of the backend * @filename the URI - * @init not sure.. 
* * Returns: a new character backend */ -CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename, - void (*init)(struct CharDriverState *s)); +CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename); /** * @qemu_chr_delete: @@ -203,37 +220,31 @@ void qemu_chr_free(CharDriverState *chr); * Ask the backend to override its normal echo setting. This only really * applies to the stdio backend and is used by the QMP server such that you * can see what you type if you try to type QMP commands. + * Without associated CharDriver, do nothing. * * @echo true to enable echo, false to disable echo */ -void qemu_chr_fe_set_echo(struct CharDriverState *chr, bool echo); +void qemu_chr_fe_set_echo(CharBackend *be, bool echo); /** * @qemu_chr_fe_set_open: * * Set character frontend open status. This is an indication that the * front end is ready (or not) to begin doing I/O. + * Without associated CharDriver, do nothing. */ -void qemu_chr_fe_set_open(struct CharDriverState *chr, int fe_open); - -/** - * @qemu_chr_fe_event: - * - * Send an event from the front end to the back end. - * - * @event the event to send - */ -void qemu_chr_fe_event(CharDriverState *s, int event); +void qemu_chr_fe_set_open(CharBackend *be, int fe_open); /** * @qemu_chr_fe_printf: * - * Write to a character backend using a printf style interface. - * This function is thread-safe. + * Write to a character backend using a printf style interface. This + * function is thread-safe. It does nothing without associated + * CharDriver. * * @fmt see #printf */ -void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) +void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) GCC_FMT_ATTR(2, 3); /** @@ -242,13 +253,13 @@ void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) * If the backend is connected, create and add a #GSource that fires * when the given condition (typically G_IO_OUT|G_IO_HUP or G_IO_HUP) * is active; return the #GSource's tag. If it is disconnected, - * return 0. + * or without associated CharDriver, return 0. 
* * @cond the condition to poll for * @func the function to call when the condition happens * @user_data the opaque pointer to pass to @func */ -guint qemu_chr_fe_add_watch(CharDriverState *s, GIOCondition cond, +guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, GIOFunc func, void *user_data); /** @@ -261,9 +272,9 @@ guint qemu_chr_fe_add_watch(CharDriverState *s, GIOCondition cond, * @buf the data * @len the number of bytes to send * - * Returns: the number of bytes consumed + * Returns: the number of bytes consumed (0 if no assicated CharDriver) */ -int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len); +int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len); /** * @qemu_chr_fe_write_all: @@ -276,9 +287,9 @@ int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len); * @buf the data * @len the number of bytes to send * - * Returns: the number of bytes consumed + * Returns: the number of bytes consumed (0 if no assicated CharDriver) */ -int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len); +int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len); /** * @qemu_chr_fe_read_all: @@ -288,9 +299,9 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len); * @buf the data buffer * @len the number of bytes to read * - * Returns: the number of bytes read + * Returns: the number of bytes read (0 if no assicated CharDriver) */ -int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len); +int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len); /** * @qemu_chr_fe_ioctl: @@ -300,10 +311,11 @@ int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len); * @cmd see CHR_IOCTL_* * @arg the data associated with @cmd * - * Returns: if @cmd is not supported by the backend, -ENOTSUP, otherwise the - * return value depends on the semantics of @cmd + * Returns: if @cmd is not supported by the backend or there is no + * associated CharDriver, -ENOTSUP, otherwise the return + * value depends on the semantics of @cmd */ -int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg); +int qemu_chr_fe_ioctl(CharBackend *be, int cmd, void *arg); /** * @qemu_chr_fe_get_msgfd: @@ -316,7 +328,7 @@ int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg); * this function will return -1 until a client sends a new file * descriptor. */ -int qemu_chr_fe_get_msgfd(CharDriverState *s); +int qemu_chr_fe_get_msgfd(CharBackend *be); /** * @qemu_chr_fe_get_msgfds: @@ -329,7 +341,7 @@ int qemu_chr_fe_get_msgfd(CharDriverState *s); * this function will return -1 until a client sends a new set of file * descriptors. */ -int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int num); +int qemu_chr_fe_get_msgfds(CharBackend *be, int *fds, int num); /** * @qemu_chr_fe_set_msgfds: @@ -340,38 +352,9 @@ int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int num); * result in overwriting the fd array with the new value without being send. * Upon writing the message the fd array is freed. * - * Returns: -1 if fd passing isn't supported. - */ -int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num); - -/** - * @qemu_chr_fe_claim: - * - * Claim a backend before using it, should be called before calling - * qemu_chr_add_handlers(). - * - * Returns: -1 if the backend is already in use by another frontend, 0 on - * success. 
- */ -int qemu_chr_fe_claim(CharDriverState *s); - -/** - * @qemu_chr_fe_claim_no_fail: - * - * Like qemu_chr_fe_claim, but will exit qemu with an error when the - * backend is already in use. + * Returns: -1 if fd passing isn't supported or no associated CharDriver. */ -void qemu_chr_fe_claim_no_fail(CharDriverState *s); - -/** - * @qemu_chr_fe_claim: - * - * Release a backend for use by another frontend. - * - * Returns: -1 if the backend is already in use by another frontend, 0 on - * success. - */ -void qemu_chr_fe_release(CharDriverState *s); +int qemu_chr_fe_set_msgfds(CharBackend *be, int *fds, int num); /** * @qemu_chr_be_can_write: @@ -416,24 +399,89 @@ void qemu_chr_be_write_impl(CharDriverState *s, uint8_t *buf, int len); */ void qemu_chr_be_event(CharDriverState *s, int event); -void qemu_chr_add_handlers(CharDriverState *s, - IOCanReadHandler *fd_can_read, - IOReadHandler *fd_read, - IOEventHandler *fd_event, - void *opaque); +/** + * @qemu_chr_fe_init: + * + * Initializes a front end for the given CharBackend and + * CharDriver. Call qemu_chr_fe_deinit() to remove the association and + * release the driver. + * + * Returns: false on error. + */ +bool qemu_chr_fe_init(CharBackend *b, CharDriverState *s, Error **errp); + +/** + * @qemu_chr_fe_get_driver: + * + * Returns the driver associated with a CharBackend or NULL if no + * associated CharDriver. + */ +CharDriverState *qemu_chr_fe_get_driver(CharBackend *be); + +/** + * @qemu_chr_fe_deinit: + * + * Dissociate the CharBackend from the CharDriver. + * + * Safe to call without associated CharDriver. + */ +void qemu_chr_fe_deinit(CharBackend *b); + +/** + * @qemu_chr_fe_set_handlers: + * @b: a CharBackend + * @fd_can_read: callback to get the amount of data the frontend may + * receive + * @fd_read: callback to receive data from char + * @fd_event: event callback + * @opaque: an opaque pointer for the callbacks + * @context: a main loop context or NULL for the default + * @set_open: whether to call qemu_chr_fe_set_open() implicitely when + * any of the handler is non-NULL + * + * Set the front end char handlers. The front end takes the focus if + * any of the handler is non-NULL. + * + * Without associated CharDriver, nothing is changed. + */ +void qemu_chr_fe_set_handlers(CharBackend *b, + IOCanReadHandler *fd_can_read, + IOReadHandler *fd_read, + IOEventHandler *fd_event, + void *opaque, + GMainContext *context, + bool set_open); + +/** + * @qemu_chr_fe_take_focus: + * + * Take the focus (if the front end is muxed). + * + * Without associated CharDriver, nothing is changed. 
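A hedged sketch of the CharBackend front-end flow described above: a hypothetical device embeds a CharBackend, binds it with qemu_chr_fe_init() and installs its callbacks with qemu_chr_fe_set_handlers(); the names and buffer sizes are illustrative only.

    #include "qemu/osdep.h"
    #include "sysemu/char.h"

    typedef struct ExampleDevice {
        CharBackend be;                 /* hypothetical device front end */
    } ExampleDevice;

    static int example_can_read(void *opaque)
    {
        return 64;                      /* accept up to 64 bytes per call */
    }

    static void example_read(void *opaque, const uint8_t *buf, int size)
    {
        /* ... consume @size bytes delivered by the backend ... */
    }

    static void example_event(void *opaque, int event)
    {
        /* ... react to CHR_EVENT_OPENED / CHR_EVENT_CLOSED ... */
    }

    static void example_realize(ExampleDevice *dev, CharDriverState *chr,
                                Error **errp)
    {
        if (!qemu_chr_fe_init(&dev->be, chr, errp)) {
            return;
        }
        /* NULL context = default main loop; final true sets fe_open implicitly */
        qemu_chr_fe_set_handlers(&dev->be, example_can_read, example_read,
                                 example_event, dev, NULL, true);
    }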
+ */ +void qemu_chr_fe_take_focus(CharBackend *b); void qemu_chr_be_generic_open(CharDriverState *s); -void qemu_chr_accept_input(CharDriverState *s); +void qemu_chr_fe_accept_input(CharBackend *be); int qemu_chr_add_client(CharDriverState *s, int fd); CharDriverState *qemu_chr_find(const char *name); bool chr_is_ringbuf(const CharDriverState *chr); +bool qemu_chr_has_feature(CharDriverState *chr, + CharDriverFeature feature); +void qemu_chr_set_feature(CharDriverState *chr, + CharDriverFeature feature); QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename); +typedef void CharDriverParse(QemuOpts *opts, ChardevBackend *backend, + Error **errp); +typedef CharDriverState *CharDriverCreate(const char *id, + ChardevBackend *backend, + ChardevReturn *ret, bool *be_opened, + Error **errp); + void register_char_driver(const char *name, ChardevBackendKind kind, - void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp), - CharDriverState *(*create)(const char *id, ChardevBackend *backend, - ChardevReturn *ret, Error **errp)); + CharDriverParse *parse, CharDriverCreate *create); extern int term_escape_char; diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index fe992a8946..3728a1ea7e 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -29,12 +29,9 @@ void qtest_clock_warp(int64_t dest); #ifndef CONFIG_USER_ONLY /* vl.c */ +/* *-user doesn't have configurable SMP topology */ extern int smp_cores; extern int smp_threads; -#else -/* *-user doesn't have configurable SMP topology */ -#define smp_cores 1 -#define smp_threads 1 #endif void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg); diff --git a/include/sysemu/cryptodev.h b/include/sysemu/cryptodev.h new file mode 100644 index 0000000000..84526c0d35 --- /dev/null +++ b/include/sysemu/cryptodev.h @@ -0,0 +1,298 @@ +/* + * QEMU Crypto Device Implementation + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * + * Authors: + * Gonglei + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ +#ifndef CRYPTODEV_H +#define CRYPTODEV_H + +#include "qom/object.h" +#include "qemu-common.h" + +/** + * CryptoDevBackend: + * + * The CryptoDevBackend object is an interface + * for different cryptodev backends, which provides crypto + * operation wrapper. 
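To give a feel for how this interface is used, a hedged sketch of creating and closing a symmetric cipher session through the session structures and helpers declared further down in this header; the algorithm, queue index and key handling are illustrative only.

    #include "qemu/osdep.h"
    #include "sysemu/cryptodev.h"
    #include "standard-headers/linux/virtio_crypto.h"

    static int64_t example_create_aes_cbc_session(CryptoDevBackend *backend,
                                                  uint8_t *key, uint32_t key_len,
                                                  Error **errp)
    {
        CryptoDevBackendSymSessionInfo info = {
            .op_code = VIRTIO_CRYPTO_CIPHER_CREATE_SESSION,
            .cipher_alg = VIRTIO_CRYPTO_CIPHER_AES_CBC,
            .key_len = key_len,
            .cipher_key = key,
            .op_type = VIRTIO_CRYPTO_SYM_OP_CIPHER,
            .direction = VIRTIO_CRYPTO_OP_ENCRYPT,
        };

        /* queue index 0 is used purely for illustration */
        return cryptodev_backend_sym_create_session(backend, &info, 0, errp);
    }

    static void example_close_session(CryptoDevBackend *backend, int64_t session,
                                      Error **errp)
    {
        if (session >= 0) {
            cryptodev_backend_sym_close_session(backend, session, 0, errp);
        }
    }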
+ * + */ + +#define TYPE_CRYPTODEV_BACKEND "cryptodev-backend" + +#define CRYPTODEV_BACKEND(obj) \ + OBJECT_CHECK(CryptoDevBackend, \ + (obj), TYPE_CRYPTODEV_BACKEND) +#define CRYPTODEV_BACKEND_GET_CLASS(obj) \ + OBJECT_GET_CLASS(CryptoDevBackendClass, \ + (obj), TYPE_CRYPTODEV_BACKEND) +#define CRYPTODEV_BACKEND_CLASS(klass) \ + OBJECT_CLASS_CHECK(CryptoDevBackendClass, \ + (klass), TYPE_CRYPTODEV_BACKEND) + + +#define MAX_CRYPTO_QUEUE_NUM 64 + +typedef struct CryptoDevBackendConf CryptoDevBackendConf; +typedef struct CryptoDevBackendPeers CryptoDevBackendPeers; +typedef struct CryptoDevBackendClient + CryptoDevBackendClient; +typedef struct CryptoDevBackend CryptoDevBackend; + +enum CryptoDevBackendAlgType { + CRYPTODEV_BACKEND_ALG_SYM, + CRYPTODEV_BACKEND_ALG__MAX, +}; + +/** + * CryptoDevBackendSymSessionInfo: + * + * @op_code: operation code (refer to virtio_crypto.h) + * @cipher_alg: algorithm type of CIPHER + * @key_len: byte length of cipher key + * @hash_alg: algorithm type of HASH/MAC + * @hash_result_len: byte length of HASH operation result + * @auth_key_len: byte length of authenticated key + * @add_len: byte length of additional authenticated data + * @op_type: operation type (refer to virtio_crypto.h) + * @direction: encryption or direction for CIPHER + * @hash_mode: HASH mode for HASH operation (refer to virtio_crypto.h) + * @alg_chain_order: order of algorithm chaining (CIPHER then HASH, + * or HASH then CIPHER) + * @cipher_key: point to a key of CIPHER + * @auth_key: point to an authenticated key of MAC + * + */ +typedef struct CryptoDevBackendSymSessionInfo { + /* corresponding with virtio crypto spec */ + uint32_t op_code; + uint32_t cipher_alg; + uint32_t key_len; + uint32_t hash_alg; + uint32_t hash_result_len; + uint32_t auth_key_len; + uint32_t add_len; + uint8_t op_type; + uint8_t direction; + uint8_t hash_mode; + uint8_t alg_chain_order; + uint8_t *cipher_key; + uint8_t *auth_key; +} CryptoDevBackendSymSessionInfo; + +/** + * CryptoDevBackendSymOpInfo: + * + * @session_id: session index which was previously + * created by cryptodev_backend_sym_create_session() + * @aad_len: byte length of additional authenticated data + * @iv_len: byte length of initialization vector or counter + * @src_len: byte length of source data + * @dst_len: byte length of destination data + * @digest_result_len: byte length of hash digest result + * @hash_start_src_offset: Starting point for hash processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @cipher_start_src_offset: Starting point for cipher processing, specified + * as number of bytes from start of packet in source data, only used for + * algorithm chain + * @len_to_hash: byte length of source data on which the hash + * operation will be computed, only used for algorithm chain + * @len_to_cipher: byte length of source data on which the cipher + * operation will be computed, only used for algorithm chain + * @op_type: operation type (refer to virtio_crypto.h) + * @iv: point to the initialization vector or counter + * @src: point to the source data + * @dst: point to the destination data + * @aad_data: point to the additional authenticated data + * @digest_result: point to the digest result data + * @data[0]: point to the extensional memory by one memory allocation + * + */ +typedef struct CryptoDevBackendSymOpInfo { + uint64_t session_id; + uint32_t aad_len; + uint32_t iv_len; + uint32_t src_len; + uint32_t dst_len; + uint32_t digest_result_len; + uint32_t 
hash_start_src_offset; + uint32_t cipher_start_src_offset; + uint32_t len_to_hash; + uint32_t len_to_cipher; + uint8_t op_type; + uint8_t *iv; + uint8_t *src; + uint8_t *dst; + uint8_t *aad_data; + uint8_t *digest_result; + uint8_t data[0]; +} CryptoDevBackendSymOpInfo; + +typedef struct CryptoDevBackendClass { + ObjectClass parent_class; + + void (*init)(CryptoDevBackend *backend, Error **errp); + void (*cleanup)(CryptoDevBackend *backend, Error **errp); + + int64_t (*create_session)(CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp); + int (*close_session)(CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp); + int (*do_sym_op)(CryptoDevBackend *backend, + CryptoDevBackendSymOpInfo *op_info, + uint32_t queue_index, Error **errp); +} CryptoDevBackendClass; + + +struct CryptoDevBackendClient { + char *model; + char *name; + char *info_str; + unsigned int queue_index; + QTAILQ_ENTRY(CryptoDevBackendClient) next; +}; + +struct CryptoDevBackendPeers { + CryptoDevBackendClient *ccs[MAX_CRYPTO_QUEUE_NUM]; + uint32_t queues; +}; + +struct CryptoDevBackendConf { + CryptoDevBackendPeers peers; + + /* Supported service mask */ + uint32_t crypto_services; + + /* Detailed algorithms mask */ + uint32_t cipher_algo_l; + uint32_t cipher_algo_h; + uint32_t hash_algo; + uint32_t mac_algo_l; + uint32_t mac_algo_h; + uint32_t aead_algo; + /* Maximum length of cipher key */ + uint32_t max_cipher_key_len; + /* Maximum length of authenticated key */ + uint32_t max_auth_key_len; + /* Maximum size of each crypto request's content */ + uint64_t max_size; +}; + +struct CryptoDevBackend { + Object parent_obj; + + bool ready; + CryptoDevBackendConf conf; +}; + +/** + * cryptodev_backend_new_client: + * @model: the cryptodev backend model + * @name: the cryptodev backend name, can be NULL + * + * Creates a new cryptodev backend client object + * with the @name in the model @model. 
+ * + * The returned object must be released with + * cryptodev_backend_free_client() when no + * longer required + * + * Returns: a new cryptodev backend client object + */ +CryptoDevBackendClient * +cryptodev_backend_new_client(const char *model, + const char *name); +/** + * cryptodev_backend_free_client: + * @cc: the cryptodev backend client object + * + * Release the memory associated with @cc that + * was previously allocated by cryptodev_backend_new_client() + */ +void cryptodev_backend_free_client( + CryptoDevBackendClient *cc); + +/** + * cryptodev_backend_cleanup: + * @backend: the cryptodev backend object + * @errp: pointer to a NULL-initialized error object + * + * Clean the resouce associated with @backend that realizaed + * by the specific backend's init() callback + */ +void cryptodev_backend_cleanup( + CryptoDevBackend *backend, + Error **errp); + +/** + * cryptodev_backend_sym_create_session: + * @backend: the cryptodev backend object + * @sess_info: parameters needed by session creating + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Create a session for symmetric algorithms + * + * Returns: session id on success, or -1 on error + */ +int64_t cryptodev_backend_sym_create_session( + CryptoDevBackend *backend, + CryptoDevBackendSymSessionInfo *sess_info, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_sym_close_session: + * @backend: the cryptodev backend object + * @session_id: the session id + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Close a session for symmetric algorithms which was previously + * created by cryptodev_backend_sym_create_session() + * + * Returns: 0 on success, or Negative on error + */ +int cryptodev_backend_sym_close_session( + CryptoDevBackend *backend, + uint64_t session_id, + uint32_t queue_index, Error **errp); + +/** + * cryptodev_backend_crypto_operation: + * @backend: the cryptodev backend object + * @opaque: pointer to a VirtIOCryptoReq object + * @queue_index: queue index of cryptodev backend client + * @errp: pointer to a NULL-initialized error object + * + * Do crypto operation, such as encryption and + * decryption + * + * Returns: VIRTIO_CRYPTO_OK on success, + * or -VIRTIO_CRYPTO_* on error + */ +int cryptodev_backend_crypto_operation( + CryptoDevBackend *backend, + void *opaque, + uint32_t queue_index, Error **errp); + +#endif /* CRYPTODEV_H */ diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h index 34c8eaf64e..c228c66513 100644 --- a/include/sysemu/dma.h +++ b/include/sysemu/dma.h @@ -199,14 +199,14 @@ typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov, void *opaque); BlockAIOCB *dma_blk_io(AioContext *ctx, - QEMUSGList *sg, uint64_t offset, + QEMUSGList *sg, uint64_t offset, uint32_t align, DMAIOFunc *io_func, void *io_func_opaque, BlockCompletionFunc *cb, void *opaque, DMADirection dir); BlockAIOCB *dma_blk_read(BlockBackend *blk, - QEMUSGList *sg, uint64_t offset, + QEMUSGList *sg, uint64_t offset, uint32_t align, BlockCompletionFunc *cb, void *opaque); BlockAIOCB *dma_blk_write(BlockBackend *blk, - QEMUSGList *sg, uint64_t offset, + QEMUSGList *sg, uint64_t offset, uint32_t align, BlockCompletionFunc *cb, void *opaque); uint64_t dma_buf_read(uint8_t *ptr, int32_t len, QEMUSGList *sg); uint64_t dma_buf_write(uint8_t *ptr, int32_t len, QEMUSGList *sg); diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h index 2eefea1cc2..68ac2de83a 
100644 --- a/include/sysemu/iothread.h +++ b/include/sysemu/iothread.h @@ -35,5 +35,6 @@ typedef struct { char *iothread_get_id(IOThread *iothread); AioContext *iothread_get_aio_context(IOThread *iothread); +void iothread_stop_all(void); #endif /* IOTHREAD_H */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index c9c243631e..df67cc0672 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -53,6 +53,7 @@ extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; extern bool kvm_direct_msi_allowed; extern bool kvm_ioeventfd_any_length_allowed; +extern bool kvm_msi_use_devid; #if defined CONFIG_KVM || !defined NEED_CPU_H #define kvm_enabled() (kvm_allowed) @@ -169,6 +170,13 @@ extern bool kvm_ioeventfd_any_length_allowed; */ #define kvm_ioeventfd_any_length_enabled() (kvm_ioeventfd_any_length_allowed) +/** + * kvm_msi_devid_required: + * Returns: true if KVM requires a device id to be provided while + * defining an MSI routing entry. + */ +#define kvm_msi_devid_required() (kvm_msi_use_devid) + #else #define kvm_enabled() (0) #define kvm_irqchip_in_kernel() (false) @@ -184,6 +192,7 @@ extern bool kvm_ioeventfd_any_length_allowed; #define kvm_readonly_mem_enabled() (false) #define kvm_direct_msi_enabled() (false) #define kvm_ioeventfd_any_length_enabled() (false) +#define kvm_msi_devid_required() (false) #endif struct kvm_run; @@ -221,7 +230,6 @@ int kvm_destroy_vcpu(CPUState *cpu); #ifdef NEED_CPU_H #include "cpu.h" -void kvm_setup_guest_memory(void *start, size_t size); void kvm_flush_coalesced_mmio_buffer(void); int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr, @@ -327,8 +335,6 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); -int kvm_arch_handle_ioapic_eoi(CPUState *cpu, struct kvm_run *run); - int kvm_arch_process_async_events(CPUState *cpu); int kvm_arch_get_registers(CPUState *cpu); @@ -372,7 +378,6 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg); void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin); -void kvm_put_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); struct kvm_guest_debug; diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h index bb184c9cfe..4da808a6e9 100644 --- a/include/sysemu/numa.h +++ b/include/sysemu/numa.h @@ -32,4 +32,7 @@ void numa_set_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); void numa_unset_mem_node_id(ram_addr_t addr, uint64_t size, uint32_t node); uint32_t numa_get_node(ram_addr_t addr, Error **errp); +/* on success returns node index in numa_info, + * on failure returns nb_numa_nodes */ +int numa_get_node_for_cpu(int idx); #endif diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 9c7dfdfbec..b0a6c0695b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -60,4 +60,43 @@ int qemu_utimens(const char *path, const qemu_timespec *times); bool is_daemonized(void); +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. 
+ * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + +/* POSIX and Mingw32 differ in the name of the stdio lock functions. */ + +static inline void qemu_flockfile(FILE *f) +{ + flockfile(f); +} + +static inline void qemu_funlockfile(FILE *f) +{ + funlockfile(f); +} + #endif diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h index 3d0d34a8ee..090891fc51 100644 --- a/include/sysemu/os-win32.h +++ b/include/sysemu/os-win32.h @@ -110,6 +110,21 @@ static inline char *realpath(const char *path, char *resolved_path) return resolved_path; } +/* ??? Mingw appears to export _lock_file and _unlock_file as the functions + * with which to lock a stdio handle. But something is wrong in the markup, + * either in the header or the library, such that we get undefined references + * to "_imp___lock_file" etc when linking. Since we seem to have no other + * alternative, and the usage within the logging functions isn't critical, + * ignore FILE locking. + */ + +static inline void qemu_flockfile(FILE *f) +{ +} + +static inline void qemu_funlockfile(FILE *f) +{ +} /* We wrap all the sockets functions so that we can * set errno based on WSAGetLastError() diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h index 0a88393d2b..f80d6d28e8 100644 --- a/include/sysemu/replay.h +++ b/include/sysemu/replay.h @@ -105,6 +105,8 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint); /*! Disables storing events in the queue */ void replay_disable_events(void); +/*! Enables storing events in the queue */ +void replay_enable_events(void); /*! Returns true when saving events is enabled */ bool replay_events_enabled(void); /*! Adds bottom half event to the queue */ @@ -115,6 +117,8 @@ void replay_input_event(QemuConsole *src, InputEvent *evt); void replay_input_sync_event(void); /*! Adds block layer event to the queue */ void replay_block_event(QEMUBH *bh, uint64_t id); +/*! Returns ID for the next block event */ +uint64_t blkreplay_next_id(void); /* Character device */ diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 762beaffbb..18833ec2f1 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -15,6 +15,7 @@ #include "qemu/notify.h" #include "qemu/main-loop.h" #include "qemu/bitmap.h" +#include "qemu/uuid.h" #include "qom/object.h" /* vl.c */ @@ -22,12 +23,8 @@ extern const char *bios_name; extern const char *qemu_name; -extern uint8_t qemu_uuid[]; +extern QemuUUID qemu_uuid; extern bool qemu_uuid_set; -int qemu_uuid_parse(const char *str, uint8_t *uuid); - -#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" -#define UUID_NONE "00000000-0000-0000-0000-000000000000" bool runstate_check(RunState state); void runstate_set(RunState new_state); @@ -192,7 +189,7 @@ extern int mem_prealloc; * * Note that cpu->get_arch_id() may be larger than MAX_CPUMASK_BITS. 
*/ -#define MAX_CPUMASK_BITS 255 +#define MAX_CPUMASK_BITS 288 #define MAX_OPTION_ROMS 16 typedef struct QEMUOptionRom { @@ -258,6 +255,7 @@ bool defaults_enabled(void); extern QemuOptsList qemu_legacy_drive_opts; extern QemuOptsList qemu_common_drive_opts; extern QemuOptsList qemu_drive_opts; +extern QemuOptsList bdrv_runtime_opts; extern QemuOptsList qemu_chardev_opts; extern QemuOptsList qemu_device_opts; extern QemuOptsList qemu_netdev_opts; diff --git a/include/trace-tcg.h b/include/trace-tcg.h index edab4b159c..da68608c85 100644 --- a/include/trace-tcg.h +++ b/include/trace-tcg.h @@ -2,6 +2,5 @@ #define TRACE_TCG_H #include "trace/generated-tcg-tracers.h" -#include "trace/generated-events.h" #endif /* TRACE_TCG_H */ diff --git a/include/trace.h b/include/trace.h index 9a01e4454b..ac9ff3dddd 100644 --- a/include/trace.h +++ b/include/trace.h @@ -2,6 +2,5 @@ #define TRACE_H #include "trace/generated-tracers.h" -#include "trace/generated-events.h" #endif /* TRACE_H */ diff --git a/include/ui/console.h b/include/ui/console.h index 2703a3aa5a..e2589e2134 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -387,6 +387,7 @@ QemuConsole *qemu_console_lookup_by_device_name(const char *device_id, bool qemu_console_is_visible(QemuConsole *con); bool qemu_console_is_graphic(QemuConsole *con); bool qemu_console_is_fixedsize(QemuConsole *con); +bool qemu_console_is_gl_blocked(QemuConsole *con); char *qemu_console_get_label(QemuConsole *con); int qemu_console_get_index(QemuConsole *con); uint32_t qemu_console_get_head(QemuConsole *con); @@ -394,9 +395,7 @@ QemuUIInfo *qemu_console_get_ui_info(QemuConsole *con); int qemu_console_get_width(QemuConsole *con, int fallback); int qemu_console_get_height(QemuConsole *con, int fallback); -void text_consoles_set_display(DisplayState *ds); void console_select(unsigned int index); -void console_color_init(DisplayState *ds); void qemu_console_resize(QemuConsole *con, int width, int height); void qemu_console_copy(QemuConsole *con, int src_x, int src_y, int dst_x, int dst_y, int w, int h); diff --git a/include/ui/input.h b/include/ui/input.h index 102d8a3341..d06a12dd4c 100644 --- a/include/ui/input.h +++ b/include/ui/input.h @@ -65,6 +65,4 @@ void qemu_input_check_mode_change(void); void qemu_add_mouse_mode_change_notifier(Notifier *notify); void qemu_remove_mouse_mode_change_notifier(Notifier *notify); -int input_linux_init(void *opaque, QemuOpts *opts, Error **errp); - #endif /* INPUT_H */ diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h index 568b64a0f6..184d4c373a 100644 --- a/include/ui/spice-display.h +++ b/include/ui/spice-display.h @@ -119,7 +119,10 @@ struct SimpleSpiceDisplay { /* opengl rendering */ QEMUBH *gl_unblock_bh; QEMUTimer *gl_unblock_timer; - int dmabuf_fd; + ConsoleGLState *gls; + int gl_updates; + bool have_scanout; + bool have_surface; #endif }; @@ -144,8 +147,6 @@ void qemu_spice_destroy_update(SimpleSpiceDisplay *sdpy, SimpleSpiceUpdate *upda void qemu_spice_create_host_memslot(SimpleSpiceDisplay *ssd); void qemu_spice_create_host_primary(SimpleSpiceDisplay *ssd); void qemu_spice_destroy_host_primary(SimpleSpiceDisplay *ssd); -void qemu_spice_vm_change_state_handler(void *opaque, int running, - RunState state); void qemu_spice_display_init_common(SimpleSpiceDisplay *ssd); void qemu_spice_display_update(SimpleSpiceDisplay *ssd, diff --git a/io/channel-socket.c b/io/channel-socket.c index 196a4f18f7..d7e03f6266 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -55,7 +55,7 @@ 
qio_channel_socket_new(void) sioc->fd = -1; ioc = QIO_CHANNEL(sioc); - ioc->features |= (1 << QIO_CHANNEL_FEATURE_SHUTDOWN); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); #ifdef WIN32 ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL); @@ -72,9 +72,6 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc, int fd, Error **errp) { - int val; - socklen_t len = sizeof(val); - if (sioc->fd != -1) { error_setg(errp, "Socket is already open"); return -1; @@ -107,13 +104,9 @@ qio_channel_socket_set_fd(QIOChannelSocket *sioc, #ifndef WIN32 if (sioc->localAddr.ss_family == AF_UNIX) { QIOChannel *ioc = QIO_CHANNEL(sioc); - ioc->features |= (1 << QIO_CHANNEL_FEATURE_FD_PASS); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS); } #endif /* WIN32 */ - if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &val, &len) == 0 && val) { - QIOChannel *ioc = QIO_CHANNEL(sioc); - ioc->features |= (1 << QIO_CHANNEL_FEATURE_LISTEN); - } return 0; @@ -220,6 +213,7 @@ int qio_channel_socket_listen_sync(QIOChannelSocket *ioc, close(fd); return -1; } + qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_LISTEN); return 0; } @@ -380,7 +374,8 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, #ifndef WIN32 if (cioc->localAddr.ss_family == AF_UNIX) { - QIO_CHANNEL(cioc)->features |= (1 << QIO_CHANNEL_FEATURE_FD_PASS); + QIOChannel *ioc_local = QIO_CHANNEL(cioc); + qio_channel_set_feature(ioc_local, QIO_CHANNEL_FEATURE_FD_PASS); } #endif /* WIN32 */ @@ -403,7 +398,8 @@ static void qio_channel_socket_finalize(Object *obj) QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj); if (ioc->fd != -1) { - if (QIO_CHANNEL(ioc)->features & QIO_CHANNEL_FEATURE_LISTEN) { + QIOChannel *ioc_local = QIO_CHANNEL(ioc); + if (qio_channel_has_feature(ioc_local, QIO_CHANNEL_FEATURE_LISTEN)) { Error *err = NULL; socket_listen_cleanup(ioc->fd, &err); diff --git a/io/channel-tls.c b/io/channel-tls.c index 9a8525c816..d24dc8c613 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -111,8 +111,8 @@ qio_channel_tls_new_client(QIOChannel *master, ioc = QIO_CHANNEL(tioc); tioc->master = master; - if (master->features & (1 << QIO_CHANNEL_FEATURE_SHUTDOWN)) { - ioc->features |= (1 << QIO_CHANNEL_FEATURE_SHUTDOWN); + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); } object_ref(OBJECT(master)); diff --git a/io/channel-websock.c b/io/channel-websock.c index 533bd4b3b5..f45bced82a 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -497,8 +497,8 @@ qio_channel_websock_new_server(QIOChannel *master) ioc = QIO_CHANNEL(wioc); wioc->master = master; - if (master->features & (1 << QIO_CHANNEL_FEATURE_SHUTDOWN)) { - ioc->features |= (1 << QIO_CHANNEL_FEATURE_SHUTDOWN); + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); } object_ref(OBJECT(master)); diff --git a/io/channel.c b/io/channel.c index 923c4651ca..80924c1772 100644 --- a/io/channel.c +++ b/io/channel.c @@ -30,6 +30,21 @@ bool qio_channel_has_feature(QIOChannel *ioc, } +void qio_channel_set_feature(QIOChannel *ioc, + QIOChannelFeature feature) +{ + ioc->features |= (1 << feature); +} + + +void qio_channel_set_name(QIOChannel *ioc, + const char *name) +{ + g_free(ioc->name); + ioc->name = g_strdup(name); +} + + ssize_t qio_channel_readv_full(QIOChannel *ioc, const struct iovec *iov, size_t niov, @@ -40,7 +55,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, QIOChannelClass *klass = 
QIO_CHANNEL_GET_CLASS(ioc); if ((fds || nfds) && - !(ioc->features & (1 << QIO_CHANNEL_FEATURE_FD_PASS))) { + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { error_setg_errno(errp, EINVAL, "Channel does not support file descriptor passing"); return -1; @@ -60,7 +75,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); if ((fds || nfds) && - !(ioc->features & (1 << QIO_CHANNEL_FEATURE_FD_PASS))) { + !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { error_setg_errno(errp, EINVAL, "Channel does not support file descriptor passing"); return -1; @@ -129,7 +144,13 @@ GSource *qio_channel_create_watch(QIOChannel *ioc, GIOCondition condition) { QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); - return klass->io_create_watch(ioc, condition); + GSource *ret = klass->io_create_watch(ioc, condition); + + if (ioc->name) { + g_source_set_name(ret, ioc->name); + } + + return ret; } @@ -275,24 +296,24 @@ void qio_channel_wait(QIOChannel *ioc, } -#ifdef _WIN32 static void qio_channel_finalize(Object *obj) { QIOChannel *ioc = QIO_CHANNEL(obj); + g_free(ioc->name); + +#ifdef _WIN32 if (ioc->event) { CloseHandle(ioc->event); } -} #endif +} static const TypeInfo qio_channel_info = { .parent = TYPE_OBJECT, .name = TYPE_QIO_CHANNEL, .instance_size = sizeof(QIOChannel), -#ifdef _WIN32 .instance_finalize = qio_channel_finalize, -#endif .abstract = true, .class_size = sizeof(QIOChannelClass), }; diff --git a/io/trace-events b/io/trace-events index d064665f44..e31b596ca1 100644 --- a/io/trace-events +++ b/io/trace-events @@ -1,11 +1,5 @@ # See docs/tracing.txt for syntax documentation. -# io/buffer.c -buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd" -buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" -buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" -buffer_free(const char *buf, size_t len) "%s: capacity %zd" - # io/task.c qio_task_new(void *task, void *source, void *func, void *opaque) "Task new task=%p source=%p func=%p opaque=%p" qio_task_complete(void *task) "Task complete task=%p" diff --git a/iothread.c b/iothread.c index f183d380e6..bd70344811 100644 --- a/iothread.c +++ b/iothread.c @@ -16,10 +16,12 @@ #include "qom/object_interfaces.h" #include "qemu/module.h" #include "block/aio.h" +#include "block/block.h" #include "sysemu/iothread.h" #include "qmp-commands.h" #include "qemu/error-report.h" #include "qemu/rcu.h" +#include "qemu/main-loop.h" typedef ObjectClass IOThreadClass; @@ -28,44 +30,57 @@ typedef ObjectClass IOThreadClass; #define IOTHREAD_CLASS(klass) \ OBJECT_CLASS_CHECK(IOThreadClass, klass, TYPE_IOTHREAD) +static __thread IOThread *my_iothread; + +AioContext *qemu_get_current_aio_context(void) +{ + return my_iothread ? 
my_iothread->ctx : qemu_get_aio_context(); +} + static void *iothread_run(void *opaque) { IOThread *iothread = opaque; - bool blocking; rcu_register_thread(); + my_iothread = iothread; qemu_mutex_lock(&iothread->init_done_lock); iothread->thread_id = qemu_get_thread_id(); qemu_cond_signal(&iothread->init_done_cond); qemu_mutex_unlock(&iothread->init_done_lock); - while (!iothread->stopping) { - aio_context_acquire(iothread->ctx); - blocking = true; - while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) { - /* Progress was made, keep going */ - blocking = false; - } - aio_context_release(iothread->ctx); + while (!atomic_read(&iothread->stopping)) { + aio_poll(iothread->ctx, true); } rcu_unregister_thread(); return NULL; } -static void iothread_instance_finalize(Object *obj) +static int iothread_stop(Object *object, void *opaque) { - IOThread *iothread = IOTHREAD(obj); + IOThread *iothread; - if (!iothread->ctx) { - return; + iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); + if (!iothread || !iothread->ctx) { + return 0; } iothread->stopping = true; aio_notify(iothread->ctx); qemu_thread_join(&iothread->thread); + return 0; +} + +static void iothread_instance_finalize(Object *obj) +{ + IOThread *iothread = IOTHREAD(obj); + + iothread_stop(obj, NULL); qemu_cond_destroy(&iothread->init_done_cond); qemu_mutex_destroy(&iothread->init_done_lock); + if (!iothread->ctx) { + return; + } aio_context_unref(iothread->ctx); } @@ -174,3 +189,22 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp) object_child_foreach(container, query_one_iothread, &prev); return head; } + +void iothread_stop_all(void) +{ + Object *container = object_get_objects_root(); + BlockDriverState *bs; + BdrvNextIterator it; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *ctx = bdrv_get_aio_context(bs); + if (ctx == qemu_get_aio_context()) { + continue; + } + aio_context_acquire(ctx); + bdrv_set_aio_context(bs, qemu_get_aio_context()); + aio_context_release(ctx); + } + + object_child_foreach(container, iothread_stop, NULL); +} diff --git a/kvm-all.c b/kvm-all.c index ebf35b0c5b..330219e9dc 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -119,6 +119,7 @@ bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; bool kvm_direct_msi_allowed; bool kvm_ioeventfd_any_length_allowed; +bool kvm_msi_use_devid; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), @@ -1275,6 +1276,10 @@ int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev) kroute.u.msi.address_lo = (uint32_t)msg.address; kroute.u.msi.address_hi = msg.address >> 32; kroute.u.msi.data = le32_to_cpu(msg.data); + if (kvm_msi_devid_required()) { + kroute.flags = KVM_MSI_VALID_DEVID; + kroute.u.msi.devid = pci_requester_id(dev); + } if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) { kvm_irqchip_release_virq(s, virq); return -EINVAL; @@ -1308,6 +1313,10 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, kroute.u.msi.address_lo = (uint32_t)msg.address; kroute.u.msi.address_hi = msg.address >> 32; kroute.u.msi.data = le32_to_cpu(msg.data); + if (kvm_msi_devid_required()) { + kroute.flags = KVM_MSI_VALID_DEVID; + kroute.u.msi.devid = pci_requester_id(dev); + } if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) { return -EINVAL; } @@ -1847,10 +1856,8 @@ void kvm_flush_coalesced_mmio_buffer(void) s->coalesced_flush_in_progress = false; } -static void do_kvm_cpu_synchronize_state(void *arg) +static void 
do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - CPUState *cpu = arg; - if (!cpu->kvm_vcpu_dirty) { kvm_arch_get_registers(cpu); cpu->kvm_vcpu_dirty = true; @@ -1860,34 +1867,30 @@ static void do_kvm_cpu_synchronize_state(void *arg) void kvm_cpu_synchronize_state(CPUState *cpu) { if (!cpu->kvm_vcpu_dirty) { - run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu); + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } -static void do_kvm_cpu_synchronize_post_reset(void *arg) +static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { - CPUState *cpu = arg; - kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE); cpu->kvm_vcpu_dirty = false; } void kvm_cpu_synchronize_post_reset(CPUState *cpu) { - run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, cpu); + run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); } -static void do_kvm_cpu_synchronize_post_init(void *arg) +static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { - CPUState *cpu = arg; - kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE); cpu->kvm_vcpu_dirty = false; } void kvm_cpu_synchronize_post_init(CPUState *cpu) { - run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu); + run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); } int kvm_cpu_exec(CPUState *cpu) @@ -2148,6 +2151,7 @@ void kvm_device_access(int fd, int group, uint64_t attr, } } +/* Return 1 on success, 0 on failure */ int kvm_has_sync_mmu(void) { return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); @@ -2190,20 +2194,6 @@ int kvm_has_intx_set_mask(void) return kvm_state->intx_set_mask; } -void kvm_setup_guest_memory(void *start, size_t size) -{ - if (!kvm_has_sync_mmu()) { - int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK); - - if (ret) { - perror("qemu_madvise"); - fprintf(stderr, - "Need MADV_DONTFORK in absence of synchronous KVM MMU\n"); - exit(1); - } - } -} - #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, target_ulong pc) @@ -2225,15 +2215,15 @@ int kvm_sw_breakpoints_active(CPUState *cpu) struct kvm_set_guest_debug_data { struct kvm_guest_debug dbg; - CPUState *cpu; int err; }; -static void kvm_invoke_set_guest_debug(void *data) +static void kvm_invoke_set_guest_debug(CPUState *cpu, run_on_cpu_data data) { - struct kvm_set_guest_debug_data *dbg_data = data; + struct kvm_set_guest_debug_data *dbg_data = + (struct kvm_set_guest_debug_data *) data.host_ptr; - dbg_data->err = kvm_vcpu_ioctl(dbg_data->cpu, KVM_SET_GUEST_DEBUG, + dbg_data->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &dbg_data->dbg); } @@ -2247,9 +2237,9 @@ int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; } kvm_arch_update_guest_debug(cpu, &data.dbg); - data.cpu = cpu; - run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data); + run_on_cpu(cpu, kvm_invoke_set_guest_debug, + RUN_ON_CPU_HOST_PTR(&data)); return data.err; } diff --git a/kvm-stub.c b/kvm-stub.c index 64e23f6be0..b1b6b96c96 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -31,6 +31,7 @@ bool kvm_gsi_direct_mapping; bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_ioeventfd_any_length_allowed; +bool kvm_msi_use_devid; int kvm_destroy_vcpu(CPUState *cpu) { @@ -73,10 +74,6 @@ int kvm_has_many_ioeventfds(void) return 0; } -void kvm_setup_guest_memory(void *start, size_t size) -{ -} - int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap) { return -ENOSYS; diff --git 
a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h index c98e4dc460..541268c946 100644 --- a/linux-headers/asm-arm/kvm.h +++ b/linux-headers/asm-arm/kvm.h @@ -139,8 +139,8 @@ struct kvm_arch_memory_slot { #define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) #define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) -#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 7d82d1f9d5..fd5a2761a5 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 09ae5dc2e9..ac63ca630b 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine { __u64 fac_list[256]; }; +#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2 +#define KVM_S390_VM_CPU_MACHINE_FEAT 3 + +#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +#define KVM_S390_VM_CPU_FEAT_ESOP 0 +#define KVM_S390_VM_CPU_FEAT_SIEF2 1 +#define KVM_S390_VM_CPU_FEAT_64BSCAO 2 +#define KVM_S390_VM_CPU_FEAT_SIIF 3 +#define KVM_S390_VM_CPU_FEAT_GPERE 4 +#define KVM_S390_VM_CPU_FEAT_GSLS 5 +#define KVM_S390_VM_CPU_FEAT_IB 6 +#define KVM_S390_VM_CPU_FEAT_CEI 7 +#define KVM_S390_VM_CPU_FEAT_IBS 8 +#define KVM_S390_VM_CPU_FEAT_SKEY 9 +#define KVM_S390_VM_CPU_FEAT_CMMA 10 +#define KVM_S390_VM_CPU_FEAT_PFMFI 11 +#define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; +}; + +#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4 +#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5 +/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */ +struct kvm_s390_vm_cpu_subfunc { + __u8 plo[32]; /* always */ + __u8 ptff[16]; /* with TOD-clock steering */ + __u8 kmac[16]; /* with MSA */ + __u8 kmc[16]; /* with MSA */ + __u8 km[16]; /* with MSA */ + __u8 kimd[16]; /* with MSA */ + __u8 klmd[16]; /* with MSA */ + __u8 pckmo[16]; /* with MSA3 */ + __u8 kmctr[16]; /* with MSA4 */ + __u8 kmf[16]; /* with MSA4 */ + __u8 kmo[16]; /* with MSA4 */ + __u8 pcc[16]; /* with MSA4 */ + __u8 ppno[16]; /* with MSA5 */ + __u8 reserved[1824]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 0230779a84..e5aea761f8 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -306,9 +306,7 @@ #define __NR_vmsplice (__X32_SYSCALL_BIT + 532) #define __NR_move_pages (__X32_SYSCALL_BIT + 533) #define __NR_preadv (__X32_SYSCALL_BIT + 534) -#define __NR_preadv2 (__X32_SYSCALL_BIT + 534) #define __NR_pwritev (__X32_SYSCALL_BIT + 535) -#define __NR_pwritev2 (__X32_SYSCALL_BIT + 535) #define __NR_rt_tgsigqueueinfo (__X32_SYSCALL_BIT + 536) 
#define __NR_recvmmsg (__X32_SYSCALL_BIT + 537) #define __NR_sendmmsg (__X32_SYSCALL_BIT + 538) @@ -319,5 +317,7 @@ #define __NR_io_setup (__X32_SYSCALL_BIT + 543) #define __NR_io_submit (__X32_SYSCALL_BIT + 544) #define __NR_execveat (__X32_SYSCALL_BIT + 545) +#define __NR_preadv2 (__X32_SYSCALL_BIT + 546) +#define __NR_pwritev2 (__X32_SYSCALL_BIT + 547) #endif /* _ASM_X86_UNISTD_X32_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index e60e21ba22..4806e069e7 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -866,6 +866,10 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PMU_V3 126 #define KVM_CAP_VCPU_ATTRIBUTES 127 #define KVM_CAP_MAX_VCPU_ID 128 +#define KVM_CAP_X2APIC_API 129 +#define KVM_CAP_S390_USER_INSTR0 130 +#define KVM_CAP_MSI_DEVID 131 +#define KVM_CAP_PPC_HTM 132 #ifdef KVM_CAP_IRQ_ROUTING @@ -878,7 +882,10 @@ struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; - __u32 pad; + union { + __u32 pad; + __u32 devid; + }; }; struct kvm_irq_routing_s390_adapter { @@ -1024,12 +1031,14 @@ struct kvm_one_reg { __u64 addr; }; +#define KVM_MSI_VALID_DEVID (1U << 0) struct kvm_msi { __u32 address_lo; __u32 address_hi; __u32 data; __u32 flags; - __u8 pad[16]; + __u32 devid; + __u8 pad[12]; }; struct kvm_arm_device_addr { @@ -1074,6 +1083,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_ARM_VGIC_V3, #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 + KVM_DEV_TYPE_ARM_VGIC_ITS, +#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_MAX, }; @@ -1313,4 +1324,7 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index 571294cea0..ac7a1f136a 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -47,6 +47,32 @@ struct vhost_vring_addr { __u64 log_guest_addr; }; +/* no alignment requirement */ +struct vhost_iotlb_msg { + __u64 iova; + __u64 size; + __u64 uaddr; +#define VHOST_ACCESS_RO 0x1 +#define VHOST_ACCESS_WO 0x2 +#define VHOST_ACCESS_RW 0x3 + __u8 perm; +#define VHOST_IOTLB_MISS 1 +#define VHOST_IOTLB_UPDATE 2 +#define VHOST_IOTLB_INVALIDATE 3 +#define VHOST_IOTLB_ACCESS_FAIL 4 + __u8 type; +}; + +#define VHOST_IOTLB_MSG 0x1 + +struct vhost_msg { + int type; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + struct vhost_memory_region { __u64 guest_phys_addr; __u64 memory_size; /* bytes */ @@ -146,6 +172,8 @@ struct vhost_memory { #define VHOST_F_LOG_ALL 26 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. 
*/ #define VHOST_NET_F_VIRTIO_NET_HDR 27 +/* Vhost have device IOTLB */ +#define VHOST_F_DEVICE_IOTLB 63 /* VHOST_SCSI specific definitions */ @@ -175,4 +203,9 @@ struct vhost_scsi_target { #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) +/* VHOST_VSOCK specific defines */ + +#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) +#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) + #endif diff --git a/linux-user/arm/target_syscall.h b/linux-user/arm/target_syscall.h index cd021ff598..94e2a42cb2 100644 --- a/linux-user/arm/target_syscall.h +++ b/linux-user/arm/target_syscall.h @@ -32,5 +32,13 @@ struct target_pt_regs { #define TARGET_MINSIGSTKSZ 2048 #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_WANT_OLD_SYS_SELECT + +#define TARGET_FORCE_SHMLBA + +static inline abi_ulong target_shmlba(CPUARMState *env) +{ + return 4 * 4096; +} #endif /* ARM_TARGET_SYSCALL_H */ diff --git a/linux-user/elfload.c b/linux-user/elfload.c index f807baf389..547053c27a 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -741,8 +741,12 @@ static uint32_t get_elf_hwcap(void) Altivec/FP/SPE support. Anything else is just a bonus. */ #define GET_FEATURE(flag, feature) \ do { if (cpu->env.insns_flags & flag) { features |= feature; } } while (0) -#define GET_FEATURE2(flag, feature) \ - do { if (cpu->env.insns_flags2 & flag) { features |= feature; } } while (0) +#define GET_FEATURE2(flags, feature) \ + do { \ + if ((cpu->env.insns_flags2 & flags) == flags) { \ + features |= feature; \ + } \ + } while (0) GET_FEATURE(PPC_64B, QEMU_PPC_FEATURE_64); GET_FEATURE(PPC_FLOAT, QEMU_PPC_FEATURE_HAS_FPU); GET_FEATURE(PPC_ALTIVEC, QEMU_PPC_FEATURE_HAS_ALTIVEC); @@ -1838,6 +1842,8 @@ static void load_elf_image(const char *image_name, int image_fd, info->pt_dynamic_addr = 0; #endif + mmap_lock(); + /* Find the maximum size of the image and allocate an appropriate amount of memory to handle that. */ loaddr = -1, hiaddr = 0; @@ -1998,6 +2004,8 @@ static void load_elf_image(const char *image_name, int image_fd, load_symbols(ehdr, image_fd, load_bias); } + mmap_unlock(); + close(image_fd); return; @@ -2111,19 +2119,19 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) found: /* Now know where the strtab and symtab are. Snarf them. */ - s = malloc(sizeof(*s)); + s = g_try_new(struct syminfo, 1); if (!s) { goto give_up; } i = shdr[str_idx].sh_size; - s->disas_strtab = strings = malloc(i); + s->disas_strtab = strings = g_try_malloc(i); if (!strings || pread(fd, strings, i, shdr[str_idx].sh_offset) != i) { goto give_up; } i = shdr[sym_idx].sh_size; - syms = malloc(i); + syms = g_try_malloc(i); if (!syms || pread(fd, syms, i, shdr[sym_idx].sh_offset) != i) { goto give_up; } @@ -2157,7 +2165,7 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) that we threw away. Whether or not this has any effect on the memory allocation depends on the malloc implementation and how many symbols we managed to discard. 
*/ - new_syms = realloc(syms, nsyms * sizeof(*syms)); + new_syms = g_try_renew(struct elf_sym, syms, nsyms); if (new_syms == NULL) { goto give_up; } @@ -2178,9 +2186,9 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) return; give_up: - free(s); - free(strings); - free(syms); + g_free(s); + g_free(strings); + g_free(syms); } int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) @@ -2233,7 +2241,7 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE, -1, 0); + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } } @@ -2718,7 +2726,6 @@ static int core_dump_filename(const TaskState *ts, char *buf, size_t bufsize) { char timestamp[64]; - char *filename = NULL; char *base_filename = NULL; struct timeval tv; struct tm tm; @@ -2731,14 +2738,12 @@ static int core_dump_filename(const TaskState *ts, char *buf, return (-1); } - filename = strdup(ts->bprm->filename); - base_filename = strdup(basename(filename)); + base_filename = g_path_get_basename(ts->bprm->filename); (void) strftime(timestamp, sizeof (timestamp), "%Y%m%d-%H%M%S", localtime_r(&tv.tv_sec, &tm)); (void) snprintf(buf, bufsize, "qemu_%s_%s_%d.core", base_filename, timestamp, (int)getpid()); - free(base_filename); - free(filename); + g_free(base_filename); return (0); } @@ -3053,7 +3058,9 @@ static int elf_core_dump(int signr, const CPUArchState *env) phdr.p_align = ELF_EXEC_PAGESIZE; bswap_phdr(&phdr, 1); - dump_write(fd, &phdr, sizeof (phdr)); + if (dump_write(fd, &phdr, sizeof(phdr)) != 0) { + goto out; + } } /* diff --git a/linux-user/flatload.c b/linux-user/flatload.c index 42d1079a24..a35a560904 100644 --- a/linux-user/flatload.c +++ b/linux-user/flatload.c @@ -95,7 +95,13 @@ static int target_pread(int fd, abi_ulong ptr, abi_ulong len, int ret; buf = lock_user(VERIFY_WRITE, ptr, len, 0); + if (!buf) { + return -EFAULT; + } ret = pread(fd, buf, len, offset); + if (ret < 0) { + ret = -errno; + } unlock_user(buf, ptr, len); return ret; } diff --git a/linux-user/i386/target_syscall.h b/linux-user/i386/target_syscall.h index b4e895fd9c..2854758134 100644 --- a/linux-user/i386/target_syscall.h +++ b/linux-user/i386/target_syscall.h @@ -153,5 +153,6 @@ struct target_vm86plus_struct { #define TARGET_MINSIGSTKSZ 2048 #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_WANT_OLD_SYS_SELECT #endif /* I386_TARGET_SYSCALL_H */ diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h index 7e2c133ba1..1bad701481 100644 --- a/linux-user/ioctls.h +++ b/linux-user/ioctls.h @@ -120,6 +120,9 @@ MK_PTR(MK_STRUCT(STRUCT_fiemap))) #endif + IOCTL(FS_IOC_GETFLAGS, IOC_R, MK_PTR(TYPE_INT)) + IOCTL(FS_IOC_SETFLAGS, IOC_W, MK_PTR(TYPE_INT)) + IOCTL(SIOCATMARK, IOC_R, MK_PTR(TYPE_INT)) IOCTL(SIOCGIFNAME, IOC_RW, MK_PTR(TYPE_INT)) IOCTL(SIOCGIFFLAGS, IOC_W | IOC_R, MK_PTR(MK_STRUCT(STRUCT_short_ifreq))) diff --git a/linux-user/m68k/target_syscall.h b/linux-user/m68k/target_syscall.h index db2be4f101..632ee4fcf8 100644 --- a/linux-user/m68k/target_syscall.h +++ b/linux-user/m68k/target_syscall.h @@ -24,6 +24,8 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_WANT_OLD_SYS_SELECT + void do_m68k_simcall(CPUM68KState *, int); #endif /* M68K_TARGET_SYSCALL_H */ diff --git a/linux-user/main.c b/linux-user/main.c index 
f2f4d2f05a..75b199f274 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -107,21 +107,11 @@ int cpu_get_pic_interrupt(CPUX86State *env) /***********************************************************/ /* Helper routines for implementing atomic operations. */ -/* To implement exclusive operations we force all cpus to syncronise. - We don't require a full sync, only that no cpus are executing guest code. - The alternative is to map target atomic ops onto host equivalents, - which requires quite a lot of per host/target work. */ -static pthread_mutex_t cpu_list_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t exclusive_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t exclusive_cond = PTHREAD_COND_INITIALIZER; -static pthread_cond_t exclusive_resume = PTHREAD_COND_INITIALIZER; -static int pending_cpus; - /* Make sure everything is in a consistent state for calling fork(). */ void fork_start(void) { + cpu_list_lock(); qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); - pthread_mutex_lock(&exclusive_lock); mmap_fork_start(); } @@ -137,93 +127,15 @@ void fork_end(int child) QTAILQ_REMOVE(&cpus, cpu, node); } } - pending_cpus = 0; - pthread_mutex_init(&exclusive_lock, NULL); - pthread_mutex_init(&cpu_list_mutex, NULL); - pthread_cond_init(&exclusive_cond, NULL); - pthread_cond_init(&exclusive_resume, NULL); qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + qemu_init_cpu_list(); gdbserver_fork(thread_cpu); } else { - pthread_mutex_unlock(&exclusive_lock); qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + cpu_list_unlock(); } } -/* Wait for pending exclusive operations to complete. The exclusive lock - must be held. */ -static inline void exclusive_idle(void) -{ - while (pending_cpus) { - pthread_cond_wait(&exclusive_resume, &exclusive_lock); - } -} - -/* Start an exclusive operation. - Must only be called from outside cpu_exec. */ -static inline void start_exclusive(void) -{ - CPUState *other_cpu; - - pthread_mutex_lock(&exclusive_lock); - exclusive_idle(); - - pending_cpus = 1; - /* Make all other cpus stop executing. */ - CPU_FOREACH(other_cpu) { - if (other_cpu->running) { - pending_cpus++; - cpu_exit(other_cpu); - } - } - if (pending_cpus > 1) { - pthread_cond_wait(&exclusive_cond, &exclusive_lock); - } -} - -/* Finish an exclusive operation. */ -static inline void __attribute__((unused)) end_exclusive(void) -{ - pending_cpus = 0; - pthread_cond_broadcast(&exclusive_resume); - pthread_mutex_unlock(&exclusive_lock); -} - -/* Wait for exclusive ops to finish, and begin cpu execution. */ -static inline void cpu_exec_start(CPUState *cpu) -{ - pthread_mutex_lock(&exclusive_lock); - exclusive_idle(); - cpu->running = true; - pthread_mutex_unlock(&exclusive_lock); -} - -/* Mark cpu as not executing, and release pending exclusive ops. 
*/ -static inline void cpu_exec_end(CPUState *cpu) -{ - pthread_mutex_lock(&exclusive_lock); - cpu->running = false; - if (pending_cpus > 1) { - pending_cpus--; - if (pending_cpus == 1) { - pthread_cond_signal(&exclusive_cond); - } - } - exclusive_idle(); - pthread_mutex_unlock(&exclusive_lock); -} - -void cpu_list_lock(void) -{ - pthread_mutex_lock(&cpu_list_mutex); -} - -void cpu_list_unlock(void) -{ - pthread_mutex_unlock(&cpu_list_mutex); -} - - #ifdef TARGET_I386 /***********************************************************/ /* CPUX86 core interface */ @@ -296,6 +208,8 @@ void cpu_loop(CPUX86State *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch(trapnr) { case 0x80: /* linux syscall from int $0x80 */ @@ -339,7 +253,7 @@ void cpu_loop(CPUX86State *env) info.si_errno = 0; info.si_code = TARGET_SI_KERNEL; info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP0D_GPF: /* XXX: potential problem if ABI32 */ @@ -353,7 +267,7 @@ void cpu_loop(CPUX86State *env) info.si_errno = 0; info.si_code = TARGET_SI_KERNEL; info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP0E_PAGE: @@ -364,7 +278,7 @@ void cpu_loop(CPUX86State *env) else info.si_code = TARGET_SEGV_ACCERR; info._sifields._sigfault._addr = env->cr[2]; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP00_DIVZ: #ifndef TARGET_X86_64 @@ -378,7 +292,7 @@ void cpu_loop(CPUX86State *env) info.si_errno = 0; info.si_code = TARGET_FPE_INTDIV; info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP01_DB: @@ -398,7 +312,7 @@ void cpu_loop(CPUX86State *env) info.si_code = TARGET_SI_KERNEL; info._sifields._sigfault._addr = 0; } - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP04_INTO: @@ -413,7 +327,7 @@ void cpu_loop(CPUX86State *env) info.si_errno = 0; info.si_code = TARGET_SI_KERNEL; info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP06_ILLOP: @@ -421,7 +335,7 @@ void cpu_loop(CPUX86State *env) info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPN; info._sifields._sigfault._addr = env->eip; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ @@ -436,10 +350,13 @@ void cpu_loop(CPUX86State *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: pc = env->segs[R_CS].base + env->eip; EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", @@ -576,7 +493,7 @@ static void arm_kernel_cmpxchg64_helper(CPUARMState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->exception.vaddress; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } /* Handle a jump to the kernel code page. 
*/ @@ -636,94 +553,6 @@ do_kernel_trap(CPUARMState *env) return 0; } -/* Store exclusive handling for AArch32 */ -static int do_strex(CPUARMState *env) -{ - uint64_t val; - int size; - int rc = 1; - int segv = 0; - uint32_t addr; - start_exclusive(); - if (env->exclusive_addr != env->exclusive_test) { - goto fail; - } - /* We know we're always AArch32 so the address is in uint32_t range - * unless it was the -1 exclusive-monitor-lost value (which won't - * match exclusive_test above). - */ - assert(extract64(env->exclusive_addr, 32, 32) == 0); - addr = env->exclusive_addr; - size = env->exclusive_info & 0xf; - switch (size) { - case 0: - segv = get_user_u8(val, addr); - break; - case 1: - segv = get_user_data_u16(val, addr, env); - break; - case 2: - case 3: - segv = get_user_data_u32(val, addr, env); - break; - default: - abort(); - } - if (segv) { - env->exception.vaddress = addr; - goto done; - } - if (size == 3) { - uint32_t valhi; - segv = get_user_data_u32(valhi, addr + 4, env); - if (segv) { - env->exception.vaddress = addr + 4; - goto done; - } - if (arm_cpu_bswap_data(env)) { - val = deposit64((uint64_t)valhi, 32, 32, val); - } else { - val = deposit64(val, 32, 32, valhi); - } - } - if (val != env->exclusive_val) { - goto fail; - } - - val = env->regs[(env->exclusive_info >> 8) & 0xf]; - switch (size) { - case 0: - segv = put_user_u8(val, addr); - break; - case 1: - segv = put_user_data_u16(val, addr, env); - break; - case 2: - case 3: - segv = put_user_data_u32(val, addr, env); - break; - } - if (segv) { - env->exception.vaddress = addr; - goto done; - } - if (size == 3) { - val = env->regs[(env->exclusive_info >> 12) & 0xf]; - segv = put_user_data_u32(val, addr + 4, env); - if (segv) { - env->exception.vaddress = addr + 4; - goto done; - } - } - rc = 0; -fail: - env->regs[15] += 4; - env->regs[(env->exclusive_info >> 4) & 0xf] = rc; -done: - end_exclusive(); - return segv; -} - void cpu_loop(CPUARMState *env) { CPUState *cs = CPU(arm_env_get_cpu(env)); @@ -737,6 +566,8 @@ void cpu_loop(CPUARMState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch(trapnr) { case EXCP_UDEF: { @@ -755,7 +586,7 @@ void cpu_loop(CPUARMState *env) info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPN; info._sifields._sigfault._addr = env->regs[15]; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } else if (rc < 0) { /* FP exception */ int arm_fpe=0; @@ -786,7 +617,7 @@ void cpu_loop(CPUARMState *env) if (arm_fpe & BIT_IOC) info.si_code = TARGET_FPE_FLTINV; info._sifields._sigfault._addr = env->regs[15]; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } else { env->regs[15] += 4; } @@ -890,14 +721,12 @@ void cpu_loop(CPUARMState *env) } } break; + case EXCP_SEMIHOST: + env->regs[0] = do_arm_semihosting(env); + break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; - case EXCP_STREX: - if (!do_strex(env)) { - break; - } - /* fall through for segv */ case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: addr = env->exception.vaddress; @@ -907,7 +736,7 @@ void cpu_loop(CPUARMState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = addr; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_DEBUG: @@ -921,7 +750,7 @@ void cpu_loop(CPUARMState *env) info.si_signo = sig; info.si_errno = 0; 
info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; @@ -932,6 +761,9 @@ void cpu_loop(CPUARMState *env) case EXCP_YIELD: /* nothing to do here for user-mode, just resume guest code */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: error: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); @@ -943,124 +775,6 @@ void cpu_loop(CPUARMState *env) #else -/* - * Handle AArch64 store-release exclusive - * - * rs = gets the status result of store exclusive - * rt = is the register that is stored - * rt2 = is the second register store (in STP) - * - */ -static int do_strex_a64(CPUARMState *env) -{ - uint64_t val; - int size; - bool is_pair; - int rc = 1; - int segv = 0; - uint64_t addr; - int rs, rt, rt2; - - start_exclusive(); - /* size | is_pair << 2 | (rs << 4) | (rt << 9) | (rt2 << 14)); */ - size = extract32(env->exclusive_info, 0, 2); - is_pair = extract32(env->exclusive_info, 2, 1); - rs = extract32(env->exclusive_info, 4, 5); - rt = extract32(env->exclusive_info, 9, 5); - rt2 = extract32(env->exclusive_info, 14, 5); - - addr = env->exclusive_addr; - - if (addr != env->exclusive_test) { - goto finish; - } - - switch (size) { - case 0: - segv = get_user_u8(val, addr); - break; - case 1: - segv = get_user_u16(val, addr); - break; - case 2: - segv = get_user_u32(val, addr); - break; - case 3: - segv = get_user_u64(val, addr); - break; - default: - abort(); - } - if (segv) { - env->exception.vaddress = addr; - goto error; - } - if (val != env->exclusive_val) { - goto finish; - } - if (is_pair) { - if (size == 2) { - segv = get_user_u32(val, addr + 4); - } else { - segv = get_user_u64(val, addr + 8); - } - if (segv) { - env->exception.vaddress = addr + (size == 2 ? 4 : 8); - goto error; - } - if (val != env->exclusive_high) { - goto finish; - } - } - /* handle the zero register */ - val = rt == 31 ? 0 : env->xregs[rt]; - switch (size) { - case 0: - segv = put_user_u8(val, addr); - break; - case 1: - segv = put_user_u16(val, addr); - break; - case 2: - segv = put_user_u32(val, addr); - break; - case 3: - segv = put_user_u64(val, addr); - break; - } - if (segv) { - goto error; - } - if (is_pair) { - /* handle the zero register */ - val = rt2 == 31 ? 0 : env->xregs[rt2]; - if (size == 2) { - segv = put_user_u32(val, addr + 4); - } else { - segv = put_user_u64(val, addr + 8); - } - if (segv) { - env->exception.vaddress = addr + (size == 2 ? 4 : 8); - goto error; - } - } - rc = 0; -finish: - env->pc += 4; - /* rs == 31 encodes a write to the ZR, thus throwing away - * the status return. This is rather silly but valid. 
- */ - if (rs < 31) { - env->xregs[rs] = rc; - } -error: - /* instruction faulted, PC does not advance */ - /* either way a strex releases any exclusive lock we have */ - env->exclusive_addr = -1; - end_exclusive(); - return segv; -} - /* AArch64 main loop */ void cpu_loop(CPUARMState *env) { @@ -1073,6 +787,7 @@ void cpu_loop(CPUARMState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); switch (trapnr) { case EXCP_SWI: @@ -1099,13 +814,8 @@ void cpu_loop(CPUARMState *env) info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPN; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; - case EXCP_STREX: - if (!do_strex_a64(env)) { - break; - } - /* fall through for segv */ case EXCP_PREFETCH_ABORT: case EXCP_DATA_ABORT: info.si_signo = TARGET_SIGSEGV; @@ -1113,7 +823,7 @@ void cpu_loop(CPUARMState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->exception.vaddress; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_DEBUG: case EXCP_BKPT: @@ -1122,7 +832,7 @@ void cpu_loop(CPUARMState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_SEMIHOST: @@ -1131,6 +841,9 @@ void cpu_loop(CPUARMState *env) case EXCP_YIELD: /* nothing to do here for user-mode, just resume guest code */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); abort(); @@ -1138,8 +851,6 @@ void cpu_loop(CPUARMState *env) process_pending_signals(env); /* Exception return on AArch64 always clears the exclusive monitor, * so any return to running guest code implies this. - * A strex (successful or otherwise) also clears the monitor, so - * we don't need to specialcase EXCP_STREX. */ env->exclusive_addr = -1; } @@ -1161,6 +872,8 @@ void cpu_loop(CPUUniCore32State *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch (trapnr) { case UC32_EXCP_PRIV: { @@ -1202,7 +915,7 @@ void cpu_loop(CPUUniCore32State *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->cp0.c4_faultaddr; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ @@ -1216,10 +929,13 @@ void cpu_loop(CPUUniCore32State *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: goto error; } @@ -1366,6 +1082,7 @@ void cpu_loop (CPUSPARCState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); /* Compute PSR before exposing state. 
*/ if (env->cc_op != CC_OP_FLAGS) { @@ -1431,7 +1148,7 @@ void cpu_loop (CPUSPARCState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->mmuregs[4]; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; #else @@ -1452,7 +1169,7 @@ void cpu_loop (CPUSPARCState *env) info._sifields._sigfault._addr = env->dmmuregs[4]; else info._sifields._sigfault._addr = cpu_tsptr(env)->tpc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; #ifndef TARGET_ABI32 @@ -1475,7 +1192,7 @@ void cpu_loop (CPUSPARCState *env) info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPC; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_DEBUG: @@ -1488,10 +1205,13 @@ void cpu_loop (CPUSPARCState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -1638,6 +1358,8 @@ void cpu_loop(CPUPPCState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch(trapnr) { case POWERPC_EXCP_NONE: /* Just go on */ @@ -1651,11 +1373,10 @@ void cpu_loop(CPUPPCState *env) "Aborting\n"); break; case POWERPC_EXCP_DSI: /* Data storage exception */ - EXCP_DUMP(env, "Invalid data memory access: 0x" TARGET_FMT_lx "\n", - env->spr[SPR_DAR]); /* XXX: check this. Seems bugged */ switch (env->error_code & 0xFF000000) { case 0x40000000: + case 0x42000000: info.si_signo = TARGET_SIGSEGV; info.si_errno = 0; info.si_code = TARGET_SEGV_MAPERR; @@ -1680,11 +1401,9 @@ void cpu_loop(CPUPPCState *env) break; } info._sifields._sigfault._addr = env->nip; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_ISI: /* Instruction storage exception */ - EXCP_DUMP(env, "Invalid instruction fetch: 0x\n" TARGET_FMT_lx - "\n", env->spr[SPR_SRR0]); /* XXX: check this */ switch (env->error_code & 0xFF000000) { case 0x40000000: @@ -1708,27 +1427,25 @@ void cpu_loop(CPUPPCState *env) break; } info._sifields._sigfault._addr = env->nip - 4; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_EXTERNAL: /* External input */ cpu_abort(cs, "External interrupt while in user mode. 
" "Aborting\n"); break; case POWERPC_EXCP_ALIGN: /* Alignment exception */ - EXCP_DUMP(env, "Unaligned memory access\n"); /* XXX: check this */ info.si_signo = TARGET_SIGBUS; info.si_errno = 0; info.si_code = TARGET_BUS_ADRALN; info._sifields._sigfault._addr = env->nip; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_PROGRAM: /* Program exception */ case POWERPC_EXCP_HV_EMU: /* HV emulation */ /* XXX: check this */ switch (env->error_code & ~0xF) { case POWERPC_EXCP_FP: - EXCP_DUMP(env, "Floating point program exception\n"); info.si_signo = TARGET_SIGFPE; info.si_errno = 0; switch (env->error_code & 0xF) { @@ -1764,7 +1481,6 @@ void cpu_loop(CPUPPCState *env) } break; case POWERPC_EXCP_INVAL: - EXCP_DUMP(env, "Invalid instruction\n"); info.si_signo = TARGET_SIGILL; info.si_errno = 0; switch (env->error_code & 0xF) { @@ -1788,7 +1504,6 @@ void cpu_loop(CPUPPCState *env) } break; case POWERPC_EXCP_PRIV: - EXCP_DUMP(env, "Privilege violation\n"); info.si_signo = TARGET_SIGILL; info.si_errno = 0; switch (env->error_code & 0xF) { @@ -1814,28 +1529,26 @@ void cpu_loop(CPUPPCState *env) env->error_code); break; } - info._sifields._sigfault._addr = env->nip - 4; - queue_signal(env, info.si_signo, &info); + info._sifields._sigfault._addr = env->nip; + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_FPU: /* Floating-point unavailable exception */ - EXCP_DUMP(env, "No floating point allowed\n"); info.si_signo = TARGET_SIGILL; info.si_errno = 0; info.si_code = TARGET_ILL_COPROC; - info._sifields._sigfault._addr = env->nip - 4; - queue_signal(env, info.si_signo, &info); + info._sifields._sigfault._addr = env->nip; + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_SYSCALL: /* System call exception */ cpu_abort(cs, "Syscall exception while in user mode. " "Aborting\n"); break; case POWERPC_EXCP_APU: /* Auxiliary processor unavailable */ - EXCP_DUMP(env, "No APU instruction allowed\n"); info.si_signo = TARGET_SIGILL; info.si_errno = 0; info.si_code = TARGET_ILL_COPROC; - info._sifields._sigfault._addr = env->nip - 4; - queue_signal(env, info.si_signo, &info); + info._sifields._sigfault._addr = env->nip; + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_DECR: /* Decrementer exception */ cpu_abort(cs, "Decrementer interrupt while in user mode. " @@ -1858,12 +1571,11 @@ void cpu_loop(CPUPPCState *env) "Aborting\n"); break; case POWERPC_EXCP_SPEU: /* SPE/embedded floating-point unavail. */ - EXCP_DUMP(env, "No SPE/floating-point instruction allowed\n"); info.si_signo = TARGET_SIGILL; info.si_errno = 0; info.si_code = TARGET_ILL_COPROC; - info._sifields._sigfault._addr = env->nip - 4; - queue_signal(env, info.si_signo, &info); + info._sifields._sigfault._addr = env->nip; + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_EFPDI: /* Embedded floating-point data IRQ */ cpu_abort(cs, "Embedded floating-point data IRQ not handled\n"); @@ -1922,12 +1634,11 @@ void cpu_loop(CPUPPCState *env) "while in user mode. 
Aborting\n"); break; case POWERPC_EXCP_VPU: /* Vector unavailable exception */ - EXCP_DUMP(env, "No Altivec instructions allowed\n"); info.si_signo = TARGET_SIGILL; info.si_errno = 0; info.si_code = TARGET_ILL_COPROC; - info._sifields._sigfault._addr = env->nip - 4; - queue_signal(env, info.si_signo, &info); + info._sifields._sigfault._addr = env->nip; + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case POWERPC_EXCP_PIT: /* Programmable interval timer IRQ */ cpu_abort(cs, "Programmable interval timer interrupt " @@ -2001,7 +1712,6 @@ void cpu_loop(CPUPPCState *env) env->gpr[5], env->gpr[6], env->gpr[7], env->gpr[8], 0, 0); if (ret == -TARGET_ERESTARTSYS) { - env->nip -= 4; break; } if (ret == (target_ulong)(-TARGET_QEMU_ESIGRETURN)) { @@ -2009,6 +1719,7 @@ void cpu_loop(CPUPPCState *env) Avoid corrupting register state. */ break; } + env->nip += 4; if (ret > (target_ulong)(-515)) { env->crf[0] |= 0x1; ret = -ret; @@ -2021,7 +1732,7 @@ void cpu_loop(CPUPPCState *env) info.si_errno = 0; info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->nip; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_DEBUG: @@ -2033,15 +1744,18 @@ void cpu_loop(CPUPPCState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: - cpu_abort(cs, "Unknown exception 0x%d. Aborting\n", trapnr); + cpu_abort(cs, "Unknown exception 0x%x. Aborting\n", trapnr); break; } process_pending_signals(env); @@ -2384,8 +2098,8 @@ static const uint8_t mips_syscall_args[] = { MIPS_SYS(sys_dup3, 3) MIPS_SYS(sys_pipe2, 2) MIPS_SYS(sys_inotify_init1, 1) - MIPS_SYS(sys_preadv, 6) /* 4330 */ - MIPS_SYS(sys_pwritev, 6) + MIPS_SYS(sys_preadv, 5) /* 4330 */ + MIPS_SYS(sys_pwritev, 5) MIPS_SYS(sys_rt_tgsigqueueinfo, 4) MIPS_SYS(sys_perf_event_open, 5) MIPS_SYS(sys_accept4, 4) @@ -2397,6 +2111,26 @@ static const uint8_t mips_syscall_args[] = { MIPS_SYS(sys_open_by_handle_at, 3) /* 4340 */ MIPS_SYS(sys_clock_adjtime, 2) MIPS_SYS(sys_syncfs, 1) + MIPS_SYS(sys_sendmmsg, 4) + MIPS_SYS(sys_setns, 2) + MIPS_SYS(sys_process_vm_readv, 6) /* 345 */ + MIPS_SYS(sys_process_vm_writev, 6) + MIPS_SYS(sys_kcmp, 5) + MIPS_SYS(sys_finit_module, 3) + MIPS_SYS(sys_sched_setattr, 2) + MIPS_SYS(sys_sched_getattr, 3) /* 350 */ + MIPS_SYS(sys_renameat2, 5) + MIPS_SYS(sys_seccomp, 3) + MIPS_SYS(sys_getrandom, 3) + MIPS_SYS(sys_memfd_create, 2) + MIPS_SYS(sys_bpf, 3) /* 355 */ + MIPS_SYS(sys_execveat, 5) + MIPS_SYS(sys_userfaultfd, 1) + MIPS_SYS(sys_membarrier, 2) + MIPS_SYS(sys_mlock2, 3) + MIPS_SYS(sys_copy_file_range, 6) /* 360 */ + MIPS_SYS(sys_preadv2, 6) + MIPS_SYS(sys_pwritev2, 6) }; # undef MIPS_SYS # endif /* O32 */ @@ -2467,13 +2201,13 @@ static int do_break(CPUMIPSState *env, target_siginfo_t *info, info->si_signo = TARGET_SIGFPE; info->si_errno = 0; info->si_code = (code == BRK_OVERFLOW) ? 
FPE_INTOVF : FPE_INTDIV; - queue_signal(env, info->si_signo, &*info); + queue_signal(env, info->si_signo, QEMU_SI_FAULT, &*info); ret = 0; break; default: info->si_signo = TARGET_SIGTRAP; info->si_errno = 0; - queue_signal(env, info->si_signo, &*info); + queue_signal(env, info->si_signo, QEMU_SI_FAULT, &*info); ret = 0; break; } @@ -2495,6 +2229,8 @@ void cpu_loop(CPUMIPSState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch(trapnr) { case EXCP_SYSCALL: env->active_tc.PC += 4; @@ -2571,14 +2307,14 @@ void cpu_loop(CPUMIPSState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->CP0_BadVAddr; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_CpU: case EXCP_RI: info.si_signo = TARGET_SIGILL; info.si_errno = 0; info.si_code = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ @@ -2593,7 +2329,7 @@ void cpu_loop(CPUMIPSState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; @@ -2603,14 +2339,14 @@ void cpu_loop(CPUMIPSState *env) info.si_errno = 0; info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->active_tc.PC; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_DSPDIS: info.si_signo = TARGET_SIGILL; info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPC; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; /* The code below was inspired by the MIPS Linux kernel trap * handling code in arch/mips/kernel/traps.c. 
@@ -2713,6 +2449,9 @@ void cpu_loop(CPUMIPSState *env) } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: error: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); @@ -2735,6 +2474,7 @@ void cpu_loop(CPUOpenRISCState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); gdbsig = 0; switch (trapnr) { @@ -2799,6 +2539,9 @@ void cpu_loop(CPUOpenRISCState *env) case EXCP_NR: qemu_log_mask(CPU_LOG_INT, "\nNR\n"); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n", trapnr); @@ -2829,6 +2572,7 @@ void cpu_loop(CPUSH4State *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); switch (trapnr) { case 0x160: @@ -2861,7 +2605,7 @@ void cpu_loop(CPUSH4State *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; @@ -2871,9 +2615,12 @@ void cpu_loop(CPUSH4State *env) info.si_errno = 0; info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->tea; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -2895,6 +2642,8 @@ void cpu_loop(CPUCRISState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch (trapnr) { case 0xaa: { @@ -2903,7 +2652,7 @@ void cpu_loop(CPUCRISState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->pregs[PR_EDA]; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_INTERRUPT: @@ -2935,10 +2684,13 @@ void cpu_loop(CPUCRISState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -2960,6 +2712,8 @@ void cpu_loop(CPUMBState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch (trapnr) { case 0xaa: { @@ -2968,7 +2722,7 @@ void cpu_loop(CPUMBState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_INTERRUPT: @@ -3017,7 +2771,7 @@ void cpu_loop(CPUMBState *env) info.si_errno = 0; info.si_code = TARGET_FPE_FLTDIV; info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case ESR_EC_FPU: info.si_signo = TARGET_SIGFPE; @@ -3029,7 +2783,7 @@ void cpu_loop(CPUMBState *env) info.si_code = TARGET_FPE_FLTDIV; } info._sifields._sigfault._addr = 0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; default: printf ("Unhandled hw-exception: 0x%x\n", @@ -3049,10 +2803,13 @@ void cpu_loop(CPUMBState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - 
queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -3077,6 +2834,8 @@ void cpu_loop(CPUM68KState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch(trapnr) { case EXCP_ILLEGAL: { @@ -3103,7 +2862,7 @@ void cpu_loop(CPUM68KState *env) info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPN; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_TRAP0: { @@ -3137,7 +2896,7 @@ void cpu_loop(CPUM68KState *env) /* XXX: check env->error_code */ info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = env->mmu.ar; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; case EXCP_DEBUG: @@ -3150,10 +2909,13 @@ void cpu_loop(CPUM68KState *env) info.si_signo = sig; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } } break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); abort(); @@ -3164,51 +2926,6 @@ void cpu_loop(CPUM68KState *env) #endif /* TARGET_M68K */ #ifdef TARGET_ALPHA -static void do_store_exclusive(CPUAlphaState *env, int reg, int quad) -{ - target_ulong addr, val, tmp; - target_siginfo_t info; - int ret = 0; - - addr = env->lock_addr; - tmp = env->lock_st_addr; - env->lock_addr = -1; - env->lock_st_addr = 0; - - start_exclusive(); - mmap_lock(); - - if (addr == tmp) { - if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) { - goto do_sigsegv; - } - - if (val == env->lock_value) { - tmp = env->ir[reg]; - if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) { - goto do_sigsegv; - } - ret = 1; - } - } - env->ir[reg] = ret; - env->pc += 4; - - mmap_unlock(); - end_exclusive(); - return; - - do_sigsegv: - mmap_unlock(); - end_exclusive(); - - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = addr; - queue_signal(env, TARGET_SIGSEGV, &info); -} - void cpu_loop(CPUAlphaState *env) { CPUState *cs = CPU(alpha_env_get_cpu(env)); @@ -3220,6 +2937,7 @@ void cpu_loop(CPUAlphaState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); /* All of the traps imply a transition through PALcode, which implies an REI instruction has been executed. Which means @@ -3248,7 +2966,7 @@ void cpu_loop(CPUAlphaState *env) info.si_code = (page_get_flags(env->trap_arg0) & PAGE_VALID ? 
TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR); info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_UNALIGN: env->lock_addr = -1; @@ -3256,7 +2974,7 @@ void cpu_loop(CPUAlphaState *env) info.si_errno = 0; info.si_code = TARGET_BUS_ADRALN; info._sifields._sigfault._addr = env->trap_arg0; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_OPCDEC: do_sigill: @@ -3265,7 +2983,7 @@ void cpu_loop(CPUAlphaState *env) info.si_errno = 0; info.si_code = TARGET_ILL_ILLOPC; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_ARITH: env->lock_addr = -1; @@ -3273,7 +2991,7 @@ void cpu_loop(CPUAlphaState *env) info.si_errno = 0; info.si_code = TARGET_FPE_FLTINV; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case EXCP_FEN: /* No-op. Linux simply re-enables the FPU. */ @@ -3287,7 +3005,7 @@ void cpu_loop(CPUAlphaState *env) info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case 0x81: /* BUGCHK */ @@ -3295,7 +3013,7 @@ void cpu_loop(CPUAlphaState *env) info.si_errno = 0; info.si_code = 0; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; case 0x83: /* CALLSYS */ @@ -3367,7 +3085,7 @@ void cpu_loop(CPUAlphaState *env) } info.si_errno = 0; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; default: goto do_sigill; @@ -3379,16 +3097,15 @@ void cpu_loop(CPUAlphaState *env) env->lock_addr = -1; info.si_errno = 0; info.si_code = TARGET_TRAP_BRKPT; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } break; - case EXCP_STL_C: - case EXCP_STQ_C: - do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C); - break; case EXCP_INTERRUPT: /* Just indicate that signals should be handled asap. */ break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: printf ("Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -3412,6 +3129,8 @@ void cpu_loop(CPUS390XState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch (trapnr) { case EXCP_INTERRUPT: /* Just indicate that signals should be handled asap. 
*/ @@ -3513,9 +3232,12 @@ void cpu_loop(CPUS390XState *env) info.si_errno = 0; info.si_code = n; info._sifields._sigfault._addr = addr; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr); cpu_dump_state(cs, stderr, fprintf, 0); @@ -3537,7 +3259,7 @@ static void gen_sigill_reg(CPUTLGState *env) info.si_errno = 0; info.si_code = TARGET_ILL_PRVREG; info._sifields._sigfault._addr = env->pc; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } static void do_signal(CPUTLGState *env, int signo, int sigcode) @@ -3561,7 +3283,7 @@ static void do_signal(CPUTLGState *env, int signo, int sigcode) } info.si_code = sigcode; - queue_signal(env, info.si_signo, &info); + queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } static void gen_sigsegv_maperr(CPUTLGState *env, target_ulong addr) @@ -3721,6 +3443,8 @@ void cpu_loop(CPUTLGState *env) cpu_exec_start(cs); trapnr = cpu_exec(cs); cpu_exec_end(cs); + process_queued_cpu_work(cs); + switch (trapnr) { case TILEGX_EXCP_SYSCALL: { @@ -3768,6 +3492,9 @@ void cpu_loop(CPUTLGState *env) case TILEGX_EXCP_REG_UDN_ACCESS: gen_sigill_reg(env); break; + case EXCP_ATOMIC: + cpu_exec_step_atomic(cs); + break; default: fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr); g_assert_not_reached(); @@ -3780,6 +3507,16 @@ void cpu_loop(CPUTLGState *env) THREAD CPUState *thread_cpu; +bool qemu_cpu_is_self(CPUState *cpu) +{ + return thread_cpu == cpu; +} + +void qemu_cpu_kick(CPUState *cpu) +{ + cpu_exit(cpu); +} + void task_settid(TaskState *ts) { if (ts->ts_tid == 0) { @@ -4000,7 +3737,7 @@ static void handle_arg_strace(const char *arg) static void handle_arg_version(const char *arg) { printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION - ", " QEMU_COPYRIGHT "\n"); + "\n" QEMU_COPYRIGHT "\n"); exit(EXIT_SUCCESS); } @@ -4222,6 +3959,8 @@ int main(int argc, char **argv, char **envp) int ret; int execfd; + module_call_init(MODULE_INIT_TRACE); + qemu_init_cpu_list(); module_call_init(MODULE_INIT_QOM); if ((envlist = envlist_create()) == NULL) { @@ -4626,10 +4365,11 @@ int main(int argc, char **argv, char **envp) int i; #if defined(TARGET_PPC64) + int flag = (env->insns_flags2 & PPC2_BOOKE206) ? 
MSR_CM : MSR_SF; #if defined(TARGET_ABI32) - env->msr &= ~((target_ulong)1 << MSR_SF); + env->msr &= ~((target_ulong)1 << flag); #else - env->msr |= (target_ulong)1 << MSR_SF; + env->msr |= (target_ulong)1 << flag; #endif #endif env->nip = regs->nip; @@ -4810,7 +4550,6 @@ int main(int argc, char **argv, char **envp) } gdb_handlesig(cpu, 0); } - trace_init_vcpu_events(); cpu_loop(env); /* never exits */ return 0; diff --git a/linux-user/microblaze/target_syscall.h b/linux-user/microblaze/target_syscall.h index 0b6980c899..4141cbaa5e 100644 --- a/linux-user/microblaze/target_syscall.h +++ b/linux-user/microblaze/target_syscall.h @@ -53,4 +53,6 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_WANT_NI_OLD_SELECT + #endif diff --git a/linux-user/mips/syscall_nr.h b/linux-user/mips/syscall_nr.h index 6819f865ed..ced32806ea 100644 --- a/linux-user/mips/syscall_nr.h +++ b/linux-user/mips/syscall_nr.h @@ -256,7 +256,7 @@ #define TARGET_NR_remap_file_pages (TARGET_NR_Linux + 251) #define TARGET_NR_set_tid_address (TARGET_NR_Linux + 252) #define TARGET_NR_restart_syscall (TARGET_NR_Linux + 253) -#define TARGET_NR_fadvise64 (TARGET_NR_Linux + 254) +#define TARGET_NR_fadvise64_64 (TARGET_NR_Linux + 254) #define TARGET_NR_statfs64 (TARGET_NR_Linux + 255) #define TARGET_NR_fstatfs64 (TARGET_NR_Linux + 256) #define TARGET_NR_timer_create (TARGET_NR_Linux + 257) diff --git a/linux-user/mips/target_structs.h b/linux-user/mips/target_structs.h index fbd995581e..909ba89708 100644 --- a/linux-user/mips/target_structs.h +++ b/linux-user/mips/target_structs.h @@ -45,4 +45,20 @@ struct target_shmid_ds { abi_ulong __unused2; }; +#define TARGET_SEMID64_DS + +/* + * The semid64_ds structure for the MIPS architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. 
+ */ +struct target_semid64_ds { + struct target_ipc_perm sem_perm; + abi_ulong sem_otime; + abi_ulong sem_ctime; + abi_ulong sem_nsems; + abi_ulong __unused1; + abi_ulong __unused2; +}; + #endif diff --git a/linux-user/mips/target_syscall.h b/linux-user/mips/target_syscall.h index 2b4f390729..0b64b73714 100644 --- a/linux-user/mips/target_syscall.h +++ b/linux-user/mips/target_syscall.h @@ -221,6 +221,8 @@ struct target_pt_regs { #undef TARGET_ENOTRECOVERABLE #define TARGET_ENOTRECOVERABLE 166 /* State not recoverable */ +#undef TARGET_EDQUOT +#define TARGET_EDQUOT 1133 /* Quota exceeded */ #define UNAME_MACHINE "mips" #define UNAME_MINIMUM_RELEASE "2.6.32" @@ -230,4 +232,11 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_FORCE_SHMLBA + +static inline abi_ulong target_shmlba(CPUMIPSState *env) +{ + return 0x40000; +} + #endif /* MIPS_TARGET_SYSCALL_H */ diff --git a/linux-user/mips/termbits.h b/linux-user/mips/termbits.h index d3a6cf8f91..a0bcad0946 100644 --- a/linux-user/mips/termbits.h +++ b/linux-user/mips/termbits.h @@ -219,8 +219,20 @@ struct target_termios { #define TARGET_TIOCSBRK 0x5427 /* BSD compatibility */ #define TARGET_TIOCCBRK 0x5428 /* BSD compatibility */ #define TARGET_TIOCGSID 0x7416 /* Return the session ID of FD */ +#define TARGET_TCGETS2 TARGET_IOR('T', 0x2A, struct termios2) +#define TARGET_TCSETS2 TARGET_IOW('T', 0x2B, struct termios2) +#define TARGET_TCSETSW2 TARGET_IOW('T', 0x2C, struct termios2) +#define TARGET_TCSETSF2 TARGET_IOW('T', 0x2D, struct termios2) +#define TARGET_TIOCGRS485 TARGET_IOR('T', 0x2E, struct serial_rs485) +#define TARGET_TIOCSRS485 TARGET_IOWR('T', 0x2F, struct serial_rs485) #define TARGET_TIOCGPTN TARGET_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ #define TARGET_TIOCSPTLCK TARGET_IOW('T',0x31, int) /* Lock/unlock Pty */ +#define TARGET_TIOCGDEV TARGET_IOR('T', 0x32, unsigned int) +#define TARGET_TIOCSIG TARGET_IOW('T', 0x36, int) +#define TARGET_TIOCVHANGUP 0x5437 +#define TARGET_TIOCGPKT TARGET_IOR('T', 0x38, int) +#define TARGET_TIOCGPTLCK TARGET_IOR('T', 0x39, int) +#define TARGET_TIOCGEXCL TARGET_IOR('T', 0x40, int) /* I hope the range from 0x5480 on is free ... 
*/ #define TARGET_TIOCSCTTY 0x5480 /* become controlling tty */ diff --git a/linux-user/mips64/target_syscall.h b/linux-user/mips64/target_syscall.h index 8da9c1f9cc..6692917e2e 100644 --- a/linux-user/mips64/target_syscall.h +++ b/linux-user/mips64/target_syscall.h @@ -218,6 +218,8 @@ struct target_pt_regs { #undef TARGET_ENOTRECOVERABLE #define TARGET_ENOTRECOVERABLE 166 /* State not recoverable */ +#undef TARGET_EDQUOT +#define TARGET_EDQUOT 1133 /* Quota exceeded */ #define UNAME_MACHINE "mips64" #define UNAME_MINIMUM_RELEASE "2.6.32" @@ -227,4 +229,11 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_FORCE_SHMLBA + +static inline abi_ulong target_shmlba(CPUMIPSState *env) +{ + return 0x40000; +} + #endif /* MIPS64_TARGET_SYSCALL_H */ diff --git a/linux-user/mips64/termbits.h b/linux-user/mips64/termbits.h index d3a6cf8f91..d0a271e1d4 100644 --- a/linux-user/mips64/termbits.h +++ b/linux-user/mips64/termbits.h @@ -1,245 +1,2 @@ -/* from asm/termbits.h */ +#include "../mips/termbits.h" -#define TARGET_NCCS 23 - -struct target_termios { - unsigned int c_iflag; /* input mode flags */ - unsigned int c_oflag; /* output mode flags */ - unsigned int c_cflag; /* control mode flags */ - unsigned int c_lflag; /* local mode flags */ - unsigned char c_line; /* line discipline */ - unsigned char c_cc[TARGET_NCCS]; /* control characters */ -}; - -/* c_iflag bits */ -#define TARGET_IGNBRK 0000001 -#define TARGET_BRKINT 0000002 -#define TARGET_IGNPAR 0000004 -#define TARGET_PARMRK 0000010 -#define TARGET_INPCK 0000020 -#define TARGET_ISTRIP 0000040 -#define TARGET_INLCR 0000100 -#define TARGET_IGNCR 0000200 -#define TARGET_ICRNL 0000400 -#define TARGET_IUCLC 0001000 -#define TARGET_IXON 0002000 -#define TARGET_IXANY 0004000 -#define TARGET_IXOFF 0010000 -#define TARGET_IMAXBEL 0020000 -#define TARGET_IUTF8 0040000 - -/* c_oflag bits */ -#define TARGET_OPOST 0000001 -#define TARGET_OLCUC 0000002 -#define TARGET_ONLCR 0000004 -#define TARGET_OCRNL 0000010 -#define TARGET_ONOCR 0000020 -#define TARGET_ONLRET 0000040 -#define TARGET_OFILL 0000100 -#define TARGET_OFDEL 0000200 -#define TARGET_NLDLY 0000400 -#define TARGET_NL0 0000000 -#define TARGET_NL1 0000400 -#define TARGET_CRDLY 0003000 -#define TARGET_CR0 0000000 -#define TARGET_CR1 0001000 -#define TARGET_CR2 0002000 -#define TARGET_CR3 0003000 -#define TARGET_TABDLY 0014000 -#define TARGET_TAB0 0000000 -#define TARGET_TAB1 0004000 -#define TARGET_TAB2 0010000 -#define TARGET_TAB3 0014000 -#define TARGET_XTABS 0014000 -#define TARGET_BSDLY 0020000 -#define TARGET_BS0 0000000 -#define TARGET_BS1 0020000 -#define TARGET_VTDLY 0040000 -#define TARGET_VT0 0000000 -#define TARGET_VT1 0040000 -#define TARGET_FFDLY 0100000 -#define TARGET_FF0 0000000 -#define TARGET_FF1 0100000 - -/* c_cflag bit meaning */ -#define TARGET_CBAUD 0010017 -#define TARGET_B0 0000000 /* hang up */ -#define TARGET_B50 0000001 -#define TARGET_B75 0000002 -#define TARGET_B110 0000003 -#define TARGET_B134 0000004 -#define TARGET_B150 0000005 -#define TARGET_B200 0000006 -#define TARGET_B300 0000007 -#define TARGET_B600 0000010 -#define TARGET_B1200 0000011 -#define TARGET_B1800 0000012 -#define TARGET_B2400 0000013 -#define TARGET_B4800 0000014 -#define TARGET_B9600 0000015 -#define TARGET_B19200 0000016 -#define TARGET_B38400 0000017 -#define TARGET_EXTA B19200 -#define TARGET_EXTB B38400 -#define TARGET_CSIZE 0000060 -#define TARGET_CS5 0000000 -#define TARGET_CS6 0000020 -#define TARGET_CS7 0000040 -#define 
TARGET_CS8 0000060 -#define TARGET_CSTOPB 0000100 -#define TARGET_CREAD 0000200 -#define TARGET_PARENB 0000400 -#define TARGET_PARODD 0001000 -#define TARGET_HUPCL 0002000 -#define TARGET_CLOCAL 0004000 -#define TARGET_CBAUDEX 0010000 -#define TARGET_BOTHER 0010000 -#define TARGET_B57600 0010001 -#define TARGET_B115200 0010002 -#define TARGET_B230400 0010003 -#define TARGET_B460800 0010004 -#define TARGET_B500000 0010005 -#define TARGET_B576000 0010006 -#define TARGET_B921600 0010007 -#define TARGET_B1000000 0010010 -#define TARGET_B1152000 0010011 -#define TARGET_B1500000 0010012 -#define TARGET_B2000000 0010013 -#define TARGET_B2500000 0010014 -#define TARGET_B3000000 0010015 -#define TARGET_B3500000 0010016 -#define TARGET_B4000000 0010017 -#define TARGET_CIBAUD 002003600000 /* input baud rate (not used) */ -#define TARGET_CMSPAR 010000000000 /* mark or space (stick) parity */ -#define TARGET_CRTSCTS 020000000000 /* flow control */ - -/* c_lflag bits */ -#define TARGET_ISIG 0000001 -#define TARGET_ICANON 0000002 -#define TARGET_XCASE 0000004 -#define TARGET_ECHO 0000010 -#define TARGET_ECHOE 0000020 -#define TARGET_ECHOK 0000040 -#define TARGET_ECHONL 0000100 -#define TARGET_NOFLSH 0000200 -#define TARGET_IEXTEN 0000400 -#define TARGET_ECHOCTL 0001000 -#define TARGET_ECHOPRT 0002000 -#define TARGET_ECHOKE 0004000 -#define TARGET_FLUSHO 0010000 -#define TARGET_PENDIN 0040000 -#define TARGET_TOSTOP 0100000 -#define TARGET_ITOSTOP TARGET_TOSTOP - -/* c_cc character offsets */ -#define TARGET_VINTR 0 -#define TARGET_VQUIT 1 -#define TARGET_VERASE 2 -#define TARGET_VKILL 3 -#define TARGET_VMIN 4 -#define TARGET_VTIME 5 -#define TARGET_VEOL2 6 -#define TARGET_VSWTC 7 -#define TARGET_VSTART 8 -#define TARGET_VSTOP 9 -#define TARGET_VSUSP 10 -/* VDSUSP not supported */ -#define TARGET_VREPRINT 12 -#define TARGET_VDISCARD 13 -#define TARGET_VWERASE 14 -#define TARGET_VLNEXT 15 -#define TARGET_VEOF 16 -#define TARGET_VEOL 17 - -/* ioctls */ - -#define TARGET_TCGETA 0x5401 -#define TARGET_TCSETA 0x5402 /* Clashes with SNDCTL_TMR_START sound ioctl */ -#define TARGET_TCSETAW 0x5403 -#define TARGET_TCSETAF 0x5404 - -#define TARGET_TCSBRK 0x5405 -#define TARGET_TCXONC 0x5406 -#define TARGET_TCFLSH 0x5407 - -#define TARGET_TCGETS 0x540d -#define TARGET_TCSETS 0x540e -#define TARGET_TCSETSW 0x540f -#define TARGET_TCSETSF 0x5410 - -#define TARGET_TIOCEXCL 0x740d /* set exclusive use of tty */ -#define TARGET_TIOCNXCL 0x740e /* reset exclusive use of tty */ -#define TARGET_TIOCOUTQ 0x7472 /* output queue size */ -#define TARGET_TIOCSTI 0x5472 /* simulate terminal input */ -#define TARGET_TIOCMGET 0x741d /* get all modem bits */ -#define TARGET_TIOCMBIS 0x741b /* bis modem bits */ -#define TARGET_TIOCMBIC 0x741c /* bic modem bits */ -#define TARGET_TIOCMSET 0x741a /* set all modem bits */ -#define TARGET_TIOCPKT 0x5470 /* pty: set/clear packet mode */ -#define TARGET_TIOCPKT_DATA 0x00 /* data packet */ -#define TARGET_TIOCPKT_FLUSHREAD 0x01 /* flush packet */ -#define TARGET_TIOCPKT_FLUSHWRITE 0x02 /* flush packet */ -#define TARGET_TIOCPKT_STOP 0x04 /* stop output */ -#define TARGET_TIOCPKT_START 0x08 /* start output */ -#define TARGET_TIOCPKT_NOSTOP 0x10 /* no more ^S, ^Q */ -#define TARGET_TIOCPKT_DOSTOP 0x20 /* now do ^S ^Q */ -/* #define TIOCPKT_IOCTL 0x40 state change of pty driver */ -#define TARGET_TIOCSWINSZ TARGET_IOW('t', 103, struct winsize) /* set window size */ -#define TARGET_TIOCGWINSZ TARGET_IOR('t', 104, struct winsize) /* get window size */ -#define TARGET_TIOCNOTTY 0x5471 /* void tty 
association */ -#define TARGET_TIOCSETD 0x7401 -#define TARGET_TIOCGETD 0x7400 - -#define TARGET_FIOCLEX 0x6601 -#define TARGET_FIONCLEX 0x6602 -#define TARGET_FIOASYNC 0x667d -#define TARGET_FIONBIO 0x667e -#define TARGET_FIOQSIZE 0x667f - -#define TARGET_TIOCGLTC 0x7474 /* get special local chars */ -#define TARGET_TIOCSLTC 0x7475 /* set special local chars */ -#define TARGET_TIOCSPGRP TARGET_IOW('t', 118, int) /* set pgrp of tty */ -#define TARGET_TIOCGPGRP TARGET_IOR('t', 119, int) /* get pgrp of tty */ -#define TARGET_TIOCCONS TARGET_IOW('t', 120, int) /* become virtual console */ - -#define TARGET_FIONREAD 0x467f -#define TARGET_TIOCINQ TARGET_FIONREAD - -#define TARGET_TIOCGETP 0x7408 -#define TARGET_TIOCSETP 0x7409 -#define TARGET_TIOCSETN 0x740a /* TIOCSETP wo flush */ - -/* #define TARGET_TIOCSETA TARGET_IOW('t', 20, struct termios) set termios struct */ -/* #define TARGET_TIOCSETAW TARGET_IOW('t', 21, struct termios) drain output, set */ -/* #define TARGET_TIOCSETAF TARGET_IOW('t', 22, struct termios) drn out, fls in, set */ -/* #define TARGET_TIOCGETD TARGET_IOR('t', 26, int) get line discipline */ -/* #define TARGET_TIOCSETD TARGET_IOW('t', 27, int) set line discipline */ - /* 127-124 compat */ - -#define TARGET_TIOCSBRK 0x5427 /* BSD compatibility */ -#define TARGET_TIOCCBRK 0x5428 /* BSD compatibility */ -#define TARGET_TIOCGSID 0x7416 /* Return the session ID of FD */ -#define TARGET_TIOCGPTN TARGET_IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TARGET_TIOCSPTLCK TARGET_IOW('T',0x31, int) /* Lock/unlock Pty */ - -/* I hope the range from 0x5480 on is free ... */ -#define TARGET_TIOCSCTTY 0x5480 /* become controlling tty */ -#define TARGET_TIOCGSOFTCAR 0x5481 -#define TARGET_TIOCSSOFTCAR 0x5482 -#define TARGET_TIOCLINUX 0x5483 -#define TARGET_TIOCGSERIAL 0x5484 -#define TARGET_TIOCSSERIAL 0x5485 -#define TARGET_TCSBRKP 0x5486 /* Needed for POSIX tcsendbreak() */ -#define TARGET_TIOCSERCONFIG 0x5488 -#define TARGET_TIOCSERGWILD 0x5489 -#define TARGET_TIOCSERSWILD 0x548a -#define TARGET_TIOCGLCKTRMIOS 0x548b -#define TARGET_TIOCSLCKTRMIOS 0x548c -#define TARGET_TIOCSERGSTRUCT 0x548d /* For debugging only */ -#define TARGET_TIOCSERGETLSR 0x548e /* Get line status register */ -#define TARGET_TIOCSERGETMULTI 0x548f /* Get multiport config */ -#define TARGET_TIOCSERSETMULTI 0x5490 /* Set multiport config */ -#define TARGET_TIOCMIWAIT 0x5491 /* wait for a change on serial input line(s) */ -#define TARGET_TIOCGICOUNT 0x5492 /* read serial port inline interrupt counts */ -#define TARGET_TIOCGHAYESESP 0x5493 /* Get Hayes ESP configuration */ -#define TARGET_TIOCSHAYESESP 0x5494 /* Set Hayes ESP configuration */ diff --git a/linux-user/mmap.c b/linux-user/mmap.c index c4371d943a..61685bf79e 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -17,8 +17,6 @@ * along with this program; if not, see . */ #include "qemu/osdep.h" -#include -#include #include "qemu.h" #include "qemu-common.h" @@ -43,6 +41,11 @@ void mmap_unlock(void) } } +bool have_mmap_lock(void) +{ + return mmap_lock_count > 0 ? true : false; +} + /* Grab lock to make sure things are in a consistent state after fork(). 
*/ void mmap_fork_start(void) { @@ -681,10 +684,8 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, mmap_lock(); if (flags & MREMAP_FIXED) { - host_addr = (void *) syscall(__NR_mremap, g2h(old_addr), - old_size, new_size, - flags, - g2h(new_addr)); + host_addr = mremap(g2h(old_addr), old_size, new_size, + flags, g2h(new_addr)); if (reserved_va && host_addr != MAP_FAILED) { /* If new and old addresses overlap then the above mremap will @@ -700,10 +701,8 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, errno = ENOMEM; host_addr = MAP_FAILED; } else { - host_addr = (void *) syscall(__NR_mremap, g2h(old_addr), - old_size, new_size, - flags | MREMAP_FIXED, - g2h(mmap_start)); + host_addr = mremap(g2h(old_addr), old_size, new_size, + flags | MREMAP_FIXED, g2h(mmap_start)); if (reserved_va) { mmap_reserve(old_addr, old_size); } diff --git a/linux-user/openrisc/syscall_nr.h b/linux-user/openrisc/syscall_nr.h index 6b1c7d265e..04059d020c 100644 --- a/linux-user/openrisc/syscall_nr.h +++ b/linux-user/openrisc/syscall_nr.h @@ -459,8 +459,6 @@ #define TARGET_NR_getdents 1065 #define __ARCH_WANT_SYS_GETDENTS #define TARGET_NR_futimesat 1066 -#define TARGET_NR_select 1067 -#define __ARCH_WANT_SYS_SELECT #define TARGET_NR_poll 1068 #define TARGET_NR_epoll_wait 1069 #define TARGET_NR_ustat 1070 diff --git a/linux-user/ppc/syscall_nr.h b/linux-user/ppc/syscall_nr.h index 46ed8a68ce..afa36544f1 100644 --- a/linux-user/ppc/syscall_nr.h +++ b/linux-user/ppc/syscall_nr.h @@ -120,7 +120,9 @@ #define TARGET_NR_sysinfo 116 #define TARGET_NR_ipc 117 #define TARGET_NR_fsync 118 +#if !defined(TARGET_PPC64) #define TARGET_NR_sigreturn 119 +#endif #define TARGET_NR_clone 120 #define TARGET_NR_setdomainname 121 #define TARGET_NR_uname 122 diff --git a/linux-user/ppc/target_syscall.h b/linux-user/ppc/target_syscall.h index a8662f4856..afc0570410 100644 --- a/linux-user/ppc/target_syscall.h +++ b/linux-user/ppc/target_syscall.h @@ -74,5 +74,6 @@ struct target_revectored_struct { #define TARGET_MINSIGSTKSZ 2048 #define TARGET_MLOCKALL_MCL_CURRENT 0x2000 #define TARGET_MLOCKALL_MCL_FUTURE 0x4000 +#define TARGET_WANT_NI_OLD_SELECT #endif /* PPC_TARGET_SYSCALL_H */ diff --git a/linux-user/qemu.h b/linux-user/qemu.h index bef465de4d..da73a01106 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -362,12 +362,23 @@ void print_syscall(int num, abi_long arg1, abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6); void print_syscall_ret(int num, abi_long arg1); +/** + * print_taken_signal: + * @target_signum: target signal being taken + * @tinfo: target_siginfo_t which will be passed to the guest for the signal + * + * Print strace output indicating that this signal is being taken by the guest, + * in a format similar to: + * --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} --- + */ +void print_taken_signal(int target_signum, const target_siginfo_t *tinfo); extern int do_strace; /* signal.c */ void process_pending_signals(CPUArchState *cpu_env); void signal_init(void); -int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info); +int queue_signal(CPUArchState *env, int sig, int si_type, + target_siginfo_t *info); void host_to_target_siginfo(target_siginfo_t *tinfo, const siginfo_t *info); void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo); int target_to_host_signal(int sig); @@ -548,7 +559,7 @@ static inline void *lock_user(int type, abi_ulong guest_addr, long len, int copy #ifdef DEBUG_REMAP { void *addr; - addr = 
malloc(len); + addr = g_malloc(len); if (copy) memcpy(addr, g2h(guest_addr), len); else @@ -574,7 +585,7 @@ static inline void unlock_user(void *host_ptr, abi_ulong guest_addr, return; if (len > 0) memcpy(g2h(guest_addr), host_ptr, len); - free(host_ptr); + g_free(host_ptr); #endif } diff --git a/linux-user/sh4/syscall_nr.h b/linux-user/sh4/syscall_nr.h index 50099846d2..e99f73589d 100644 --- a/linux-user/sh4/syscall_nr.h +++ b/linux-user/sh4/syscall_nr.h @@ -84,7 +84,7 @@ #define TARGET_NR_settimeofday 79 #define TARGET_NR_getgroups 80 #define TARGET_NR_setgroups 81 -#define TARGET_NR_select 82 + /* 82 was sys_oldselect */ #define TARGET_NR_symlink 83 #define TARGET_NR_oldlstat 84 #define TARGET_NR_readlink 85 diff --git a/linux-user/sh4/target_syscall.h b/linux-user/sh4/target_syscall.h index 78d5557124..2b5f75be13 100644 --- a/linux-user/sh4/target_syscall.h +++ b/linux-user/sh4/target_syscall.h @@ -19,4 +19,11 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 1 #define TARGET_MLOCKALL_MCL_FUTURE 2 +#define TARGET_FORCE_SHMLBA + +static inline abi_ulong target_shmlba(CPUSH4State *env) +{ + return 0x4000; +} + #endif /* SH4_TARGET_SYSCALL_H */ diff --git a/linux-user/signal.c b/linux-user/signal.c index 9a4d894e3a..c750053edd 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -512,9 +512,43 @@ void signal_init(void) } } +#if !(defined(TARGET_X86_64) || defined(TARGET_UNICORE32)) +/* Force a synchronously taken signal. The kernel force_sig() function + * also forces the signal to "not blocked, not ignored", but for QEMU + * that work is done in process_pending_signals(). + */ +static void force_sig(int sig) +{ + CPUState *cpu = thread_cpu; + CPUArchState *env = cpu->env_ptr; + target_siginfo_t info; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = TARGET_SI_KERNEL; + info._sifields._kill._pid = 0; + info._sifields._kill._uid = 0; + queue_signal(env, info.si_signo, QEMU_SI_KILL, &info); +} + +/* Force a SIGSEGV if we couldn't write to memory trying to set + * up the signal frame. oldsig is the signal we were trying to handle + * at the point of failure. + */ +static void force_sigsegv(int oldsig) +{ + if (oldsig == SIGSEGV) { + /* Make sure we don't try to deliver the signal again; this will + * end up with handle_pending_signal() calling dump_core_and_abort(). + */ + sigact_table[oldsig - 1]._sa_handler = TARGET_SIG_DFL; + } + force_sig(TARGET_SIGSEGV); +} +#endif /* abort execution with signal */ -static void QEMU_NORETURN force_sig(int target_sig) +static void QEMU_NORETURN dump_core_and_abort(int target_sig) { CPUState *cpu = thread_cpu; CPUArchState *env = cpu->env_ptr; @@ -569,19 +603,15 @@ static void QEMU_NORETURN force_sig(int target_sig) /* queue a signal so that it will be send to the virtual CPU as soon as possible */ -int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) +int queue_signal(CPUArchState *env, int sig, int si_type, + target_siginfo_t *info) { CPUState *cpu = ENV_GET_CPU(env); TaskState *ts = cpu->opaque; trace_user_queue_signal(env, sig); - /* Currently all callers define siginfo structures which - * use the _sifields._sigfault union member, so we can - * set the type here. If that changes we should push this - * out so the si_type is passed in by callers. 
- */ - info->si_code = deposit32(info->si_code, 16, 16, QEMU_SI_FAULT); + info->si_code = deposit32(info->si_code, 16, 16, si_type); ts->sync_signal.info = *info; ts->sync_signal.pending = sig; @@ -1015,10 +1045,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - if (sig == TARGET_SIGSEGV) { - ka->_sa_handler = TARGET_SIG_DFL; - } - force_sig(TARGET_SIGSEGV /* , current */); + force_sigsegv(sig); } /* compare linux/arch/i386/kernel/signal.c:setup_rt_frame() */ @@ -1088,10 +1115,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - if (sig == TARGET_SIGSEGV) { - ka->_sa_handler = TARGET_SIG_DFL; - } - force_sig(TARGET_SIGSEGV /* , current */); + force_sigsegv(sig); } static int @@ -1165,7 +1189,7 @@ long do_sigreturn(CPUX86State *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUX86State *env) @@ -1196,7 +1220,7 @@ long do_rt_sigreturn(CPUX86State *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_AARCH64) @@ -1420,7 +1444,7 @@ static void target_setup_frame(int usig, struct target_sigaction *ka, give_sigsegv: unlock_user_struct(frame, frame_addr, 1); - force_sig(TARGET_SIGSEGV); + force_sigsegv(usig); } static void setup_rt_frame(int sig, struct target_sigaction *ka, @@ -1466,7 +1490,7 @@ long do_rt_sigreturn(CPUARMState *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } long do_sigreturn(CPUARMState *env) @@ -1772,7 +1796,7 @@ static void setup_frame_v1(int usig, struct target_sigaction *ka, trace_user_setup_frame(regs, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - return; + goto sigsegv; } setup_sigcontext(&frame->sc, regs, set->sig[0]); @@ -1785,6 +1809,9 @@ static void setup_frame_v1(int usig, struct target_sigaction *ka, frame_addr + offsetof(struct sigframe_v1, retcode)); unlock_user_struct(frame, frame_addr, 1); + return; +sigsegv: + force_sigsegv(usig); } static void setup_frame_v2(int usig, struct target_sigaction *ka, @@ -1795,7 +1822,7 @@ static void setup_frame_v2(int usig, struct target_sigaction *ka, trace_user_setup_frame(regs, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - return; + goto sigsegv; } setup_sigframe_v2(&frame->uc, set, regs); @@ -1804,6 +1831,9 @@ static void setup_frame_v2(int usig, struct target_sigaction *ka, frame_addr + offsetof(struct sigframe_v2, retcode)); unlock_user_struct(frame, frame_addr, 1); + return; +sigsegv: + force_sigsegv(usig); } static void setup_frame(int usig, struct target_sigaction *ka, @@ -1829,7 +1859,7 @@ static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, trace_user_setup_rt_frame(env, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) { - return /* 1 */; + goto sigsegv; } info_addr = frame_addr + offsetof(struct rt_sigframe_v1, info); @@ -1859,6 +1889,9 @@ static void setup_rt_frame_v1(int usig, struct target_sigaction *ka, env->regs[2] = uc_addr; unlock_user_struct(frame, frame_addr, 1); + return; +sigsegv: + force_sigsegv(usig); } static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, @@ -1871,7 +1904,7 @@ static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, trace_user_setup_rt_frame(env, frame_addr); if (!lock_user_struct(VERIFY_WRITE, 
frame, frame_addr, 0)) { - return /* 1 */; + goto sigsegv; } info_addr = frame_addr + offsetof(struct rt_sigframe_v2, info); @@ -1887,6 +1920,9 @@ static void setup_rt_frame_v2(int usig, struct target_sigaction *ka, env->regs[2] = uc_addr; unlock_user_struct(frame, frame_addr, 1); + return; +sigsegv: + force_sigsegv(usig); } static void setup_rt_frame(int usig, struct target_sigaction *ka, @@ -1976,8 +2012,8 @@ static long do_sigreturn_v1(CPUARMState *env) return -TARGET_QEMU_ESIGRETURN; badframe: - force_sig(TARGET_SIGSEGV /* , current */); - return 0; + force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } static abi_ulong *restore_sigframe_v2_vfp(CPUARMState *env, abi_ulong *regspace) @@ -2035,7 +2071,8 @@ static abi_ulong *restore_sigframe_v2_iwmmxt(CPUARMState *env, return (abi_ulong*)(iwmmxtframe + 1); } -static int do_sigframe_return_v2(CPUARMState *env, target_ulong frame_addr, +static int do_sigframe_return_v2(CPUARMState *env, + target_ulong context_addr, struct target_ucontext_v2 *uc) { sigset_t host_set; @@ -2062,8 +2099,11 @@ static int do_sigframe_return_v2(CPUARMState *env, target_ulong frame_addr, } } - if (do_sigaltstack(frame_addr + offsetof(struct target_ucontext_v2, tuc_stack), 0, get_sp_from_cpustate(env)) == -EFAULT) + if (do_sigaltstack(context_addr + + offsetof(struct target_ucontext_v2, tuc_stack), + 0, get_sp_from_cpustate(env)) == -EFAULT) { return 1; + } #if 0 /* Send SIGTRAP if we're single-stepping */ @@ -2094,7 +2134,10 @@ static long do_sigreturn_v2(CPUARMState *env) goto badframe; } - if (do_sigframe_return_v2(env, frame_addr, &frame->uc)) { + if (do_sigframe_return_v2(env, + frame_addr + + offsetof(struct sigframe_v2, uc), + &frame->uc)) { goto badframe; } @@ -2103,8 +2146,8 @@ static long do_sigreturn_v2(CPUARMState *env) badframe: unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV /* , current */); - return 0; + force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } long do_sigreturn(CPUARMState *env) @@ -2157,8 +2200,8 @@ static long do_rt_sigreturn_v1(CPUARMState *env) badframe: unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV /* , current */); - return 0; + force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } static long do_rt_sigreturn_v2(CPUARMState *env) @@ -2181,7 +2224,10 @@ static long do_rt_sigreturn_v2(CPUARMState *env) goto badframe; } - if (do_sigframe_return_v2(env, frame_addr, &frame->uc)) { + if (do_sigframe_return_v2(env, + frame_addr + + offsetof(struct rt_sigframe_v2, uc), + &frame->uc)) { goto badframe; } @@ -2190,8 +2236,8 @@ static long do_rt_sigreturn_v2(CPUARMState *env) badframe: unlock_user_struct(frame, frame_addr, 0); - force_sig(TARGET_SIGSEGV /* , current */); - return 0; + force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUARMState *env) @@ -2445,7 +2491,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, #endif sigsegv: unlock_user(sf, sf_addr, sizeof(struct target_signal_frame)); - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static void setup_rt_frame(int sig, struct target_sigaction *ka, @@ -2525,6 +2571,7 @@ long do_sigreturn(CPUSPARCState *env) segv_and_exit: unlock_user_struct(sf, sf_addr, 0); force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUSPARCState *env) @@ -3037,7 +3084,7 @@ static void setup_frame(int sig, struct target_sigaction * ka, return; give_sigsegv: - force_sig(TARGET_SIGSEGV/*, current*/); + force_sigsegv(sig); } long do_sigreturn(CPUMIPSState 
*regs) @@ -3082,8 +3129,8 @@ long do_sigreturn(CPUMIPSState *regs) return -TARGET_QEMU_ESIGRETURN; badframe: - force_sig(TARGET_SIGSEGV/*, current*/); - return 0; + force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } # endif /* O32 */ @@ -3146,7 +3193,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, give_sigsegv: unlock_user_struct(frame, frame_addr, 1); - force_sig(TARGET_SIGSEGV/*, current*/); + force_sigsegv(sig); } long do_rt_sigreturn(CPUMIPSState *env) @@ -3179,8 +3226,8 @@ long do_rt_sigreturn(CPUMIPSState *env) return -TARGET_QEMU_ESIGRETURN; badframe: - force_sig(TARGET_SIGSEGV/*, current*/); - return 0; + force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_SH4) @@ -3349,7 +3396,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, give_sigsegv: unlock_user_struct(frame, frame_addr, 1); - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static void setup_rt_frame(int sig, struct target_sigaction *ka, @@ -3409,7 +3456,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, give_sigsegv: unlock_user_struct(frame, frame_addr, 1); - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } long do_sigreturn(CPUSH4State *regs) @@ -3446,7 +3493,7 @@ long do_sigreturn(CPUSH4State *regs) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUSH4State *regs) @@ -3478,7 +3525,7 @@ long do_rt_sigreturn(CPUSH4State *regs) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_MICROBLAZE) @@ -3656,7 +3703,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, unlock_user_struct(frame, frame_addr, 1); return; badframe: - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static void setup_rt_frame(int sig, struct target_sigaction *ka, @@ -3697,6 +3744,7 @@ long do_sigreturn(CPUMBState *env) return -TARGET_QEMU_ESIGRETURN; badframe: force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUMBState *env) @@ -3826,7 +3874,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, unlock_user_struct(frame, frame_addr, 1); return; badframe: - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static void setup_rt_frame(int sig, struct target_sigaction *ka, @@ -3864,6 +3912,7 @@ long do_sigreturn(CPUCRISState *env) return -TARGET_QEMU_ESIGRETURN; badframe: force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUCRISState *env) @@ -4065,10 +4114,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, give_sigsegv: unlock_user_struct(frame, frame_addr, 1); - if (sig == TARGET_SIGSEGV) { - ka->_sa_handler = TARGET_SIG_DFL; - } - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } long do_sigreturn(CPUOpenRISCState *env) @@ -4244,12 +4290,12 @@ static void setup_frame(int sig, struct target_sigaction *ka, env->regs[5] = 0; // FIXME: no clue... current->thread.prot_addr; /* Place signal number on stack to allow backtrace from handler. 
*/ - __put_user(env->regs[2], (int *) &frame->signo); + __put_user(env->regs[2], &frame->signo); unlock_user_struct(frame, frame_addr, 1); return; give_sigsegv: - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static void setup_rt_frame(int sig, struct target_sigaction *ka, @@ -4304,7 +4350,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static int @@ -4358,7 +4404,7 @@ long do_sigreturn(CPUS390XState *env) badframe: force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUS390XState *env) @@ -4389,7 +4435,7 @@ long do_rt_sigreturn(CPUS390XState *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_PPC) @@ -4408,7 +4454,12 @@ struct target_mcontext { target_ulong mc_gregs[48]; /* Includes fpscr. */ uint64_t mc_fregs[33]; +#if defined(TARGET_PPC64) + /* Pointer to the vector regs */ + target_ulong v_regs; +#else target_ulong mc_pad[2]; +#endif /* We need to handle Altivec and SPE at the same time, which no kernel needs to do. Fortunately, the kernel defines this bit to be Altivec-register-large all the time, rather than trying to @@ -4418,15 +4469,30 @@ struct target_mcontext { uint32_t spe[33]; /* Altivec vector registers. The packing of VSCR and VRSAVE varies depending on whether we're PPC64 or not: PPC64 splits - them apart; PPC32 stuffs them together. */ + them apart; PPC32 stuffs them together. + We also need to account for the VSX registers on PPC64 + */ #if defined(TARGET_PPC64) -#define QEMU_NVRREG 34 +#define QEMU_NVRREG (34 + 16) + /* On ppc64, this mcontext structure is naturally *unaligned*, + * or rather it is aligned on a 8 bytes boundary but not on + * a 16 bytes one. This pad fixes it up. This is also why the + * vector regs are referenced by the v_regs pointer above so + * any amount of padding can be added here + */ + target_ulong pad; #else + /* On ppc32, we are already aligned to 16 bytes */ #define QEMU_NVRREG 33 #endif - ppc_avr_t altivec[QEMU_NVRREG]; + /* We cannot use ppc_avr_t here as we do *not* want the implied + * 16-bytes alignment that would result from it. This would have + * the effect of making the whole struct target_mcontext aligned + * which breaks the layout of struct target_ucontext on ppc64. + */ + uint64_t altivec[QEMU_NVRREG][2]; #undef QEMU_NVRREG - } mc_vregs __attribute__((__aligned__(16))); + } mc_vregs; }; /* See arch/powerpc/include/asm/sigcontext.h. */ @@ -4580,6 +4646,16 @@ static target_ulong get_sigframe(struct target_sigaction *ka, return (oldsp - frame_size) & ~0xFUL; } +#if ((defined(TARGET_WORDS_BIGENDIAN) && defined(HOST_WORDS_BIGENDIAN)) || \ + (!defined(HOST_WORDS_BIGENDIAN) && !defined(TARGET_WORDS_BIGENDIAN))) +#define PPC_VEC_HI 0 +#define PPC_VEC_LO 1 +#else +#define PPC_VEC_HI 1 +#define PPC_VEC_LO 0 +#endif + + static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame) { target_ulong msr = env->msr; @@ -4606,18 +4682,33 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame) /* Save Altivec registers if necessary. 
*/ if (env->insns_flags & PPC_ALTIVEC) { + uint32_t *vrsave; for (i = 0; i < ARRAY_SIZE(env->avr); i++) { ppc_avr_t *avr = &env->avr[i]; - ppc_avr_t *vreg = &frame->mc_vregs.altivec[i]; + ppc_avr_t *vreg = (ppc_avr_t *)&frame->mc_vregs.altivec[i]; - __put_user(avr->u64[0], &vreg->u64[0]); - __put_user(avr->u64[1], &vreg->u64[1]); + __put_user(avr->u64[PPC_VEC_HI], &vreg->u64[0]); + __put_user(avr->u64[PPC_VEC_LO], &vreg->u64[1]); } /* Set MSR_VR in the saved MSR value to indicate that frame->mc_vregs contains valid data. */ msr |= MSR_VR; - __put_user((uint32_t)env->spr[SPR_VRSAVE], - &frame->mc_vregs.altivec[32].u32[3]); +#if defined(TARGET_PPC64) + vrsave = (uint32_t *)&frame->mc_vregs.altivec[33]; + /* 64-bit needs to put a pointer to the vectors in the frame */ + __put_user(h2g(frame->mc_vregs.altivec), &frame->v_regs); +#else + vrsave = (uint32_t *)&frame->mc_vregs.altivec[32]; +#endif + __put_user((uint32_t)env->spr[SPR_VRSAVE], vrsave); + } + + /* Save VSX second halves */ + if (env->insns_flags2 & PPC2_VSX) { + uint64_t *vsregs = (uint64_t *)&frame->mc_vregs.altivec[34]; + for (i = 0; i < ARRAY_SIZE(env->vsr); i++) { + __put_user(env->vsr[i], &vsregs[i]); + } } /* Save floating point registers. */ @@ -4697,17 +4788,39 @@ static void restore_user_regs(CPUPPCState *env, /* Restore Altivec registers if necessary. */ if (env->insns_flags & PPC_ALTIVEC) { + ppc_avr_t *v_regs; + uint32_t *vrsave; +#if defined(TARGET_PPC64) + uint64_t v_addr; + /* 64-bit needs to recover the pointer to the vectors from the frame */ + __get_user(v_addr, &frame->v_regs); + v_regs = g2h(v_addr); +#else + v_regs = (ppc_avr_t *)frame->mc_vregs.altivec; +#endif for (i = 0; i < ARRAY_SIZE(env->avr); i++) { ppc_avr_t *avr = &env->avr[i]; - ppc_avr_t *vreg = &frame->mc_vregs.altivec[i]; + ppc_avr_t *vreg = &v_regs[i]; - __get_user(avr->u64[0], &vreg->u64[0]); - __get_user(avr->u64[1], &vreg->u64[1]); + __get_user(avr->u64[PPC_VEC_HI], &vreg->u64[0]); + __get_user(avr->u64[PPC_VEC_LO], &vreg->u64[1]); } /* Set MSR_VEC in the saved MSR value to indicate that frame->mc_vregs contains valid data. */ - __get_user(env->spr[SPR_VRSAVE], - (target_ulong *)(&frame->mc_vregs.altivec[32].u32[3])); +#if defined(TARGET_PPC64) + vrsave = (uint32_t *)&v_regs[33]; +#else + vrsave = (uint32_t *)&v_regs[32]; +#endif + __get_user(env->spr[SPR_VRSAVE], vrsave); + } + + /* Restore VSX second halves */ + if (env->insns_flags2 & PPC2_VSX) { + uint64_t *vsregs = (uint64_t *)&frame->mc_vregs.altivec[34]; + for (i = 0; i < ARRAY_SIZE(env->vsr); i++) { + __get_user(env->vsr[i], &vsregs[i]); + } } /* Restore floating point registers. 
*/ @@ -4738,6 +4851,7 @@ static void restore_user_regs(CPUPPCState *env, } } +#if !defined(TARGET_PPC64) static void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUPPCState *env) { @@ -4745,9 +4859,6 @@ static void setup_frame(int sig, struct target_sigaction *ka, struct target_sigcontext *sc; target_ulong frame_addr, newsp; int err = 0; -#if defined(TARGET_PPC64) - struct image_info *image = ((TaskState *)thread_cpu->opaque)->info; -#endif frame_addr = get_sigframe(ka, env, sizeof(*frame)); trace_user_setup_frame(env, frame_addr); @@ -4757,11 +4868,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, __put_user(ka->_sa_handler, &sc->handler); __put_user(set->sig[0], &sc->oldmask); -#if TARGET_ABI_BITS == 64 - __put_user(set->sig[0] >> 32, &sc->_unused[3]); -#else __put_user(set->sig[1], &sc->_unused[3]); -#endif __put_user(h2g(&frame->mctx), &sc->regs); __put_user(sig, &sc->signal); @@ -4790,22 +4897,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, env->gpr[3] = sig; env->gpr[4] = frame_addr + offsetof(struct target_sigframe, sctx); -#if defined(TARGET_PPC64) - if (get_ppc64_abi(image) < 2) { - /* ELFv1 PPC64 function pointers are pointers to OPD entries. */ - struct target_func_ptr *handler = - (struct target_func_ptr *)g2h(ka->_sa_handler); - env->nip = tswapl(handler->entry); - env->gpr[2] = tswapl(handler->toc); - } else { - /* ELFv2 PPC64 function pointers are entry points, but R12 - * must also be set */ - env->nip = tswapl((target_ulong) ka->_sa_handler); - env->gpr[12] = env->nip; - } -#else env->nip = (target_ulong) ka->_sa_handler; -#endif /* Signal handlers are entered in big-endian mode. */ env->msr &= ~(1ull << MSR_LE); @@ -4815,8 +4907,9 @@ static void setup_frame(int sig, struct target_sigaction *ka, sigsegv: unlock_user_struct(frame, frame_addr, 1); - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } +#endif /* !defined(TARGET_PPC64) */ static void setup_rt_frame(int sig, struct target_sigaction *ka, target_siginfo_t *info, @@ -4910,10 +5003,11 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, sigsegv: unlock_user_struct(rt_sf, rt_sf_addr, 1); - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } +#if !defined(TARGET_PPC64) long do_sigreturn(CPUPPCState *env) { struct target_sigcontext *sc = NULL; @@ -4948,8 +5042,9 @@ long do_sigreturn(CPUPPCState *env) unlock_user_struct(sr, sr_addr, 1); unlock_user_struct(sc, sc_addr, 1); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } +#endif /* !defined(TARGET_PPC64) */ /* See arch/powerpc/kernel/signal_32.c. 
*/ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig) @@ -5003,7 +5098,7 @@ long do_rt_sigreturn(CPUPPCState *env) sigsegv: unlock_user_struct(rt_sf, rt_sf_addr, 1); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_M68K) @@ -5159,7 +5254,7 @@ static void setup_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } static inline int target_rt_setup_ucontext(struct target_ucontext *uc, @@ -5298,7 +5393,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, give_sigsegv: unlock_user_struct(frame, frame_addr, 1); - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); } long do_sigreturn(CPUM68KState *env) @@ -5333,7 +5428,7 @@ long do_sigreturn(CPUM68KState *env) badframe: force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUM68KState *env) @@ -5366,7 +5461,7 @@ long do_rt_sigreturn(CPUM68KState *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return 0; + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_ALPHA) @@ -5505,10 +5600,8 @@ static void setup_frame(int sig, struct target_sigaction *ka, if (err) { give_sigsegv: - if (sig == TARGET_SIGSEGV) { - ka->_sa_handler = TARGET_SIG_DFL; - } - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); + return; } env->ir[IR_RA] = r26; @@ -5562,10 +5655,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, if (err) { give_sigsegv: - if (sig == TARGET_SIGSEGV) { - ka->_sa_handler = TARGET_SIG_DFL; - } - force_sig(TARGET_SIGSEGV); + force_sigsegv(sig); + return; } env->ir[IR_RA] = r26; @@ -5599,6 +5690,7 @@ long do_sigreturn(CPUAlphaState *env) badframe: force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } long do_rt_sigreturn(CPUAlphaState *env) @@ -5628,6 +5720,7 @@ long do_rt_sigreturn(CPUAlphaState *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } #elif defined(TARGET_TILEGX) @@ -5762,10 +5855,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka, return; give_sigsegv: - if (sig == TARGET_SIGSEGV) { - ka->_sa_handler = TARGET_SIG_DFL; - } - force_sig(TARGET_SIGSEGV /* , current */); + force_sigsegv(sig); } long do_rt_sigreturn(CPUTLGState *env) @@ -5795,6 +5885,7 @@ long do_rt_sigreturn(CPUTLGState *env) badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); + return -TARGET_QEMU_ESIGRETURN; } #else @@ -5849,6 +5940,10 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig, handler = sa->_sa_handler; } + if (do_strace) { + print_taken_signal(sig, &k->info); + } + if (handler == TARGET_SIG_DFL) { /* default handler : ignore some signal. 
The other are job control or fatal */ if (sig == TARGET_SIGTSTP || sig == TARGET_SIGTTIN || sig == TARGET_SIGTTOU) { @@ -5857,12 +5952,12 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig, sig != TARGET_SIGURG && sig != TARGET_SIGWINCH && sig != TARGET_SIGCONT) { - force_sig(sig); + dump_core_and_abort(sig); } } else if (handler == TARGET_SIG_IGN) { /* ignore sig */ } else if (handler == TARGET_SIG_ERR) { - force_sig(sig); + dump_core_and_abort(sig); } else { /* compute the blocked signals during the handler execution */ sigset_t *blocked_set; @@ -5893,7 +5988,8 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig, #endif /* prepare the stack frame of the virtual CPU */ #if defined(TARGET_ABI_MIPSN32) || defined(TARGET_ABI_MIPSN64) \ - || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) + || defined(TARGET_OPENRISC) || defined(TARGET_TILEGX) \ + || defined(TARGET_PPC64) /* These targets do not have traditional signals. */ setup_rt_frame(sig, sa, &k->info, &target_old_set, cpu_env); #else @@ -5921,6 +6017,7 @@ void process_pending_signals(CPUArchState *cpu_env) sigfillset(&set); sigprocmask(SIG_SETMASK, &set, 0); + restart_scan: sig = ts->sync_signal.pending; if (sig) { /* Synchronous signals are forced, @@ -5948,8 +6045,10 @@ void process_pending_signals(CPUArchState *cpu_env) (!sigismember(blocked_set, target_to_host_signal_table[sig]))) { handle_pending_signal(cpu_env, sig, &ts->sigtab[sig - 1]); - /* Restart scan from the beginning */ - sig = 1; + /* Restart scan from the beginning, as handle_pending_signal + * might have resulted in a new synchronous signal (eg SIGSEGV). + */ + goto restart_scan; } } diff --git a/linux-user/sparc/target_syscall.h b/linux-user/sparc/target_syscall.h index 326f674b4e..f97aa6b075 100644 --- a/linux-user/sparc/target_syscall.h +++ b/linux-user/sparc/target_syscall.h @@ -22,4 +22,20 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 0x2000 #define TARGET_MLOCKALL_MCL_FUTURE 0x4000 +/* For SPARC SHMLBA is determined at runtime in the kernel, and + * libc has to runtime-detect it using the hwcaps (see glibc + * sysdeps/unix/sysv/linux/sparc/getshmlba; we follow the same + * logic here, though we know we're not the sparc v9 64-bit case). 
+ */ +#define TARGET_FORCE_SHMLBA + +static inline abi_ulong target_shmlba(CPUSPARCState *env) +{ + if (!(env->def->features & CPU_FEATURE_FLUSH)) { + return 64 * 1024; + } else { + return 256 * 1024; + } +} + #endif /* SPARC_TARGET_SYSCALL_H */ diff --git a/linux-user/sparc64/target_syscall.h b/linux-user/sparc64/target_syscall.h index b7e3bf82fb..2cbbaaed1b 100644 --- a/linux-user/sparc64/target_syscall.h +++ b/linux-user/sparc64/target_syscall.h @@ -23,4 +23,11 @@ struct target_pt_regs { #define TARGET_MLOCKALL_MCL_CURRENT 0x2000 #define TARGET_MLOCKALL_MCL_FUTURE 0x4000 +#define TARGET_FORCE_SHMLBA + +static inline abi_ulong target_shmlba(CPUSPARCState *env) +{ + return MAX(TARGET_PAGE_SIZE, 16 * 1024); +} + #endif /* SPARC64_TARGET_SYSCALL_H */ diff --git a/linux-user/strace.c b/linux-user/strace.c index cc10dc4703..489dbc9583 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -154,6 +154,100 @@ print_signal(abi_ulong arg, int last) gemu_log("%s%s", signal_name, get_comma(last)); } +static void print_si_code(int arg) +{ + const char *codename = NULL; + + switch (arg) { + case SI_USER: + codename = "SI_USER"; + break; + case SI_KERNEL: + codename = "SI_KERNEL"; + break; + case SI_QUEUE: + codename = "SI_QUEUE"; + break; + case SI_TIMER: + codename = "SI_TIMER"; + break; + case SI_MESGQ: + codename = "SI_MESGQ"; + break; + case SI_ASYNCIO: + codename = "SI_ASYNCIO"; + break; + case SI_SIGIO: + codename = "SI_SIGIO"; + break; + case SI_TKILL: + codename = "SI_TKILL"; + break; + default: + gemu_log("%d", arg); + return; + } + gemu_log("%s", codename); +} + +static void print_siginfo(const target_siginfo_t *tinfo) +{ + /* Print a target_siginfo_t in the format desired for printing + * signals being taken. We assume the target_siginfo_t is in the + * internal form where the top 16 bits of si_code indicate which + * part of the union is valid, rather than in the guest-visible + * form where the bottom 16 bits are sign-extended into the top 16. 
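+ * (Sketch of the assumed packing, illustration only: queue_signal() is
+ * expected to store the QEMU_SI_* selector in the upper half, roughly
+ *     si_code = deposit32(TARGET_SEGV_MAPERR, 16, 16, QEMU_SI_FAULT);
+ * so that extract32(si_code, 16, 16) below recovers QEMU_SI_FAULT and
+ * sextract32(si_code, 0, 16) recovers the real TARGET_SEGV_MAPERR value.)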
+ */ + int si_type = extract32(tinfo->si_code, 16, 16); + int si_code = sextract32(tinfo->si_code, 0, 16); + + gemu_log("{si_signo="); + print_signal(tinfo->si_signo, 1); + gemu_log(", si_code="); + print_si_code(si_code); + + switch (si_type) { + case QEMU_SI_KILL: + gemu_log(", si_pid = %u, si_uid = %u", + (unsigned int)tinfo->_sifields._kill._pid, + (unsigned int)tinfo->_sifields._kill._uid); + break; + case QEMU_SI_TIMER: + gemu_log(", si_timer1 = %u, si_timer2 = %u", + tinfo->_sifields._timer._timer1, + tinfo->_sifields._timer._timer2); + break; + case QEMU_SI_POLL: + gemu_log(", si_band = %d, si_fd = %d", + tinfo->_sifields._sigpoll._band, + tinfo->_sifields._sigpoll._fd); + break; + case QEMU_SI_FAULT: + gemu_log(", si_addr = "); + print_pointer(tinfo->_sifields._sigfault._addr, 1); + break; + case QEMU_SI_CHLD: + gemu_log(", si_pid = %u, si_uid = %u, si_status = %d" + ", si_utime=" TARGET_ABI_FMT_ld + ", si_stime=" TARGET_ABI_FMT_ld, + (unsigned int)(tinfo->_sifields._sigchld._pid), + (unsigned int)(tinfo->_sifields._sigchld._uid), + tinfo->_sifields._sigchld._status, + tinfo->_sifields._sigchld._utime, + tinfo->_sifields._sigchld._stime); + break; + case QEMU_SI_RT: + gemu_log(", si_pid = %u, si_uid = %u, si_sigval = " TARGET_ABI_FMT_ld, + (unsigned int)tinfo->_sifields._rt._pid, + (unsigned int)tinfo->_sifields._rt._uid, + tinfo->_sifields._rt._sigval.sival_ptr); + break; + default: + g_assert_not_reached(); + } + gemu_log("}"); +} + static void print_sockaddr(abi_ulong addr, abi_long addrlen) { @@ -341,6 +435,69 @@ print_fdset(int n, abi_ulong target_fds_addr) } #endif +#ifdef TARGET_NR_clock_adjtime +/* IDs of the various system clocks */ +#define TARGET_CLOCK_REALTIME 0 +#define TARGET_CLOCK_MONOTONIC 1 +#define TARGET_CLOCK_PROCESS_CPUTIME_ID 2 +#define TARGET_CLOCK_THREAD_CPUTIME_ID 3 +#define TARGET_CLOCK_MONOTONIC_RAW 4 +#define TARGET_CLOCK_REALTIME_COARSE 5 +#define TARGET_CLOCK_MONOTONIC_COARSE 6 +#define TARGET_CLOCK_BOOTTIME 7 +#define TARGET_CLOCK_REALTIME_ALARM 8 +#define TARGET_CLOCK_BOOTTIME_ALARM 9 +#define TARGET_CLOCK_SGI_CYCLE 10 +#define TARGET_CLOCK_TAI 11 + +static void +print_clockid(int clockid, int last) +{ + switch (clockid) { + case TARGET_CLOCK_REALTIME: + gemu_log("CLOCK_REALTIME"); + break; + case TARGET_CLOCK_MONOTONIC: + gemu_log("CLOCK_MONOTONIC"); + break; + case TARGET_CLOCK_PROCESS_CPUTIME_ID: + gemu_log("CLOCK_PROCESS_CPUTIME_ID"); + break; + case TARGET_CLOCK_THREAD_CPUTIME_ID: + gemu_log("CLOCK_THREAD_CPUTIME_ID"); + break; + case TARGET_CLOCK_MONOTONIC_RAW: + gemu_log("CLOCK_MONOTONIC_RAW"); + break; + case TARGET_CLOCK_REALTIME_COARSE: + gemu_log("CLOCK_REALTIME_COARSE"); + break; + case TARGET_CLOCK_MONOTONIC_COARSE: + gemu_log("CLOCK_MONOTONIC_COARSE"); + break; + case TARGET_CLOCK_BOOTTIME: + gemu_log("CLOCK_BOOTTIME"); + break; + case TARGET_CLOCK_REALTIME_ALARM: + gemu_log("CLOCK_REALTIME_ALARM"); + break; + case TARGET_CLOCK_BOOTTIME_ALARM: + gemu_log("CLOCK_BOOTTIME_ALARM"); + break; + case TARGET_CLOCK_SGI_CYCLE: + gemu_log("CLOCK_SGI_CYCLE"); + break; + case TARGET_CLOCK_TAI: + gemu_log("CLOCK_TAI"); + break; + default: + gemu_log("%d", clockid); + break; + } + gemu_log("%s", get_comma(last)); +} +#endif + /* * Sysycall specific output functions */ @@ -483,6 +640,52 @@ print_syscall_ret_newselect(const struct syscallname *name, abi_long ret) } #endif +/* special meanings of adjtimex()' non-negative return values */ +#define TARGET_TIME_OK 0 /* clock synchronized, no leap second */ +#define TARGET_TIME_INS 1 /* insert leap second 
*/ +#define TARGET_TIME_DEL 2 /* delete leap second */ +#define TARGET_TIME_OOP 3 /* leap second in progress */ +#define TARGET_TIME_WAIT 4 /* leap second has occurred */ +#define TARGET_TIME_ERROR 5 /* clock not synchronized */ +static void +print_syscall_ret_adjtimex(const struct syscallname *name, abi_long ret) +{ + const char *errstr = NULL; + + gemu_log(" = "); + if (ret < 0) { + gemu_log("-1 errno=%d", errno); + errstr = target_strerror(-ret); + if (errstr) { + gemu_log(" (%s)", errstr); + } + } else { + gemu_log(TARGET_ABI_FMT_ld, ret); + switch (ret) { + case TARGET_TIME_OK: + gemu_log(" TIME_OK (clock synchronized, no leap second)"); + break; + case TARGET_TIME_INS: + gemu_log(" TIME_INS (insert leap second)"); + break; + case TARGET_TIME_DEL: + gemu_log(" TIME_DEL (delete leap second)"); + break; + case TARGET_TIME_OOP: + gemu_log(" TIME_OOP (leap second in progress)"); + break; + case TARGET_TIME_WAIT: + gemu_log(" TIME_WAIT (leap second has occurred)"); + break; + case TARGET_TIME_ERROR: + gemu_log(" TIME_ERROR (clock not synchronized)"); + break; + } + } + + gemu_log("\n"); +} + UNUSED static struct flags access_flags[] = { FLAG_GENERIC(F_OK), FLAG_GENERIC(R_OK), @@ -956,6 +1159,19 @@ print_chmod(const struct syscallname *name, } #endif +#ifdef TARGET_NR_clock_adjtime +static void +print_clock_adjtime(const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_clockid(arg0, 0); + print_pointer(arg1, 1); + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_clone static void do_print_clone(unsigned int flags, abi_ulong newsp, abi_ulong parent_tidptr, target_ulong newtls, @@ -1535,29 +1751,32 @@ static void do_print_sockopt(const char *name, abi_long arg1) } #define PRINT_SOCKOP(name, func) \ - [SOCKOP_##name] = { #name, func } + [TARGET_SYS_##name] = { #name, func } static struct { const char *name; void (*print)(const char *, abi_long); } scall[] = { - PRINT_SOCKOP(socket, do_print_socket), - PRINT_SOCKOP(bind, do_print_sockaddr), - PRINT_SOCKOP(connect, do_print_sockaddr), - PRINT_SOCKOP(listen, do_print_listen), - PRINT_SOCKOP(accept, do_print_sockaddr), - PRINT_SOCKOP(getsockname, do_print_sockaddr), - PRINT_SOCKOP(getpeername, do_print_sockaddr), - PRINT_SOCKOP(socketpair, do_print_socketpair), - PRINT_SOCKOP(send, do_print_sendrecv), - PRINT_SOCKOP(recv, do_print_sendrecv), - PRINT_SOCKOP(sendto, do_print_msgaddr), - PRINT_SOCKOP(recvfrom, do_print_msgaddr), - PRINT_SOCKOP(shutdown, do_print_shutdown), - PRINT_SOCKOP(sendmsg, do_print_msg), - PRINT_SOCKOP(recvmsg, do_print_msg), - PRINT_SOCKOP(setsockopt, do_print_sockopt), - PRINT_SOCKOP(getsockopt, do_print_sockopt), + PRINT_SOCKOP(SOCKET, do_print_socket), + PRINT_SOCKOP(BIND, do_print_sockaddr), + PRINT_SOCKOP(CONNECT, do_print_sockaddr), + PRINT_SOCKOP(LISTEN, do_print_listen), + PRINT_SOCKOP(ACCEPT, do_print_sockaddr), + PRINT_SOCKOP(GETSOCKNAME, do_print_sockaddr), + PRINT_SOCKOP(GETPEERNAME, do_print_sockaddr), + PRINT_SOCKOP(SOCKETPAIR, do_print_socketpair), + PRINT_SOCKOP(SEND, do_print_sendrecv), + PRINT_SOCKOP(RECV, do_print_sendrecv), + PRINT_SOCKOP(SENDTO, do_print_msgaddr), + PRINT_SOCKOP(RECVFROM, do_print_msgaddr), + PRINT_SOCKOP(SHUTDOWN, do_print_shutdown), + PRINT_SOCKOP(SETSOCKOPT, do_print_sockopt), + PRINT_SOCKOP(GETSOCKOPT, do_print_sockopt), + PRINT_SOCKOP(SENDMSG, do_print_msg), + PRINT_SOCKOP(RECVMSG, do_print_msg), + PRINT_SOCKOP(ACCEPT4, NULL), + PRINT_SOCKOP(RECVMMSG, NULL), + 
PRINT_SOCKOP(SENDMMSG, NULL), }; static void @@ -1684,6 +1903,78 @@ print_rt_sigprocmask(const struct syscallname *name, } #endif +#ifdef TARGET_NR_syslog +static void +print_syslog_action(abi_ulong arg, int last) +{ + const char *type; + + switch (arg) { + case TARGET_SYSLOG_ACTION_CLOSE: { + type = "SYSLOG_ACTION_CLOSE"; + break; + } + case TARGET_SYSLOG_ACTION_OPEN: { + type = "SYSLOG_ACTION_OPEN"; + break; + } + case TARGET_SYSLOG_ACTION_READ: { + type = "SYSLOG_ACTION_READ"; + break; + } + case TARGET_SYSLOG_ACTION_READ_ALL: { + type = "SYSLOG_ACTION_READ_ALL"; + break; + } + case TARGET_SYSLOG_ACTION_READ_CLEAR: { + type = "SYSLOG_ACTION_READ_CLEAR"; + break; + } + case TARGET_SYSLOG_ACTION_CLEAR: { + type = "SYSLOG_ACTION_CLEAR"; + break; + } + case TARGET_SYSLOG_ACTION_CONSOLE_OFF: { + type = "SYSLOG_ACTION_CONSOLE_OFF"; + break; + } + case TARGET_SYSLOG_ACTION_CONSOLE_ON: { + type = "SYSLOG_ACTION_CONSOLE_ON"; + break; + } + case TARGET_SYSLOG_ACTION_CONSOLE_LEVEL: { + type = "SYSLOG_ACTION_CONSOLE_LEVEL"; + break; + } + case TARGET_SYSLOG_ACTION_SIZE_UNREAD: { + type = "SYSLOG_ACTION_SIZE_UNREAD"; + break; + } + case TARGET_SYSLOG_ACTION_SIZE_BUFFER: { + type = "SYSLOG_ACTION_SIZE_BUFFER"; + break; + } + default: { + print_raw_param("%ld", arg, last); + return; + } + } + gemu_log("%s%s", type, get_comma(last)); +} + +static void +print_syslog(const struct syscallname *name, + abi_long arg0, abi_long arg1, abi_long arg2, + abi_long arg3, abi_long arg4, abi_long arg5) +{ + print_syscall_prologue(name); + print_syslog_action(arg0, 0); + print_pointer(arg1, 0); + print_raw_param("%d", arg2, 1); + print_syscall_epilogue(name); +} +#endif + #ifdef TARGET_NR_mknod static void print_mknod(const struct syscallname *name, @@ -2190,3 +2481,15 @@ print_syscall_ret(int num, abi_long ret) break; } } + +void print_taken_signal(int target_signum, const target_siginfo_t *tinfo) +{ + /* Print the strace output for a signal being taken: + * --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} --- + */ + gemu_log("--- "); + print_signal(target_signum, 1); + gemu_log(" "); + print_siginfo(tinfo); + gemu_log(" ---\n"); +} diff --git a/linux-user/strace.list b/linux-user/strace.list index aa967a2475..3b1282ec1a 100644 --- a/linux-user/strace.list +++ b/linux-user/strace.list @@ -6,6 +6,9 @@ #ifdef TARGET_NR_accept { TARGET_NR_accept, "accept" , NULL, print_accept, NULL }, #endif +#ifdef TARGET_NR_accept4 +{ TARGET_NR_accept4, "accept4" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_access { TARGET_NR_access, "access" , NULL, print_access, NULL }, #endif @@ -16,7 +19,8 @@ { TARGET_NR_add_key, "add_key" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_adjtimex -{ TARGET_NR_adjtimex, "adjtimex" , NULL, NULL, NULL }, +{ TARGET_NR_adjtimex, "adjtimex" , "%s(%p)", NULL, + print_syscall_ret_adjtimex }, #endif #ifdef TARGET_NR_afs_syscall { TARGET_NR_afs_syscall, "afs_syscall" , NULL, NULL, NULL }, @@ -39,6 +43,9 @@ #ifdef TARGET_NR_bind { TARGET_NR_bind, "bind" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_bpf +{ TARGET_NR_bpf, "bpf" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_break { TARGET_NR_break, "break" , NULL, NULL, NULL }, #endif @@ -72,6 +79,9 @@ #ifdef TARGET_NR_chroot { TARGET_NR_chroot, "chroot" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_clock_adjtime +{ TARGET_NR_clock_adjtime, "clock_adjtime" , NULL, print_clock_adjtime, NULL }, +#endif #ifdef TARGET_NR_clock_getres { TARGET_NR_clock_getres, "clock_getres" , NULL, NULL, NULL }, #endif @@ -123,18 +133,30 @@ #ifdef 
TARGET_NR_epoll_ctl_old { TARGET_NR_epoll_ctl_old, "epoll_ctl_old" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_epoll_pwait +{ TARGET_NR_epoll_pwait, "epoll_pwait" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_epoll_wait { TARGET_NR_epoll_wait, "epoll_wait" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_epoll_wait_old { TARGET_NR_epoll_wait_old, "epoll_wait_old" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_eventfd +{ TARGET_NR_eventfd, "eventfd" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_eventfd2 +{ TARGET_NR_eventfd2, "eventfd2" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_execv { TARGET_NR_execv, "execv" , NULL, print_execv, NULL }, #endif #ifdef TARGET_NR_execve { TARGET_NR_execve, "execve" , NULL, print_execve, NULL }, #endif +#ifdef TARGET_NR_execveat +{ TARGET_NR_execveat, "execveat" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_exec_with_loader { TARGET_NR_exec_with_loader, "exec_with_loader" , NULL, NULL, NULL }, #endif @@ -156,6 +178,15 @@ #ifdef TARGET_NR_fadvise64_64 { TARGET_NR_fadvise64_64, "fadvise64_64" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_fallocate +{ TARGET_NR_fallocate, "fallocate" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_fanotify_init +{ TARGET_NR_fanotify_init, "fanotify_init" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_fanotify_mark +{ TARGET_NR_fanotify_mark, "fanotify_mark" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_fchdir { TARGET_NR_fchdir, "fchdir" , NULL, NULL, NULL }, #endif @@ -186,6 +217,9 @@ #ifdef TARGET_NR_fgetxattr { TARGET_NR_fgetxattr, "fgetxattr" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_finit_module +{ TARGET_NR_finit_module, "finit_module" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_flistxattr { TARGET_NR_flistxattr, "flistxattr" , NULL, NULL, NULL }, #endif @@ -231,6 +265,9 @@ #ifdef TARGET_NR_futimesat { TARGET_NR_futimesat, "futimesat" , NULL, print_futimesat, NULL }, #endif +#ifdef TARGET_NR_getcpu +{ TARGET_NR_getcpu, "getcpu" , "%s(%p,%d)", NULL, NULL }, +#endif #ifdef TARGET_NR_getcwd { TARGET_NR_getcwd, "getcwd" , "%s(%p,%d)", NULL, NULL }, #endif @@ -306,6 +343,9 @@ #ifdef TARGET_NR_getpriority { TARGET_NR_getpriority, "getpriority", "%s(%#x,%#x)", NULL, NULL }, #endif +#ifdef TARGET_NR_getrandom +{ TARGET_NR_getrandom, "getrandom", NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_getresgid { TARGET_NR_getresgid, "getresgid" , NULL, NULL, NULL }, #endif @@ -379,6 +419,9 @@ #ifdef TARGET_NR_inotify_init { TARGET_NR_inotify_init, "inotify_init" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_inotify_init1 +{ TARGET_NR_inotify_init1, "inotify_init1" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_inotify_rm_watch { TARGET_NR_inotify_rm_watch, "inotify_rm_watch" , NULL, NULL, NULL }, #endif @@ -415,6 +458,9 @@ #ifdef TARGET_NR_ipc { TARGET_NR_ipc, "ipc" , NULL, print_ipc, NULL }, #endif +#ifdef TARGET_NR_kcmp +{ TARGET_NR_kcmp, "kcmp" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_kexec_load { TARGET_NR_kexec_load, "kexec_load" , NULL, NULL, NULL }, #endif @@ -484,6 +530,12 @@ #ifdef TARGET_NR_mbind { TARGET_NR_mbind, "mbind" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_membarrier +{ TARGET_NR_membarrier, "membarrier" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_memfd_create +{ TARGET_NR_memfd_create, "memfd_create" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_memory_ordering { TARGET_NR_memory_ordering, "memory_ordering" , NULL, NULL, NULL }, #endif @@ -511,6 +563,9 @@ #ifdef TARGET_NR_mlock { TARGET_NR_mlock, "mlock" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_mlock2 +{ TARGET_NR_mlock2, "mlock2" 
, NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_mlockall { TARGET_NR_mlockall, "mlockall" , NULL, NULL, NULL }, #endif @@ -583,6 +638,9 @@ #ifdef TARGET_NR_munmap { TARGET_NR_munmap, "munmap" , NULL, print_munmap, NULL }, #endif +#ifdef TARGET_NR_name_to_handle_at +{ TARGET_NR_name_to_handle_at, "name_to_handle_at" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_nanosleep { TARGET_NR_nanosleep, "nanosleep" , NULL, NULL, NULL }, #endif @@ -952,6 +1010,9 @@ #ifdef TARGET_NR_pciconfig_write { TARGET_NR_pciconfig_write, "pciconfig_write" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_perf_event_open +{ TARGET_NR_perf_event_open, "perf_event_open" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_perfctr { TARGET_NR_perfctr, "perfctr" , NULL, NULL, NULL }, #endif @@ -976,6 +1037,18 @@ #ifdef TARGET_NR_pread64 { TARGET_NR_pread64, "pread64" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_preadv +{ TARGET_NR_preadv, "preadv" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_prlimit64 +{ TARGET_NR_prlimit64, "prlimit64" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_process_vm_readv +{ TARGET_NR_process_vm_readv, "process_vm_readv" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_process_vm_writev +{ TARGET_NR_process_vm_writev, "process_vm_writev" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_prof { TARGET_NR_prof, "prof" , NULL, NULL, NULL }, #endif @@ -994,6 +1067,9 @@ #ifdef TARGET_NR_pwrite64 { TARGET_NR_pwrite64, "pwrite64" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_pwritev +{ TARGET_NR_pwritev, "pwritev" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_query_module { TARGET_NR_query_module, "query_module" , NULL, NULL, NULL }, #endif @@ -1027,6 +1103,9 @@ #ifdef TARGET_NR_recvfrom { TARGET_NR_recvfrom, "recvfrom" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_recvmmsg +{ TARGET_NR_recvmmsg, "recvmmsg" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_recvmsg { TARGET_NR_recvmsg, "recvmsg" , NULL, NULL, NULL }, #endif @@ -1042,9 +1121,18 @@ #ifdef TARGET_NR_renameat { TARGET_NR_renameat, "renameat" , NULL, print_renameat, NULL }, #endif +#ifdef TARGET_NR_renameat2 +{ TARGET_NR_renameat2, "renameat2" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_request_key { TARGET_NR_request_key, "request_key" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_reserved177 +{ TARGET_NR_reserved177, "reserved177" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_reserved193 +{ TARGET_NR_reserved193, "reserved193" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_reserved221 { TARGET_NR_reserved221, "reserved221" , NULL, NULL, NULL }, #endif @@ -1078,12 +1166,18 @@ #ifdef TARGET_NR_rt_sigtimedwait { TARGET_NR_rt_sigtimedwait, "rt_sigtimedwait" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_rt_tgsigqueueinfo +{ TARGET_NR_rt_tgsigqueueinfo, "rt_tgsigqueueinfo" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_sched_getaffinity { TARGET_NR_sched_getaffinity, "sched_getaffinity" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_sched_get_affinity { TARGET_NR_sched_get_affinity, "sched_get_affinity" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_sched_getattr +{ TARGET_NR_sched_getattr, "sched_getattr" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_sched_getparam { TARGET_NR_sched_getparam, "sched_getparam" , NULL, NULL, NULL }, #endif @@ -1102,6 +1196,9 @@ #ifdef TARGET_NR_sched_setaffinity { TARGET_NR_sched_setaffinity, "sched_setaffinity" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_sched_setatt +{ TARGET_NR_sched_setatt, "sched_setatt" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_sched_set_affinity { TARGET_NR_sched_set_affinity, 
"sched_set_affinity" , NULL, NULL, NULL }, #endif @@ -1114,6 +1211,9 @@ #ifdef TARGET_NR_sched_yield { TARGET_NR_sched_yield, "sched_yield" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_seccomp +{ TARGET_NR_seccomp, "seccomp" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_security { TARGET_NR_security, "security" , NULL, NULL, NULL }, #endif @@ -1141,6 +1241,9 @@ #ifdef TARGET_NR_sendfile64 { TARGET_NR_sendfile64, "sendfile64" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_sendmmsg +{ TARGET_NR_sendmmsg, "sendmmsg" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_sendmsg { TARGET_NR_sendmsg, "sendmsg" , NULL, NULL, NULL }, #endif @@ -1280,6 +1383,12 @@ #ifdef TARGET_NR_signal { TARGET_NR_signal, "signal" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_signalfd +{ TARGET_NR_signalfd, "signalfd" , NULL, NULL, NULL }, +#endif +#ifdef TARGET_NR_signalfd4 +{ TARGET_NR_signalfd4, "signalfd4" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_sigpending { TARGET_NR_sigpending, "sigpending" , NULL, NULL, NULL }, #endif @@ -1349,8 +1458,8 @@ #ifdef TARGET_NR_sync { TARGET_NR_sync, "sync" , NULL, NULL, NULL }, #endif -#ifdef TARGET_NR_sync_file_range -{ TARGET_NR_sync_file_range, "sync_file_range" , NULL, NULL, NULL }, +#ifdef TARGET_NR_syncfs +{ TARGET_NR_syncfs, "syncfs" , "%s(%d)", NULL, NULL }, #endif #ifdef TARGET_NR_syscall { TARGET_NR_syscall, "syscall" , NULL, NULL, NULL }, @@ -1377,7 +1486,7 @@ { TARGET_NR_sys_kexec_load, "sys_kexec_load" , NULL, NULL, NULL }, #endif #ifdef TARGET_NR_syslog -{ TARGET_NR_syslog, "syslog" , NULL, NULL, NULL }, +{ TARGET_NR_syslog, "syslog" , NULL, print_syslog, NULL }, #endif #ifdef TARGET_NR_sysmips { TARGET_NR_sysmips, "sysmips" , NULL, NULL, NULL }, @@ -1409,6 +1518,9 @@ #ifdef TARGET_NR_timer_settime { TARGET_NR_timer_settime, "timer_settime" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_timerfd +{ TARGET_NR_timerfd, "timerfd" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_timerfd_create { TARGET_NR_timerfd_create, "timerfd_create" , NULL, NULL, NULL }, #endif @@ -1460,6 +1572,9 @@ #ifdef TARGET_NR_unshare { TARGET_NR_unshare, "unshare" , NULL, NULL, NULL }, #endif +#ifdef TARGET_NR_userfaultfd +{ TARGET_NR_userfaultfd, "userfaultfd" , NULL, NULL, NULL }, +#endif #ifdef TARGET_NR_unused109 { TARGET_NR_unused109, "unused109" , NULL, NULL, NULL }, #endif diff --git a/linux-user/syscall.c b/linux-user/syscall.c index ca06943f3b..7b77503f94 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -35,6 +35,7 @@ #include #include #include +#include #ifdef __ia64__ int __clone2(int (*fn)(void *), void *child_stack_base, size_t stack_size, int flags, void *arg, ...); @@ -42,11 +43,12 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include #include #include -#include +#include #include #include #include #include +#include #include #include #include @@ -112,8 +114,56 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include "qemu.h" -#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \ - CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) +#ifndef CLONE_IO +#define CLONE_IO 0x80000000 /* Clone io context */ +#endif + +/* We can't directly call the host clone syscall, because this will + * badly confuse libc (breaking mutexes, for example). 
So we must + * divide clone flags into: + * * flag combinations that look like pthread_create() + * * flag combinations that look like fork() + * * flags we can implement within QEMU itself + * * flags we can't support and will return an error for + */ +/* For thread creation, all these flags must be present; for + * fork, none must be present. + */ +#define CLONE_THREAD_FLAGS \ + (CLONE_VM | CLONE_FS | CLONE_FILES | \ + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM) + +/* These flags are ignored: + * CLONE_DETACHED is now ignored by the kernel; + * CLONE_IO is just an optimisation hint to the I/O scheduler + */ +#define CLONE_IGNORED_FLAGS \ + (CLONE_DETACHED | CLONE_IO) + +/* Flags for fork which we can implement within QEMU itself */ +#define CLONE_OPTIONAL_FORK_FLAGS \ + (CLONE_SETTLS | CLONE_PARENT_SETTID | \ + CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID) + +/* Flags for thread creation which we can implement within QEMU itself */ +#define CLONE_OPTIONAL_THREAD_FLAGS \ + (CLONE_SETTLS | CLONE_PARENT_SETTID | \ + CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID | CLONE_PARENT) + +#define CLONE_INVALID_FORK_FLAGS \ + (~(CSIGNAL | CLONE_OPTIONAL_FORK_FLAGS | CLONE_IGNORED_FLAGS)) + +#define CLONE_INVALID_THREAD_FLAGS \ + (~(CSIGNAL | CLONE_THREAD_FLAGS | CLONE_OPTIONAL_THREAD_FLAGS | \ + CLONE_IGNORED_FLAGS)) + +/* CLONE_VFORK is special cased early in do_fork(). The other flag bits + * have almost all been allocated. We cannot support any of + * CLONE_NEWNS, CLONE_NEWCGROUP, CLONE_NEWUTS, CLONE_NEWIPC, + * CLONE_NEWUSER, CLONE_NEWPID, CLONE_NEWNET, CLONE_PTRACE, CLONE_UNTRACED. + * The checks against the invalid thread masks above will catch these. + * (The one remaining unallocated bit is 0x1000 which used to be CLONE_PID.) + */ //#define DEBUG /* Define DEBUG_ERESTARTSYS to force every syscall to be restarted @@ -256,6 +306,11 @@ _syscall3(int, ioprio_set, int, which, int, who, int, ioprio) _syscall3(int, getrandom, void *, buf, size_t, buflen, unsigned int, flags) #endif +#if defined(TARGET_NR_kcmp) && defined(__NR_kcmp) +_syscall5(int, kcmp, pid_t, pid1, pid_t, pid2, int, type, + unsigned long, idx1, unsigned long, idx2) +#endif + static bitmask_transtbl fcntl_flags_tbl[] = { { TARGET_O_ACCMODE, TARGET_O_WRONLY, O_ACCMODE, O_WRONLY, }, { TARGET_O_ACCMODE, TARGET_O_RDWR, O_ACCMODE, O_RDWR, }, @@ -520,16 +575,7 @@ static int sys_getcwd1(char *buf, size_t size) } #ifdef TARGET_NR_utimensat -#ifdef CONFIG_UTIMENSAT -static int sys_utimensat(int dirfd, const char *pathname, - const struct timespec times[2], int flags) -{ - if (pathname == NULL) - return futimens(dirfd, times); - else - return utimensat(dirfd, pathname, times, flags); -} -#elif defined(__NR_utimensat) +#if defined(__NR_utimensat) #define __NR_sys_utimensat __NR_utimensat _syscall4(int,sys_utimensat,int,dirfd,const char *,pathname, const struct timespec *,tsp,int,flags) @@ -619,7 +665,7 @@ static inline int next_free_host_timer(void) static inline int regpairs_aligned(void *cpu_env) { return ((((CPUARMState *)cpu_env)->eabi) == 1) ; } -#elif defined(TARGET_MIPS) +#elif defined(TARGET_MIPS) && (TARGET_ABI_BITS == 32) static inline int regpairs_aligned(void *cpu_env) { return 1; } #elif defined(TARGET_PPC) && !defined(TARGET_PPC64) /* SysV AVI for PPC32 expects 64bit parameters to be passed on odd/even pairs @@ -718,6 +764,7 @@ static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = { [ENAVAIL] = TARGET_ENAVAIL, [EISNAM] = TARGET_EISNAM, [EREMOTEIO] = TARGET_EREMOTEIO, + [EDQUOT] = TARGET_EDQUOT, [ESHUTDOWN] = TARGET_ESHUTDOWN, 
[ETOOMANYREFS] = TARGET_ETOOMANYREFS, [ETIMEDOUT] = TARGET_ETIMEDOUT, @@ -748,6 +795,9 @@ static uint16_t host_to_target_errno_table[ERRNO_TABLE_SIZE] = { #ifdef ENOTRECOVERABLE [ENOTRECOVERABLE] = TARGET_ENOTRECOVERABLE, #endif +#ifdef ENOMSG + [ENOMSG] = TARGET_ENOMSG, +#endif }; static inline int host_to_target_errno(int err) @@ -868,6 +918,10 @@ safe_syscall2(int, tkill, int, tid, int, sig) safe_syscall3(int, tgkill, int, tgid, int, pid, int, sig) safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt) safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, iovcnt) +safe_syscall5(ssize_t, preadv, int, fd, const struct iovec *, iov, int, iovcnt, + unsigned long, pos_l, unsigned long, pos_h) +safe_syscall5(ssize_t, pwritev, int, fd, const struct iovec *, iov, int, iovcnt, + unsigned long, pos_l, unsigned long, pos_h) safe_syscall3(int, connect, int, fd, const struct sockaddr *, addr, socklen_t, addrlen) safe_syscall6(ssize_t, sendto, int, fd, const void *, buf, size_t, len, @@ -1405,6 +1459,29 @@ static abi_long do_select(int n, return ret; } + +#if defined(TARGET_WANT_OLD_SYS_SELECT) +static abi_long do_old_select(abi_ulong arg1) +{ + struct target_sel_arg_struct *sel; + abi_ulong inp, outp, exp, tvp; + long nsel; + + if (!lock_user_struct(VERIFY_READ, sel, arg1, 1)) { + return -TARGET_EFAULT; + } + + nsel = tswapal(sel->n); + inp = tswapal(sel->inp); + outp = tswapal(sel->outp); + exp = tswapal(sel->exp); + tvp = tswapal(sel->tvp); + + unlock_user_struct(sel, arg1, 0); + + return do_select(nsel, inp, outp, exp, tvp); +} +#endif #endif static abi_long do_pipe2(int host_pipe[], int flags) @@ -2528,6 +2605,7 @@ static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr) case RTA_GATEWAY: break; /* u32 */ + case RTA_PRIORITY: case RTA_OIF: u32 = RTA_DATA(rtattr); *u32 = tswap32(*u32); @@ -3119,7 +3197,7 @@ static abi_long do_getsockopt(int sockfd, int level, int optname, } static struct iovec *lock_iovec(int type, abi_ulong target_addr, - int count, int copy) + abi_ulong count, int copy) { struct target_iovec *target_vec; struct iovec *vec; @@ -3132,7 +3210,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr, errno = 0; return NULL; } - if (count < 0 || count > IOV_MAX) { + if (count > IOV_MAX) { errno = EINVAL; return NULL; } @@ -3207,7 +3285,7 @@ static struct iovec *lock_iovec(int type, abi_ulong target_addr, } static void unlock_iovec(struct iovec *vec, abi_ulong target_addr, - int count, int copy) + abi_ulong count, int copy) { struct target_iovec *target_vec; int i; @@ -3462,7 +3540,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, { abi_long ret, len; struct msghdr msg; - int count; + abi_ulong count; struct iovec *vec; abi_ulong target_vec; @@ -3472,7 +3550,14 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, ret = target_to_host_sockaddr(fd, msg.msg_name, tswapal(msgp->msg_name), msg.msg_namelen); - if (ret) { + if (ret == -TARGET_EFAULT) { + /* For connected sockets msg_name and msg_namelen must + * be ignored, so returning EFAULT immediately is wrong. + * Instead, pass a bad msg_name to the host kernel, and + * let it decide whether to return EFAULT or not. 
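+ * (Illustrative guest-side case, not taken from the patch: with an
+ * already-connected socket fd, a call such as
+ *     struct msghdr m = { .msg_name = (void *)0xdead, .msg_namelen = 16,
+ *                         .msg_iov = &iov, .msg_iovlen = 1 };
+ *     sendmsg(fd, &m, 0);
+ * succeeds natively because, as noted above, the kernel ignores msg_name
+ * for connected sockets; QEMU therefore must not short-circuit the call
+ * with EFAULT.)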
+ */ + msg.msg_name = (void *)-1; + } else if (ret) { goto out2; } } else { @@ -3485,6 +3570,15 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, count = tswapal(msgp->msg_iovlen); target_vec = tswapal(msgp->msg_iov); + + if (count > IOV_MAX) { + /* sendrcvmsg returns a different errno for this condition than + * readv/writev, so we must catch it here before lock_iovec() does. + */ + ret = -TARGET_EMSGSIZE; + goto out2; + } + vec = lock_iovec(send ? VERIFY_READ : VERIFY_WRITE, target_vec, count, send); if (vec == NULL) { @@ -3525,7 +3619,7 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, } if (!is_error(ret)) { msgp->msg_namelen = tswap32(msg.msg_namelen); - if (msg.msg_name != NULL) { + if (msg.msg_name != NULL && msg.msg_name != (void *)-1) { ret = host_to_target_sockaddr(tswapal(msgp->msg_name), msg.msg_name, msg.msg_namelen); if (ret) { @@ -3814,89 +3908,94 @@ static abi_long do_recvfrom(int fd, abi_ulong msg, size_t len, int flags, } #ifdef TARGET_NR_socketcall -/* do_socketcall() Must return target values and target errnos. */ +/* do_socketcall() must return target values and target errnos. */ static abi_long do_socketcall(int num, abi_ulong vptr) { - static const unsigned ac[] = { /* number of arguments per call */ - [SOCKOP_socket] = 3, /* domain, type, protocol */ - [SOCKOP_bind] = 3, /* sockfd, addr, addrlen */ - [SOCKOP_connect] = 3, /* sockfd, addr, addrlen */ - [SOCKOP_listen] = 2, /* sockfd, backlog */ - [SOCKOP_accept] = 3, /* sockfd, addr, addrlen */ - [SOCKOP_accept4] = 4, /* sockfd, addr, addrlen, flags */ - [SOCKOP_getsockname] = 3, /* sockfd, addr, addrlen */ - [SOCKOP_getpeername] = 3, /* sockfd, addr, addrlen */ - [SOCKOP_socketpair] = 4, /* domain, type, protocol, tab */ - [SOCKOP_send] = 4, /* sockfd, msg, len, flags */ - [SOCKOP_recv] = 4, /* sockfd, msg, len, flags */ - [SOCKOP_sendto] = 6, /* sockfd, msg, len, flags, addr, addrlen */ - [SOCKOP_recvfrom] = 6, /* sockfd, msg, len, flags, addr, addrlen */ - [SOCKOP_shutdown] = 2, /* sockfd, how */ - [SOCKOP_sendmsg] = 3, /* sockfd, msg, flags */ - [SOCKOP_recvmsg] = 3, /* sockfd, msg, flags */ - [SOCKOP_sendmmsg] = 4, /* sockfd, msgvec, vlen, flags */ - [SOCKOP_recvmmsg] = 4, /* sockfd, msgvec, vlen, flags */ - [SOCKOP_setsockopt] = 5, /* sockfd, level, optname, optval, optlen */ - [SOCKOP_getsockopt] = 5, /* sockfd, level, optname, optval, optlen */ + static const unsigned nargs[] = { /* number of arguments per operation */ + [TARGET_SYS_SOCKET] = 3, /* domain, type, protocol */ + [TARGET_SYS_BIND] = 3, /* fd, addr, addrlen */ + [TARGET_SYS_CONNECT] = 3, /* fd, addr, addrlen */ + [TARGET_SYS_LISTEN] = 2, /* fd, backlog */ + [TARGET_SYS_ACCEPT] = 3, /* fd, addr, addrlen */ + [TARGET_SYS_GETSOCKNAME] = 3, /* fd, addr, addrlen */ + [TARGET_SYS_GETPEERNAME] = 3, /* fd, addr, addrlen */ + [TARGET_SYS_SOCKETPAIR] = 4, /* domain, type, protocol, tab */ + [TARGET_SYS_SEND] = 4, /* fd, msg, len, flags */ + [TARGET_SYS_RECV] = 4, /* fd, msg, len, flags */ + [TARGET_SYS_SENDTO] = 6, /* fd, msg, len, flags, addr, addrlen */ + [TARGET_SYS_RECVFROM] = 6, /* fd, msg, len, flags, addr, addrlen */ + [TARGET_SYS_SHUTDOWN] = 2, /* fd, how */ + [TARGET_SYS_SETSOCKOPT] = 5, /* fd, level, optname, optval, optlen */ + [TARGET_SYS_GETSOCKOPT] = 5, /* fd, level, optname, optval, optlen */ + [TARGET_SYS_SENDMSG] = 3, /* fd, msg, flags */ + [TARGET_SYS_RECVMSG] = 3, /* fd, msg, flags */ + [TARGET_SYS_ACCEPT4] = 4, /* fd, addr, addrlen, flags */ + [TARGET_SYS_RECVMMSG] = 4, /* fd, msgvec, 
vlen, flags */ + [TARGET_SYS_SENDMMSG] = 4, /* fd, msgvec, vlen, flags */ }; abi_long a[6]; /* max 6 args */ + unsigned i; - /* first, collect the arguments in a[] according to ac[] */ - if (num >= 0 && num < ARRAY_SIZE(ac)) { - unsigned i; - assert(ARRAY_SIZE(a) >= ac[num]); /* ensure we have space for args */ - for (i = 0; i < ac[num]; ++i) { - if (get_user_ual(a[i], vptr + i * sizeof(abi_long)) != 0) { - return -TARGET_EFAULT; - } + /* check the range of the first argument num */ + /* (TARGET_SYS_SENDMMSG is the highest among TARGET_SYS_xxx) */ + if (num < 1 || num > TARGET_SYS_SENDMMSG) { + return -TARGET_EINVAL; + } + /* ensure we have space for args */ + if (nargs[num] > ARRAY_SIZE(a)) { + return -TARGET_EINVAL; + } + /* collect the arguments in a[] according to nargs[] */ + for (i = 0; i < nargs[num]; ++i) { + if (get_user_ual(a[i], vptr + i * sizeof(abi_long)) != 0) { + return -TARGET_EFAULT; } } - - /* now when we have the args, actually handle the call */ + /* now when we have the args, invoke the appropriate underlying function */ switch (num) { - case SOCKOP_socket: /* domain, type, protocol */ + case TARGET_SYS_SOCKET: /* domain, type, protocol */ return do_socket(a[0], a[1], a[2]); - case SOCKOP_bind: /* sockfd, addr, addrlen */ + case TARGET_SYS_BIND: /* sockfd, addr, addrlen */ return do_bind(a[0], a[1], a[2]); - case SOCKOP_connect: /* sockfd, addr, addrlen */ + case TARGET_SYS_CONNECT: /* sockfd, addr, addrlen */ return do_connect(a[0], a[1], a[2]); - case SOCKOP_listen: /* sockfd, backlog */ + case TARGET_SYS_LISTEN: /* sockfd, backlog */ return get_errno(listen(a[0], a[1])); - case SOCKOP_accept: /* sockfd, addr, addrlen */ + case TARGET_SYS_ACCEPT: /* sockfd, addr, addrlen */ return do_accept4(a[0], a[1], a[2], 0); - case SOCKOP_accept4: /* sockfd, addr, addrlen, flags */ - return do_accept4(a[0], a[1], a[2], a[3]); - case SOCKOP_getsockname: /* sockfd, addr, addrlen */ + case TARGET_SYS_GETSOCKNAME: /* sockfd, addr, addrlen */ return do_getsockname(a[0], a[1], a[2]); - case SOCKOP_getpeername: /* sockfd, addr, addrlen */ + case TARGET_SYS_GETPEERNAME: /* sockfd, addr, addrlen */ return do_getpeername(a[0], a[1], a[2]); - case SOCKOP_socketpair: /* domain, type, protocol, tab */ + case TARGET_SYS_SOCKETPAIR: /* domain, type, protocol, tab */ return do_socketpair(a[0], a[1], a[2], a[3]); - case SOCKOP_send: /* sockfd, msg, len, flags */ + case TARGET_SYS_SEND: /* sockfd, msg, len, flags */ return do_sendto(a[0], a[1], a[2], a[3], 0, 0); - case SOCKOP_recv: /* sockfd, msg, len, flags */ + case TARGET_SYS_RECV: /* sockfd, msg, len, flags */ return do_recvfrom(a[0], a[1], a[2], a[3], 0, 0); - case SOCKOP_sendto: /* sockfd, msg, len, flags, addr, addrlen */ + case TARGET_SYS_SENDTO: /* sockfd, msg, len, flags, addr, addrlen */ return do_sendto(a[0], a[1], a[2], a[3], a[4], a[5]); - case SOCKOP_recvfrom: /* sockfd, msg, len, flags, addr, addrlen */ + case TARGET_SYS_RECVFROM: /* sockfd, msg, len, flags, addr, addrlen */ return do_recvfrom(a[0], a[1], a[2], a[3], a[4], a[5]); - case SOCKOP_shutdown: /* sockfd, how */ + case TARGET_SYS_SHUTDOWN: /* sockfd, how */ return get_errno(shutdown(a[0], a[1])); - case SOCKOP_sendmsg: /* sockfd, msg, flags */ + case TARGET_SYS_SETSOCKOPT: /* sockfd, level, optname, optval, optlen */ + return do_setsockopt(a[0], a[1], a[2], a[3], a[4]); + case TARGET_SYS_GETSOCKOPT: /* sockfd, level, optname, optval, optlen */ + return do_getsockopt(a[0], a[1], a[2], a[3], a[4]); + case TARGET_SYS_SENDMSG: /* sockfd, msg, flags */ return 
do_sendrecvmsg(a[0], a[1], a[2], 1); - case SOCKOP_recvmsg: /* sockfd, msg, flags */ + case TARGET_SYS_RECVMSG: /* sockfd, msg, flags */ return do_sendrecvmsg(a[0], a[1], a[2], 0); - case SOCKOP_sendmmsg: /* sockfd, msgvec, vlen, flags */ - return do_sendrecvmmsg(a[0], a[1], a[2], a[3], 1); - case SOCKOP_recvmmsg: /* sockfd, msgvec, vlen, flags */ + case TARGET_SYS_ACCEPT4: /* sockfd, addr, addrlen, flags */ + return do_accept4(a[0], a[1], a[2], a[3]); + case TARGET_SYS_RECVMMSG: /* sockfd, msgvec, vlen, flags */ return do_sendrecvmmsg(a[0], a[1], a[2], a[3], 0); - case SOCKOP_setsockopt: /* sockfd, level, optname, optval, optlen */ - return do_setsockopt(a[0], a[1], a[2], a[3], a[4]); - case SOCKOP_getsockopt: /* sockfd, level, optname, optval, optlen */ - return do_getsockopt(a[0], a[1], a[2], a[3], a[4]); + case TARGET_SYS_SENDMMSG: /* sockfd, msgvec, vlen, flags */ + return do_sendrecvmmsg(a[0], a[1], a[2], a[3], 1); default: gemu_log("Unsupported socketcall: %d\n", num); - return -TARGET_ENOSYS; + return -TARGET_EINVAL; } } #endif @@ -4568,12 +4667,34 @@ static inline abi_long do_shmctl(int shmid, int cmd, abi_long buf) return ret; } -static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg) +#ifndef TARGET_FORCE_SHMLBA +/* For most architectures, SHMLBA is the same as the page size; + * some architectures have larger values, in which case they should + * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function. + * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA + * and defining its own value for SHMLBA. + * + * The kernel also permits SHMLBA to be set by the architecture to a + * value larger than the page size without setting __ARCH_FORCE_SHMLBA; + * this means that addresses are rounded to the large size if + * SHM_RND is set but addresses not aligned to that size are not rejected + * as long as they are at least page-aligned. Since the only architecture + * which uses this is ia64 this code doesn't provide for that oddity. + */ +static inline abi_ulong target_shmlba(CPUArchState *cpu_env) +{ + return TARGET_PAGE_SIZE; +} +#endif + +static inline abi_ulong do_shmat(CPUArchState *cpu_env, + int shmid, abi_ulong shmaddr, int shmflg) { abi_long raddr; void *host_raddr; struct shmid_ds shm_info; int i,ret; + abi_ulong shmlba; /* find out the length of the shared memory segment */ ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info)); @@ -4582,6 +4703,16 @@ static inline abi_ulong do_shmat(int shmid, abi_ulong shmaddr, int shmflg) return ret; } + shmlba = target_shmlba(cpu_env); + + if (shmaddr & (shmlba - 1)) { + if (shmflg & SHM_RND) { + shmaddr &= ~(shmlba - 1); + } else { + return -TARGET_EINVAL; + } + } + mmap_lock(); if (shmaddr) @@ -4640,7 +4771,8 @@ static inline abi_long do_shmdt(abi_ulong shmaddr) #ifdef TARGET_NR_ipc /* ??? This only works with linear mappings. */ /* do_ipc() must return target values and target errnos. 
*/ -static abi_long do_ipc(unsigned int call, abi_long first, +static abi_long do_ipc(CPUArchState *cpu_env, + unsigned int call, abi_long first, abi_long second, abi_long third, abi_long ptr, abi_long fifth) { @@ -4709,7 +4841,7 @@ static abi_long do_ipc(unsigned int call, abi_long first, default: { abi_ulong raddr; - raddr = do_shmat(first, ptr, second); + raddr = do_shmat(cpu_env, first, ptr, second); if (is_error(raddr)) return get_errno(raddr); if (put_user_ual(raddr, third)) @@ -4994,13 +5126,18 @@ static abi_long do_ioctl_dm(const IOCTLEntry *ie, uint8_t *buf_temp, int fd, guest_data = arg + host_dm->data_start; if ((guest_data - arg) < 0) { - ret = -EINVAL; + ret = -TARGET_EINVAL; goto out; } guest_data_size = host_dm->data_size - host_dm->data_start; host_data = (char*)host_dm + host_dm->data_start; argptr = lock_user(VERIFY_READ, guest_data, guest_data_size, 1); + if (!argptr) { + ret = -TARGET_EFAULT; + goto out; + } + switch (ie->host_cmd) { case DM_REMOVE_ALL: case DM_LIST_DEVICES: @@ -5966,9 +6103,10 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, TaskState *ts; CPUState *new_cpu; CPUArchState *new_env; - unsigned int nptl_flags; sigset_t sigmask; + flags &= ~CLONE_IGNORED_FLAGS; + /* Emulate vfork() with fork() */ if (flags & CLONE_VFORK) flags &= ~(CLONE_VFORK | CLONE_VM); @@ -5978,6 +6116,11 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, new_thread_info info; pthread_attr_t attr; + if (((flags & CLONE_THREAD_FLAGS) != CLONE_THREAD_FLAGS) || + (flags & CLONE_INVALID_THREAD_FLAGS)) { + return -TARGET_EINVAL; + } + ts = g_new0(TaskState, 1); init_task_state(ts); /* we create a new CPU instance. */ @@ -5989,15 +6132,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, ts->bprm = parent_ts->bprm; ts->info = parent_ts->info; ts->signal_mask = parent_ts->signal_mask; - nptl_flags = flags; - flags &= ~CLONE_NPTL_FLAGS2; - if (nptl_flags & CLONE_CHILD_CLEARTID) { + if (flags & CLONE_CHILD_CLEARTID) { ts->child_tidptr = child_tidptr; } - if (nptl_flags & CLONE_SETTLS) + if (flags & CLONE_SETTLS) { cpu_set_tls (new_env, newtls); + } /* Grab a mutex so that thread setup appears atomic. */ pthread_mutex_lock(&clone_lock); @@ -6007,10 +6149,12 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, pthread_mutex_lock(&info.mutex); pthread_cond_init(&info.cond, NULL); info.env = new_env; - if (nptl_flags & CLONE_CHILD_SETTID) + if (flags & CLONE_CHILD_SETTID) { info.child_tidptr = child_tidptr; - if (nptl_flags & CLONE_PARENT_SETTID) + } + if (flags & CLONE_PARENT_SETTID) { info.parent_tidptr = parent_tidptr; + } ret = pthread_attr_init(&attr); ret = pthread_attr_setstacksize(&attr, NEW_STACK_SIZE); @@ -6020,6 +6164,14 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, sigfillset(&sigmask); sigprocmask(SIG_BLOCK, &sigmask, &info.sigmask); + /* If this is our first additional thread, we need to ensure we + * generate code for parallel execution and flush old translations. + */ + if (!parallel_cpus) { + parallel_cpus = true; + tb_flush(cpu); + } + ret = pthread_create(&info.thread, &attr, clone_func, &info); /* TODO: Free new CPU state if thread creation failed. */ @@ -6029,8 +6181,6 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, /* Wait for the child to initialize. 
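 * (Sketch, assuming the behaviour of the clone_func() helper passed to
 *  pthread_create() above: the child records its TID in info->tid,
 *  finishes setting up its CPU state, then signals info->cond; the
 *  parent wakes from the pthread_cond_wait() below and hands info.tid
 *  back as the clone() result.)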
*/ pthread_cond_wait(&info.cond, &info.mutex); ret = info.tid; - if (flags & CLONE_PARENT_SETTID) - put_user_u32(ret, parent_tidptr); } else { ret = -1; } @@ -6040,7 +6190,12 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, pthread_mutex_unlock(&clone_lock); } else { /* if no CLONE_VM, we consider it is a fork */ - if ((flags & ~(CSIGNAL | CLONE_NPTL_FLAGS2)) != 0) { + if (flags & CLONE_INVALID_FORK_FLAGS) { + return -TARGET_EINVAL; + } + + /* We can't support custom termination signals */ + if ((flags & CSIGNAL) != TARGET_SIGCHLD) { return -TARGET_EINVAL; } @@ -6643,6 +6798,77 @@ static inline abi_long host_to_target_itimerspec(abi_ulong target_addr, return 0; } +static inline abi_long target_to_host_timex(struct timex *host_tx, + abi_long target_addr) +{ + struct target_timex *target_tx; + + if (!lock_user_struct(VERIFY_READ, target_tx, target_addr, 1)) { + return -TARGET_EFAULT; + } + + __get_user(host_tx->modes, &target_tx->modes); + __get_user(host_tx->offset, &target_tx->offset); + __get_user(host_tx->freq, &target_tx->freq); + __get_user(host_tx->maxerror, &target_tx->maxerror); + __get_user(host_tx->esterror, &target_tx->esterror); + __get_user(host_tx->status, &target_tx->status); + __get_user(host_tx->constant, &target_tx->constant); + __get_user(host_tx->precision, &target_tx->precision); + __get_user(host_tx->tolerance, &target_tx->tolerance); + __get_user(host_tx->time.tv_sec, &target_tx->time.tv_sec); + __get_user(host_tx->time.tv_usec, &target_tx->time.tv_usec); + __get_user(host_tx->tick, &target_tx->tick); + __get_user(host_tx->ppsfreq, &target_tx->ppsfreq); + __get_user(host_tx->jitter, &target_tx->jitter); + __get_user(host_tx->shift, &target_tx->shift); + __get_user(host_tx->stabil, &target_tx->stabil); + __get_user(host_tx->jitcnt, &target_tx->jitcnt); + __get_user(host_tx->calcnt, &target_tx->calcnt); + __get_user(host_tx->errcnt, &target_tx->errcnt); + __get_user(host_tx->stbcnt, &target_tx->stbcnt); + __get_user(host_tx->tai, &target_tx->tai); + + unlock_user_struct(target_tx, target_addr, 0); + return 0; +} + +static inline abi_long host_to_target_timex(abi_long target_addr, + struct timex *host_tx) +{ + struct target_timex *target_tx; + + if (!lock_user_struct(VERIFY_WRITE, target_tx, target_addr, 0)) { + return -TARGET_EFAULT; + } + + __put_user(host_tx->modes, &target_tx->modes); + __put_user(host_tx->offset, &target_tx->offset); + __put_user(host_tx->freq, &target_tx->freq); + __put_user(host_tx->maxerror, &target_tx->maxerror); + __put_user(host_tx->esterror, &target_tx->esterror); + __put_user(host_tx->status, &target_tx->status); + __put_user(host_tx->constant, &target_tx->constant); + __put_user(host_tx->precision, &target_tx->precision); + __put_user(host_tx->tolerance, &target_tx->tolerance); + __put_user(host_tx->time.tv_sec, &target_tx->time.tv_sec); + __put_user(host_tx->time.tv_usec, &target_tx->time.tv_usec); + __put_user(host_tx->tick, &target_tx->tick); + __put_user(host_tx->ppsfreq, &target_tx->ppsfreq); + __put_user(host_tx->jitter, &target_tx->jitter); + __put_user(host_tx->shift, &target_tx->shift); + __put_user(host_tx->stabil, &target_tx->stabil); + __put_user(host_tx->jitcnt, &target_tx->jitcnt); + __put_user(host_tx->calcnt, &target_tx->calcnt); + __put_user(host_tx->errcnt, &target_tx->errcnt); + __put_user(host_tx->stbcnt, &target_tx->stbcnt); + __put_user(host_tx->tai, &target_tx->tai); + + unlock_user_struct(target_tx, target_addr, 1); + return 0; +} + + static inline abi_long target_to_host_sigevent(struct 
sigevent *host_sevp, abi_ulong target_addr) { @@ -7349,13 +7575,16 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; } + cpu_list_lock(); + if (CPU_NEXT(first_cpu)) { TaskState *ts; - cpu_list_lock(); /* Remove the CPU from the list. */ QTAILQ_REMOVE(&cpus, cpu, node); + cpu_list_unlock(); + ts = cpu->opaque; if (ts->child_tidptr) { put_user_u32(0, ts->child_tidptr); @@ -7368,6 +7597,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, rcu_unregister_thread(); pthread_exit(NULL); } + + cpu_list_unlock(); #ifdef TARGET_GPROF _mcleanup(); #endif @@ -7871,6 +8102,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, sync(); ret = 0; break; +#if defined(TARGET_NR_syncfs) && defined(CONFIG_SYNCFS) + case TARGET_NR_syncfs: + ret = get_errno(syncfs(arg1)); + break; +#endif case TARGET_NR_kill: ret = get_errno(safe_kill(arg1, target_to_host_signal(arg2))); break; @@ -8565,24 +8801,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #if defined(TARGET_NR_select) case TARGET_NR_select: -#if defined(TARGET_S390X) || defined(TARGET_ALPHA) - ret = do_select(arg1, arg2, arg3, arg4, arg5); +#if defined(TARGET_WANT_NI_OLD_SELECT) + /* some architectures used to have old_select here + * but now ENOSYS it. + */ + ret = -TARGET_ENOSYS; +#elif defined(TARGET_WANT_OLD_SYS_SELECT) + ret = do_old_select(arg1); #else - { - struct target_sel_arg_struct *sel; - abi_ulong inp, outp, exp, tvp; - long nsel; - - if (!lock_user_struct(VERIFY_READ, sel, arg1, 1)) - goto efault; - nsel = tswapal(sel->n); - inp = tswapal(sel->inp); - outp = tswapal(sel->outp); - exp = tswapal(sel->exp); - tvp = tswapal(sel->tvp); - unlock_user_struct(sel, arg1, 0); - ret = do_select(nsel, inp, outp, exp, tvp); - } + ret = do_select(arg1, arg2, arg3, arg4, arg5); #endif break; #endif @@ -9117,14 +9344,52 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = do_setsockopt(arg1, arg2, arg3, arg4, (socklen_t) arg5); break; #endif - +#if defined(TARGET_NR_syslog) case TARGET_NR_syslog: - if (!(p = lock_user_string(arg2))) - goto efault; - ret = get_errno(sys_syslog((int)arg1, p, (int)arg3)); - unlock_user(p, arg2, 0); - break; + { + int len = arg2; + switch (arg1) { + case TARGET_SYSLOG_ACTION_CLOSE: /* Close log */ + case TARGET_SYSLOG_ACTION_OPEN: /* Open log */ + case TARGET_SYSLOG_ACTION_CLEAR: /* Clear ring buffer */ + case TARGET_SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging */ + case TARGET_SYSLOG_ACTION_CONSOLE_ON: /* Enable logging */ + case TARGET_SYSLOG_ACTION_CONSOLE_LEVEL: /* Set messages level */ + case TARGET_SYSLOG_ACTION_SIZE_UNREAD: /* Number of chars */ + case TARGET_SYSLOG_ACTION_SIZE_BUFFER: /* Size of the buffer */ + { + ret = get_errno(sys_syslog((int)arg1, NULL, (int)arg3)); + } + break; + case TARGET_SYSLOG_ACTION_READ: /* Read from log */ + case TARGET_SYSLOG_ACTION_READ_CLEAR: /* Read/clear msgs */ + case TARGET_SYSLOG_ACTION_READ_ALL: /* Read last messages */ + { + ret = -TARGET_EINVAL; + if (len < 0) { + goto fail; + } + ret = 0; + if (len == 0) { + break; + } + p = lock_user(VERIFY_WRITE, arg2, arg3, 0); + if (!p) { + ret = -TARGET_EFAULT; + goto fail; + } + ret = get_errno(sys_syslog((int)arg1, p, (int)arg3)); + unlock_user(p, arg2, arg3); + } + break; + default: + ret = -EINVAL; + break; + } + } + break; +#endif case TARGET_NR_setitimer: { struct itimerval value, ovalue, *pvalue; @@ -9292,8 +9557,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_ipc case TARGET_NR_ipc: - ret = do_ipc(arg1, arg2, arg3, 
arg4, arg5, arg6); - break; + ret = do_ipc(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6); + break; #endif #ifdef TARGET_NR_semget case TARGET_NR_semget: @@ -9342,7 +9607,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #ifdef TARGET_NR_shmat case TARGET_NR_shmat: - ret = do_shmat(arg1, arg2, arg3); + ret = do_shmat(cpu_env, arg1, arg2, arg3); break; #endif #ifdef TARGET_NR_shmdt @@ -9420,7 +9685,37 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #endif #endif case TARGET_NR_adjtimex: - goto unimplemented; + { + struct timex host_buf; + + if (target_to_host_timex(&host_buf, arg1) != 0) { + goto efault; + } + ret = get_errno(adjtimex(&host_buf)); + if (!is_error(ret)) { + if (host_to_target_timex(arg1, &host_buf) != 0) { + goto efault; + } + } + } + break; +#if defined(TARGET_NR_clock_adjtime) && defined(CONFIG_CLOCK_ADJTIME) + case TARGET_NR_clock_adjtime: + { + struct timex htx, *phtx = &htx; + + if (target_to_host_timex(phtx, arg2) != 0) { + goto efault; + } + ret = get_errno(clock_adjtime(arg1, phtx)); + if (!is_error(ret) && phtx) { + if (host_to_target_timex(arg2, phtx) != 0) { + goto efault; + } + } + } + break; +#endif #ifdef TARGET_NR_create_module case TARGET_NR_create_module: #endif @@ -9654,6 +9949,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, pfd = NULL; target_pfd = NULL; if (nfds) { + if (nfds > (INT_MAX / sizeof(struct target_pollfd))) { + ret = -TARGET_EINVAL; + break; + } + target_pfd = lock_user(VERIFY_WRITE, arg1, sizeof(struct target_pollfd) * nfds, 1); if (!target_pfd) { @@ -9771,6 +10071,32 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } } break; +#if defined(TARGET_NR_preadv) + case TARGET_NR_preadv: + { + struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0); + if (vec != NULL) { + ret = get_errno(safe_preadv(arg1, vec, arg3, arg4, arg5)); + unlock_iovec(vec, arg2, arg3, 1); + } else { + ret = -host_to_target_errno(errno); + } + } + break; +#endif +#if defined(TARGET_NR_pwritev) + case TARGET_NR_pwritev: + { + struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1); + if (vec != NULL) { + ret = get_errno(safe_pwritev(arg1, vec, arg3, arg4, arg5)); + unlock_iovec(vec, arg2, arg3, 0); + } else { + ret = -host_to_target_errno(errno); + } + } + break; +#endif case TARGET_NR_getsid: ret = get_errno(getsid(arg1)); break; @@ -10527,7 +10853,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, info.si_code = si_code; info._sifields._sigfault._addr = ((CPUArchState *)cpu_env)->pc; - queue_signal((CPUArchState *)cpu_env, info.si_signo, &info); + queue_signal((CPUArchState *)cpu_env, info.si_signo, + QEMU_SI_FAULT, &info); } } break; @@ -11243,22 +11570,28 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #if defined(TARGET_NR_mq_open) && defined(__NR_mq_open) case TARGET_NR_mq_open: { - struct mq_attr posix_mq_attr, *attrp; + struct mq_attr posix_mq_attr; + int host_flags; + host_flags = target_to_host_bitmask(arg2, fcntl_flags_tbl); + if (copy_from_user_mq_attr(&posix_mq_attr, arg4) != 0) { + goto efault; + } p = lock_user_string(arg1 - 1); - if (arg4 != 0) { - copy_from_user_mq_attr (&posix_mq_attr, arg4); - attrp = &posix_mq_attr; - } else { - attrp = 0; + if (!p) { + goto efault; } - ret = get_errno(mq_open(p, arg2, arg3, attrp)); + ret = get_errno(mq_open(p, host_flags, arg3, &posix_mq_attr)); unlock_user (p, arg1, 0); } break; case TARGET_NR_mq_unlink: p = lock_user_string(arg1 - 1); + if (!p) { + ret = -TARGET_EFAULT; + break; + } ret = get_errno(mq_unlink(p)); 
unlock_user (p, arg1, 0); break; @@ -11494,13 +11827,23 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, int maxevents = arg3; int timeout = arg4; + if (maxevents <= 0 || maxevents > TARGET_EP_MAX_EVENTS) { + ret = -TARGET_EINVAL; + break; + } + target_ep = lock_user(VERIFY_WRITE, arg2, maxevents * sizeof(struct target_epoll_event), 1); if (!target_ep) { goto efault; } - ep = alloca(maxevents * sizeof(struct epoll_event)); + ep = g_try_new(struct epoll_event, maxevents); + if (!ep) { + unlock_user(target_ep, arg2, 0); + ret = -TARGET_ENOMEM; + break; + } switch (num) { #if defined(TARGET_NR_epoll_pwait) @@ -11518,8 +11861,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, target_set = lock_user(VERIFY_READ, arg5, sizeof(target_sigset_t), 1); if (!target_set) { - unlock_user(target_ep, arg2, 0); - goto efault; + ret = -TARGET_EFAULT; + break; } target_to_host_sigset(set, target_set); unlock_user(target_set, arg5, 0); @@ -11547,8 +11890,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, target_ep[i].events = tswap32(ep[i].events); target_ep[i].data.u64 = tswap64(ep[i].data.u64); } + unlock_user(target_ep, arg2, + ret * sizeof(struct target_epoll_event)); + } else { + unlock_user(target_ep, arg2, 0); } - unlock_user(target_ep, arg2, ret * sizeof(struct target_epoll_event)); + g_free(ep); break; } #endif @@ -11606,7 +11953,8 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, info.si_errno = 0; info.si_code = TARGET_SEGV_MAPERR; info._sifields._sigfault._addr = arg6; - queue_signal((CPUArchState *)cpu_env, info.si_signo, &info); + queue_signal((CPUArchState *)cpu_env, info.si_signo, + QEMU_SI_FAULT, &info); ret = 0xdeadbeef; } @@ -11808,6 +12156,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, ret = get_errno(unshare(arg1)); break; #endif +#if defined(TARGET_NR_kcmp) && defined(__NR_kcmp) + case TARGET_NR_kcmp: + ret = get_errno(kcmp(arg1, arg2, arg3, arg4, arg5)); + break; +#endif default: unimplemented: diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h index 783565463f..0b15466743 100644 --- a/linux-user/syscall_defs.h +++ b/linux-user/syscall_defs.h @@ -9,26 +9,28 @@ #include "syscall_nr.h" -#define SOCKOP_socket 1 -#define SOCKOP_bind 2 -#define SOCKOP_connect 3 -#define SOCKOP_listen 4 -#define SOCKOP_accept 5 -#define SOCKOP_getsockname 6 -#define SOCKOP_getpeername 7 -#define SOCKOP_socketpair 8 -#define SOCKOP_send 9 -#define SOCKOP_recv 10 -#define SOCKOP_sendto 11 -#define SOCKOP_recvfrom 12 -#define SOCKOP_shutdown 13 -#define SOCKOP_setsockopt 14 -#define SOCKOP_getsockopt 15 -#define SOCKOP_sendmsg 16 -#define SOCKOP_recvmsg 17 -#define SOCKOP_accept4 18 -#define SOCKOP_recvmmsg 19 -#define SOCKOP_sendmmsg 20 + +/* socket operations for socketcall() */ +#define TARGET_SYS_SOCKET 1 /* socket() */ +#define TARGET_SYS_BIND 2 /* bind() */ +#define TARGET_SYS_CONNECT 3 /* connect() */ +#define TARGET_SYS_LISTEN 4 /* listen() */ +#define TARGET_SYS_ACCEPT 5 /* accept() */ +#define TARGET_SYS_GETSOCKNAME 6 /* getsockname() */ +#define TARGET_SYS_GETPEERNAME 7 /* getpeername() */ +#define TARGET_SYS_SOCKETPAIR 8 /* socketpair() */ +#define TARGET_SYS_SEND 9 /* send() */ +#define TARGET_SYS_RECV 10 /* recv() */ +#define TARGET_SYS_SENDTO 11 /* sendto() */ +#define TARGET_SYS_RECVFROM 12 /* recvfrom() */ +#define TARGET_SYS_SHUTDOWN 13 /* shutdown() */ +#define TARGET_SYS_SETSOCKOPT 14 /* setsockopt() */ +#define TARGET_SYS_GETSOCKOPT 15 /* getsockopt() */ +#define TARGET_SYS_SENDMSG 16 /* sendmsg() */ 
+#define TARGET_SYS_RECVMSG 17 /* recvmsg() */ +#define TARGET_SYS_ACCEPT4 18 /* accept4() */ +#define TARGET_SYS_RECVMMSG 19 /* recvmmsg() */ +#define TARGET_SYS_SENDMMSG 20 /* sendmmsg() */ #define IPCOP_semop 1 #define IPCOP_semget 2 @@ -207,6 +209,34 @@ struct target_itimerspec { struct target_timespec it_value; }; +struct target_timex { + abi_uint modes; /* Mode selector */ + abi_long offset; /* Time offset */ + abi_long freq; /* Frequency offset */ + abi_long maxerror; /* Maximum error (microseconds) */ + abi_long esterror; /* Estimated error (microseconds) */ + abi_int status; /* Clock command/status */ + abi_long constant; /* PLL (phase-locked loop) time constant */ + abi_long precision; /* Clock precision (microseconds, ro) */ + abi_long tolerance; /* Clock freq. tolerance (ppm, ro) */ + struct target_timeval time; /* Current time */ + abi_long tick; /* Microseconds between clock ticks */ + abi_long ppsfreq; /* PPS (pulse per second) frequency */ + abi_long jitter; /* PPS jitter (ro); nanoseconds */ + abi_int shift; /* PPS interval duration (seconds) */ + abi_long stabil; /* PPS stability */ + abi_long jitcnt; /* PPS jitter limit exceeded (ro) */ + abi_long calcnt; /* PPS calibration intervals */ + abi_long errcnt; /* PPS calibration errors */ + abi_long stbcnt; /* PPS stability limit exceeded */ + abi_int tai; /* TAI offset */ + + /* Further padding bytes to allow for future expansion */ + abi_int:32; abi_int:32; abi_int:32; abi_int:32; + abi_int:32; abi_int:32; abi_int:32; abi_int:32; + abi_int:32; abi_int:32; abi_int:32; +}; + typedef abi_long target_clock_t; #define TARGET_HZ 100 @@ -898,7 +928,11 @@ struct target_pollfd { #define TARGET_KDSETLED 0x4B32 /* set led state [lights, not flags] */ #define TARGET_KDSIGACCEPT 0x4B4E +#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SH4) +#define TARGET_SIOCATMARK TARGET_IOR('s', 7, int) +#else #define TARGET_SIOCATMARK 0x8905 +#endif /* Networking ioctls */ #define TARGET_SIOCADDRT 0x890B /* add routing table entry */ @@ -998,6 +1032,12 @@ struct target_pollfd { #define TARGET_FIBMAP TARGET_IO(0x00,1) /* bmap access */ #define TARGET_FIGETBSZ TARGET_IO(0x00,2) /* get the block size used for bmap */ +/* Note that the ioctl numbers claim type "long" but the actual type + * used by the kernel is "int". + */ +#define TARGET_FS_IOC_GETFLAGS TARGET_IOR('f', 1, long) +#define TARGET_FS_IOC_SETFLAGS TARGET_IOW('f', 2, long) + #define TARGET_FS_IOC_FIEMAP TARGET_IOWR('f',11,struct fiemap) /* cdrom commands */ @@ -2154,7 +2194,7 @@ struct target_statfs64 { #define TARGET_F_SETLK 6 #define TARGET_F_SETLKW 7 #define TARGET_F_SETOWN 24 /* for sockets. */ -#define TARGET_F_GETOWN 25 /* for sockets. */ +#define TARGET_F_GETOWN 23 /* for sockets. 
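/*
 * A host-side illustration of the FS_IOC_GETFLAGS ioctl that the new
 * TARGET_FS_IOC_GETFLAGS/SETFLAGS numbers above describe: the ioctl number
 * is encoded with type "long", but the kernel actually reads and writes an
 * int, so the argument below is an int. Works on filesystems that support
 * inode flags (e.g. ext4); illustrative only.
 */
#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : ".";
    int flags = 0;                      /* int, despite _IOR('f', 1, long) */
    int fd = open(path, O_RDONLY);

    if (fd == -1 || ioctl(fd, FS_IOC_GETFLAGS, &flags) == -1) {
        perror("FS_IOC_GETFLAGS");
        return 1;
    }
    printf("inode flags of %s: 0x%x\n", path, flags);
    close(fd);
    return 0;
}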
*/ #else #define TARGET_F_GETLK 5 #define TARGET_F_SETLK 6 @@ -2323,14 +2363,20 @@ struct target_flock { short l_whence; abi_long l_start; abi_long l_len; +#if defined(TARGET_MIPS) + abi_long l_sysid; +#endif int l_pid; +#if defined(TARGET_MIPS) + abi_long pad[4]; +#endif }; struct target_flock64 { short l_type; short l_whence; -#if defined(TARGET_PPC) || defined(TARGET_X86_64) || defined(TARGET_MIPS) \ - || defined(TARGET_SPARC) || defined(TARGET_HPPA) \ +#if defined(TARGET_PPC) || defined(TARGET_X86_64) \ + || defined(TARGET_MIPS) || defined(TARGET_SPARC) \ || defined(TARGET_MICROBLAZE) || defined(TARGET_TILEGX) int __pad; #endif @@ -2579,6 +2625,9 @@ struct target_epoll_event { abi_uint events; target_epoll_data_t data; } TARGET_EPOLL_PACKED; + +#define TARGET_EP_MAX_EVENTS (INT_MAX / sizeof(struct target_epoll_event)) + #endif struct target_rlimit64 { uint64_t rlim_cur; @@ -2609,15 +2658,19 @@ typedef int32_t target_timer_t; struct target_sigevent { target_sigval_t sigev_value; - int32_t sigev_signo; - int32_t sigev_notify; + abi_int sigev_signo; + abi_int sigev_notify; union { - int32_t _pad[TARGET_SIGEV_PAD_SIZE]; - int32_t _tid; + abi_int _pad[TARGET_SIGEV_PAD_SIZE]; + abi_int _tid; + /* The kernel (and thus QEMU) never looks at these; + * they're only used as part of the ABI between a + * userspace program and libc. + */ struct { - void (*_function)(sigval_t); - void *_attribute; + abi_ulong _function; + abi_ulong _attribute; } _sigev_thread; } _sigev_un; }; @@ -2633,4 +2686,29 @@ struct target_user_cap_data { uint32_t inheritable; }; +/* from kernel's include/linux/syslog.h */ + +/* Close the log. Currently a NOP. */ +#define TARGET_SYSLOG_ACTION_CLOSE 0 +/* Open the log. Currently a NOP. */ +#define TARGET_SYSLOG_ACTION_OPEN 1 +/* Read from the log. */ +#define TARGET_SYSLOG_ACTION_READ 2 +/* Read all messages remaining in the ring buffer. */ +#define TARGET_SYSLOG_ACTION_READ_ALL 3 +/* Read and clear all messages remaining in the ring buffer */ +#define TARGET_SYSLOG_ACTION_READ_CLEAR 4 +/* Clear ring buffer. 
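/*
 * A small host-side example of the struct flock usage that the per-target
 * TARGET_F_* values and struct target_flock layouts above have to translate:
 * take an exclusive write lock on a temporary file, then release it.
 * Illustrative only.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
    char path[] = "/tmp/flock-demo-XXXXXX";
    int fd = mkstemp(path);
    struct flock fl = {
        .l_type   = F_WRLCK,    /* exclusive lock      */
        .l_whence = SEEK_SET,
        .l_start  = 0,
        .l_len    = 0,          /* 0 == to end of file */
    };

    if (fd == -1 || fcntl(fd, F_SETLKW, &fl) == -1) {
        perror("F_SETLKW");
        return 1;
    }
    printf("locked %s in pid %d\n", path, getpid());

    fl.l_type = F_UNLCK;        /* release the lock */
    fcntl(fd, F_SETLK, &fl);
    close(fd);
    unlink(path);
    return 0;
}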
*/ +#define TARGET_SYSLOG_ACTION_CLEAR 5 +/* Disable printk's to console */ +#define TARGET_SYSLOG_ACTION_CONSOLE_OFF 6 +/* Enable printk's to console */ +#define TARGET_SYSLOG_ACTION_CONSOLE_ON 7 +/* Set level of messages printed to console */ +#define TARGET_SYSLOG_ACTION_CONSOLE_LEVEL 8 +/* Return number of unread characters in the log buffer */ +#define TARGET_SYSLOG_ACTION_SIZE_UNREAD 9 +/* Return size of the log buffer */ +#define TARGET_SYSLOG_ACTION_SIZE_BUFFER 10 + #endif diff --git a/linux-user/tilegx/syscall_nr.h b/linux-user/tilegx/syscall_nr.h index 8e30cd1ae9..c104b94230 100644 --- a/linux-user/tilegx/syscall_nr.h +++ b/linux-user/tilegx/syscall_nr.h @@ -311,7 +311,6 @@ #define TARGET_NR_creat 1064 #define TARGET_NR_getdents 1065 #define TARGET_NR_futimesat 1066 -#define TARGET_NR_select 1067 #define TARGET_NR_poll 1068 #define TARGET_NR_epoll_wait 1069 #define TARGET_NR_ustat 1070 diff --git a/main-loop.c b/main-loop.c index 6a7f8d30bd..ad10bca211 100644 --- a/main-loop.c +++ b/main-loop.c @@ -161,9 +161,11 @@ int qemu_init_main_loop(Error **errp) qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL); gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); src = aio_get_g_source(qemu_aio_context); + g_source_set_name(src, "aio-context"); g_source_attach(src, NULL); g_source_unref(src); src = iohandler_get_g_source(); + g_source_set_name(src, "io-handler"); g_source_attach(src, NULL); g_source_unref(src); return 0; @@ -232,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout) if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) { static bool notified; - if (!notified && !qtest_driver()) { + if (!notified && !qtest_enabled() && !qtest_driver()) { fprintf(stderr, "main-loop: WARNING: I/O thread spun for %d iterations\n", MAX_MAIN_LOOP_SPIN); diff --git a/memory.c b/memory.c index 74a50e9b03..1792957cda 100644 --- a/memory.c +++ b/memory.c @@ -101,13 +101,6 @@ static AddrRange addrrange_intersection(AddrRange r1, AddrRange r2) enum ListenerDirection { Forward, Reverse }; -static bool memory_listener_match(MemoryListener *listener, - MemoryRegionSection *section) -{ - return !listener->address_space_filter - || listener->address_space_filter == section->address_space; -} - #define MEMORY_LISTENER_CALL_GLOBAL(_callback, _direction, _args...) \ do { \ MemoryListener *_listener; \ @@ -133,24 +126,23 @@ static bool memory_listener_match(MemoryListener *listener, } \ } while (0) -#define MEMORY_LISTENER_CALL(_callback, _direction, _section, _args...) \ +#define MEMORY_LISTENER_CALL(_as, _callback, _direction, _section, _args...) \ do { \ MemoryListener *_listener; \ + struct memory_listeners_as *list = &(_as)->listeners; \ \ switch (_direction) { \ case Forward: \ - QTAILQ_FOREACH(_listener, &memory_listeners, link) { \ - if (_listener->_callback \ - && memory_listener_match(_listener, _section)) { \ + QTAILQ_FOREACH(_listener, list, link_as) { \ + if (_listener->_callback) { \ _listener->_callback(_listener, _section, ##_args); \ } \ } \ break; \ case Reverse: \ - QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners, \ - memory_listeners, link) { \ - if (_listener->_callback \ - && memory_listener_match(_listener, _section)) { \ + QTAILQ_FOREACH_REVERSE(_listener, list, memory_listeners_as, \ + link_as) { \ + if (_listener->_callback) { \ _listener->_callback(_listener, _section, ##_args); \ } \ } \ @@ -162,14 +154,10 @@ static bool memory_listener_match(MemoryListener *listener, /* No need to ref/unref .mr, the FlatRange keeps it alive. 
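/*
 * The TARGET_SYSLOG_ACTION_* values above mirror the host syslog(2) command
 * numbers; on glibc the raw system call is reached through klogctl(). A
 * minimal sketch querying the ring-buffer size (action 10); reading the log
 * itself may additionally require CAP_SYSLOG, depending on
 * kernel.dmesg_restrict. Illustrative only.
 */
#include <stdio.h>
#include <sys/klog.h>

int main(void)
{
    /* 10 == SYSLOG_ACTION_SIZE_BUFFER: total size of the kernel log buffer */
    int size = klogctl(10, NULL, 0);

    if (size < 0) {
        perror("klogctl");
        return 1;
    }
    printf("kernel log buffer is %d bytes\n", size);
    return 0;
}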
*/ #define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \ - MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) { \ - .mr = (fr)->mr, \ - .address_space = (as), \ - .offset_within_region = (fr)->offset_in_region, \ - .size = (fr)->addr.size, \ - .offset_within_address_space = int128_get64((fr)->addr.start), \ - .readonly = (fr)->readonly, \ - }), ##_args) + do { \ + MemoryRegionSection mrs = section_from_flat_range(fr, as); \ + MEMORY_LISTENER_CALL(as, callback, dir, &mrs, ##_args); \ + } while(0) struct CoalescedMemoryRange { AddrRange addr; @@ -249,6 +237,19 @@ typedef struct AddressSpaceOps AddressSpaceOps; #define FOR_EACH_FLAT_RANGE(var, view) \ for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var) +static inline MemoryRegionSection +section_from_flat_range(FlatRange *fr, AddressSpace *as) +{ + return (MemoryRegionSection) { + .mr = fr->mr, + .address_space = as, + .offset_within_region = fr->offset_in_region, + .size = fr->addr.size, + .offset_within_address_space = int128_get64(fr->addr.start), + .readonly = fr->readonly, + }; +} + static bool flatrange_equal(FlatRange *a, FlatRange *b) { return a->mr == b->mr @@ -744,7 +745,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as, .offset_within_address_space = int128_get64(fd->addr.start), .size = fd->addr.size, }; - MEMORY_LISTENER_CALL(eventfd_del, Forward, §ion, + MEMORY_LISTENER_CALL(as, eventfd_del, Forward, §ion, fd->match_data, fd->data, fd->e); ++iold; } else if (inew < fds_new_nb @@ -757,7 +758,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as, .offset_within_address_space = int128_get64(fd->addr.start), .size = fd->addr.size, }; - MEMORY_LISTENER_CALL(eventfd_add, Reverse, §ion, + MEMORY_LISTENER_CALL(as, eventfd_add, Reverse, §ion, fd->match_data, fd->data, fd->e); ++inew; } else { @@ -948,11 +949,6 @@ static void memory_region_destructor_ram(MemoryRegion *mr) qemu_ram_free(mr->ram_block); } -static void memory_region_destructor_rom_device(MemoryRegion *mr) -{ - qemu_ram_free(mr->ram_block); -} - static bool memory_region_need_escape(char c) { return c == '/' || c == '[' || c == '\\' || c == ']'; @@ -1143,6 +1139,71 @@ const MemoryRegionOps unassigned_mem_ops = { .endianness = DEVICE_NATIVE_ENDIAN, }; +static uint64_t memory_region_ram_device_read(void *opaque, + hwaddr addr, unsigned size) +{ + MemoryRegion *mr = opaque; + uint64_t data = (uint64_t)~0; + + switch (size) { + case 1: + data = *(uint8_t *)(mr->ram_block->host + addr); + break; + case 2: + data = *(uint16_t *)(mr->ram_block->host + addr); + break; + case 4: + data = *(uint32_t *)(mr->ram_block->host + addr); + break; + case 8: + data = *(uint64_t *)(mr->ram_block->host + addr); + break; + } + + trace_memory_region_ram_device_read(get_cpu_index(), mr, addr, data, size); + + return data; +} + +static void memory_region_ram_device_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MemoryRegion *mr = opaque; + + trace_memory_region_ram_device_write(get_cpu_index(), mr, addr, data, size); + + switch (size) { + case 1: + *(uint8_t *)(mr->ram_block->host + addr) = (uint8_t)data; + break; + case 2: + *(uint16_t *)(mr->ram_block->host + addr) = (uint16_t)data; + break; + case 4: + *(uint32_t *)(mr->ram_block->host + addr) = (uint32_t)data; + break; + case 8: + *(uint64_t *)(mr->ram_block->host + addr) = data; + break; + } +} + +static const MemoryRegionOps ram_device_mem_ops = { + .read = memory_region_ram_device_read, + .write = memory_region_ram_device_write, + .endianness = 
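/*
 * The ram_device accessors above dispatch on the access size and touch the
 * backing host memory directly. The same pattern in isolation, on a plain
 * byte buffer (a sketch, not the QEMU MemoryRegionOps API; memcpy is used
 * here only to sidestep alignment concerns).
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t sized_read(const void *base, size_t offset, unsigned size)
{
    uint64_t val = ~0ULL;               /* mirrors the (uint64_t)~0 default */

    switch (size) {
    case 1: { uint8_t  v; memcpy(&v, (const char *)base + offset, 1); val = v; break; }
    case 2: { uint16_t v; memcpy(&v, (const char *)base + offset, 2); val = v; break; }
    case 4: { uint32_t v; memcpy(&v, (const char *)base + offset, 4); val = v; break; }
    case 8: { uint64_t v; memcpy(&v, (const char *)base + offset, 8); val = v; break; }
    }
    return val;
}

int main(void)
{
    uint8_t ram[8] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };

    printf("1-byte read at 0: 0x%llx\n", (unsigned long long)sized_read(ram, 0, 1));
    printf("4-byte read at 0: 0x%llx\n", (unsigned long long)sized_read(ram, 0, 4));
    return 0;
}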
DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = true, + }, +}; + bool memory_region_access_valid(MemoryRegion *mr, hwaddr addr, unsigned size, @@ -1370,9 +1431,16 @@ void memory_region_init_ram_ptr(MemoryRegion *mr, mr->ram_block = qemu_ram_alloc_from_ptr(size, ptr, mr, &error_fatal); } -void memory_region_set_skip_dump(MemoryRegion *mr) +void memory_region_init_ram_device_ptr(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + void *ptr) { - mr->skip_dump = true; + memory_region_init_ram_ptr(mr, owner, name, size, ptr); + mr->ram_device = true; + mr->ops = &ram_device_mem_ops; + mr->opaque = mr; } void memory_region_init_alias(MemoryRegion *mr, @@ -1416,7 +1484,7 @@ void memory_region_init_rom_device(MemoryRegion *mr, mr->opaque = opaque; mr->terminates = true; mr->rom_device = true; - mr->destructor = memory_region_destructor_rom_device; + mr->destructor = memory_region_destructor_ram; mr->ram_block = qemu_ram_alloc(size, mr, errp); } @@ -1429,7 +1497,8 @@ void memory_region_init_iommu(MemoryRegion *mr, memory_region_init(mr, owner, name, size); mr->iommu_ops = ops, mr->terminates = true; /* then re-forwards */ - notifier_list_init(&mr->iommu_notify); + QLIST_INIT(&mr->iommu_notify); + mr->iommu_notify_flags = IOMMU_NOTIFIER_NONE; } static void memory_region_finalize(Object *obj) @@ -1505,15 +1574,15 @@ const char *memory_region_name(const MemoryRegion *mr) return mr->name; } -bool memory_region_is_skip_dump(MemoryRegion *mr) +bool memory_region_is_ram_device(MemoryRegion *mr) { - return mr->skip_dump; + return mr->ram_device; } uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) { uint8_t mask = mr->dirty_log_mask; - if (global_dirty_log) { + if (global_dirty_log && mr->ram_block) { mask |= (1 << DIRTY_MEMORY_MIGRATION); } return mask; @@ -1524,13 +1593,31 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) return memory_region_get_dirty_log_mask(mr) & (1 << client); } -void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n) +static void memory_region_update_iommu_notify_flags(MemoryRegion *mr) { - if (mr->iommu_ops->notify_started && - QLIST_EMPTY(&mr->iommu_notify.notifiers)) { - mr->iommu_ops->notify_started(mr); + IOMMUNotifierFlag flags = IOMMU_NOTIFIER_NONE; + IOMMUNotifier *iommu_notifier; + + QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) { + flags |= iommu_notifier->notifier_flags; } - notifier_list_add(&mr->iommu_notify, n); + + if (flags != mr->iommu_notify_flags && + mr->iommu_ops->notify_flag_changed) { + mr->iommu_ops->notify_flag_changed(mr, mr->iommu_notify_flags, + flags); + } + + mr->iommu_notify_flags = flags; +} + +void memory_region_register_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n) +{ + /* We need to register for at least one bitfield */ + assert(n->notifier_flags != IOMMU_NOTIFIER_NONE); + QLIST_INSERT_HEAD(&mr->iommu_notify, n, node); + memory_region_update_iommu_notify_flags(mr); } uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr) @@ -1542,7 +1629,8 @@ uint64_t memory_region_iommu_get_min_page_size(MemoryRegion *mr) return TARGET_PAGE_SIZE; } -void memory_region_iommu_replay(MemoryRegion *mr, Notifier *n, bool is_write) +void memory_region_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n, + bool is_write) { hwaddr addr, granularity; IOMMUTLBEntry iotlb; @@ -1563,20 +1651,32 @@ void memory_region_iommu_replay(MemoryRegion *mr, Notifier 
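/*
 * The IOMMU notifier bookkeeping above keeps one aggregate flag word per
 * region: it ORs together the flags of every registered notifier and calls
 * the notify_flag_changed hook only when that aggregate changes. The same
 * idea reduced to a plain singly linked list (a sketch, not the QEMU
 * IOMMUNotifier/QLIST types).
 */
#include <stdio.h>

enum { NOTIFY_NONE = 0, NOTIFY_MAP = 1, NOTIFY_UNMAP = 2 };

struct notifier {
    int flags;
    struct notifier *next;
};

static int aggregate_flags(const struct notifier *head)
{
    int flags = NOTIFY_NONE;

    for (; head; head = head->next) {
        flags |= head->flags;
    }
    return flags;
}

int main(void)
{
    struct notifier unmap_only = { NOTIFY_UNMAP, NULL };
    struct notifier map_only = { NOTIFY_MAP, &unmap_only };
    int old_flags = NOTIFY_NONE;
    int new_flags = aggregate_flags(&map_only);

    if (new_flags != old_flags) {
        printf("notify_flag_changed: 0x%x -> 0x%x\n", old_flags, new_flags);
    }
    return 0;
}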
*n, bool is_write) } } -void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n) +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, + IOMMUNotifier *n) { - notifier_remove(n); - if (mr->iommu_ops->notify_stopped && - QLIST_EMPTY(&mr->iommu_notify.notifiers)) { - mr->iommu_ops->notify_stopped(mr); - } + QLIST_REMOVE(n, node); + memory_region_update_iommu_notify_flags(mr); } void memory_region_notify_iommu(MemoryRegion *mr, IOMMUTLBEntry entry) { + IOMMUNotifier *iommu_notifier; + IOMMUNotifierFlag request_flags; + assert(memory_region_is_iommu(mr)); - notifier_list_notify(&mr->iommu_notify, &entry); + + if (entry.perm & IOMMU_RW) { + request_flags = IOMMU_NOTIFIER_MAP; + } else { + request_flags = IOMMU_NOTIFIER_UNMAP; + } + + QLIST_FOREACH(iommu_notifier, &mr->iommu_notify, node) { + if (iommu_notifier->notifier_flags & request_flags) { + iommu_notifier->notify(iommu_notifier, &entry); + } + } } void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) @@ -1625,14 +1725,26 @@ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr, void memory_region_sync_dirty_bitmap(MemoryRegion *mr) { + MemoryListener *listener; AddressSpace *as; + FlatView *view; FlatRange *fr; - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - FlatView *view = address_space_get_flatview(as); + /* If the same address space has multiple log_sync listeners, we + * visit that address space's FlatView multiple times. But because + * log_sync listeners are rare, it's still cheaper than walking each + * address space once. + */ + QTAILQ_FOREACH(listener, &memory_listeners, link) { + if (!listener->log_sync) { + continue; + } + as = listener->address_space; + view = address_space_get_flatview(as); FOR_EACH_FLAT_RANGE(fr, view) { if (fr->mr == mr) { - MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync); + MemoryRegionSection mrs = section_from_flat_range(fr, as); + listener->log_sync(listener, &mrs); } } flatview_unref(view); @@ -1749,7 +1861,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa .size = fr->addr.size, }; - MEMORY_LISTENER_CALL(coalesced_mmio_del, Reverse, §ion, + MEMORY_LISTENER_CALL(as, coalesced_mmio_del, Reverse, §ion, int128_get64(fr->addr.start), int128_get64(fr->addr.size)); QTAILQ_FOREACH(cmr, &mr->coalesced, link) { @@ -1760,7 +1872,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa continue; } tmp = addrrange_intersection(tmp, fr->addr); - MEMORY_LISTENER_CALL(coalesced_mmio_add, Forward, §ion, + MEMORY_LISTENER_CALL(as, coalesced_mmio_add, Forward, §ion, int128_get64(tmp.start), int128_get64(tmp.size)); } @@ -2140,16 +2252,27 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr) return mr && mr != container; } -void address_space_sync_dirty_bitmap(AddressSpace *as) +void memory_global_dirty_log_sync(void) { + MemoryListener *listener; + AddressSpace *as; FlatView *view; FlatRange *fr; - view = address_space_get_flatview(as); - FOR_EACH_FLAT_RANGE(fr, view) { - MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync); + QTAILQ_FOREACH(listener, &memory_listeners, link) { + if (!listener->log_sync) { + continue; + } + as = listener->address_space; + view = address_space_get_flatview(as); + FOR_EACH_FLAT_RANGE(fr, view) { + if (fr->dirty_log_mask) { + MemoryRegionSection mrs = section_from_flat_range(fr, as); + listener->log_sync(listener, &mrs); + } + } + flatview_unref(view); } - flatview_unref(view); } void memory_global_dirty_log_start(void) @@ 
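/*
 * memory_region_notify_iommu() above classifies each translation update as a
 * MAP or UNMAP event from entry.perm and delivers it only to notifiers whose
 * flag mask asked for that class. The delivery loop on its own (a sketch;
 * the listener names are illustrative labels, not QEMU objects).
 */
#include <stdio.h>

enum { NOTIFY_MAP = 1, NOTIFY_UNMAP = 2 };

struct notifier {
    const char *name;
    int flags;
    struct notifier *next;
};

static void notify_all(const struct notifier *head, int entry_perm)
{
    /* usable mappings are MAP events, revoked ones are UNMAP events */
    int request = entry_perm ? NOTIFY_MAP : NOTIFY_UNMAP;

    for (; head; head = head->next) {
        if (head->flags & request) {
            printf("%s gets %s\n", head->name,
                   request == NOTIFY_MAP ? "MAP" : "UNMAP");
        }
    }
}

int main(void)
{
    struct notifier b = { "unmap-only listener", NOTIFY_UNMAP, NULL };
    struct notifier a = { "map+unmap listener", NOTIFY_MAP | NOTIFY_UNMAP, &b };

    notify_all(&a, 1);   /* only the map+unmap listener is notified */
    notify_all(&a, 0);   /* both listeners are notified             */
    return 0;
}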
-2182,11 +2305,6 @@ static void listener_add_address_space(MemoryListener *listener, FlatView *view; FlatRange *fr; - if (listener->address_space_filter - && listener->address_space_filter != as) { - return; - } - if (listener->begin) { listener->begin(listener); } @@ -2219,12 +2337,11 @@ static void listener_add_address_space(MemoryListener *listener, flatview_unref(view); } -void memory_listener_register(MemoryListener *listener, AddressSpace *filter) +void memory_listener_register(MemoryListener *listener, AddressSpace *as) { MemoryListener *other = NULL; - AddressSpace *as; - listener->address_space_filter = filter; + listener->address_space = as; if (QTAILQ_EMPTY(&memory_listeners) || listener->priority >= QTAILQ_LAST(&memory_listeners, memory_listeners)->priority) { @@ -2238,14 +2355,26 @@ void memory_listener_register(MemoryListener *listener, AddressSpace *filter) QTAILQ_INSERT_BEFORE(other, listener, link); } - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - listener_add_address_space(listener, as); + if (QTAILQ_EMPTY(&as->listeners) + || listener->priority >= QTAILQ_LAST(&as->listeners, + memory_listeners)->priority) { + QTAILQ_INSERT_TAIL(&as->listeners, listener, link_as); + } else { + QTAILQ_FOREACH(other, &as->listeners, link_as) { + if (listener->priority < other->priority) { + break; + } + } + QTAILQ_INSERT_BEFORE(other, listener, link_as); } + + listener_add_address_space(listener, as); } void memory_listener_unregister(MemoryListener *listener) { QTAILQ_REMOVE(&memory_listeners, listener, link); + QTAILQ_REMOVE(&listener->address_space->listeners, listener, link_as); } void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) @@ -2259,6 +2388,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) flatview_init(as->current_map); as->ioeventfd_nb = 0; as->ioeventfds = NULL; + QTAILQ_INIT(&as->listeners); QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link); as->name = g_strdup(name ? 
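/*
 * memory_listener_register() above inserts the listener into both the global
 * list and the new per-AddressSpace list, keeping each ordered by ascending
 * priority: append when the priority is >= the current tail, otherwise
 * insert before the first entry with a strictly higher priority, so equal
 * priorities preserve registration order. The ordering rule by itself, on a
 * toy singly linked list (a sketch, not the QTAILQ macros).
 */
#include <stdio.h>

struct listener {
    int priority;
    struct listener *next;
};

static void insert_by_priority(struct listener **head, struct listener *l)
{
    struct listener **p = head;

    while (*p && (*p)->priority <= l->priority) {
        p = &(*p)->next;        /* skip entries with lower or equal priority */
    }
    l->next = *p;
    *p = l;
}

int main(void)
{
    struct listener a = { 10, NULL }, b = { 0, NULL }, c = { 10, NULL };
    struct listener *head = NULL, *it;

    insert_by_priority(&head, &a);
    insert_by_priority(&head, &b);
    insert_by_priority(&head, &c);   /* lands after 'a', not before it */
    for (it = head; it; it = it->next) {
        printf("priority %d\n", it->priority);
    }
    return 0;
}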
name : "anonymous"); address_space_init_dispatch(as); @@ -2268,14 +2398,10 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) static void do_address_space_destroy(AddressSpace *as) { - MemoryListener *listener; bool do_free = as->malloced; address_space_destroy_dispatch(as); - - QTAILQ_FOREACH(listener, &memory_listeners, link) { - assert(listener->address_space_filter != as); - } + assert(QTAILQ_EMPTY(&as->listeners)); flatview_unref(as->current_map); g_free(as->name); diff --git a/memory_mapping.c b/memory_mapping.c index e3e0d95172..6a39d71da2 100644 --- a/memory_mapping.c +++ b/memory_mapping.c @@ -206,7 +206,7 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, /* we only care about RAM */ if (!memory_region_is_ram(section->mr) || - memory_region_is_skip_dump(section->mr)) { + memory_region_is_ram_device(section->mr)) { return; } diff --git a/migration/Makefile.objs b/migration/Makefile.objs index 30ad945918..3f3e237142 100644 --- a/migration/Makefile.objs +++ b/migration/Makefile.objs @@ -1,5 +1,7 @@ common-obj-y += migration.o socket.o fd.o exec.o common-obj-y += tls.o +common-obj-y += colo-comm.o +common-obj-$(CONFIG_COLO) += colo.o colo-failover.o common-obj-y += vmstate.o common-obj-y += qemu-file.o common-obj-y += qemu-file-channel.o diff --git a/migration/colo-comm.c b/migration/colo-comm.c new file mode 100644 index 0000000000..20b60ec384 --- /dev/null +++ b/migration/colo-comm.c @@ -0,0 +1,72 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include +#include "trace.h" + +typedef struct { + bool colo_requested; +} COLOInfo; + +static COLOInfo colo_info; + +COLOMode get_colo_mode(void) +{ + if (migration_in_colo_state()) { + return COLO_MODE_PRIMARY; + } else if (migration_incoming_in_colo_state()) { + return COLO_MODE_SECONDARY; + } else { + return COLO_MODE_UNKNOWN; + } +} + +static void colo_info_pre_save(void *opaque) +{ + COLOInfo *s = opaque; + + s->colo_requested = migrate_colo_enabled(); +} + +static bool colo_info_need(void *opaque) +{ + return migrate_colo_enabled(); +} + +static const VMStateDescription colo_state = { + .name = "COLOState", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = colo_info_pre_save, + .needed = colo_info_need, + .fields = (VMStateField[]) { + VMSTATE_BOOL(colo_requested, COLOInfo), + VMSTATE_END_OF_LIST() + }, +}; + +void colo_info_init(void) +{ + vmstate_register(NULL, 0, &colo_state, &colo_info); +} + +bool migration_incoming_enable_colo(void) +{ + return colo_info.colo_requested; +} + +void migration_incoming_exit_colo(void) +{ + colo_info.colo_requested = false; +} diff --git a/migration/colo-failover.c b/migration/colo-failover.c new file mode 100644 index 0000000000..cc229f5ab1 --- /dev/null +++ b/migration/colo-failover.c @@ -0,0 +1,83 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 
+ * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "migration/colo.h" +#include "migration/failover.h" +#include "qmp-commands.h" +#include "qapi/qmp/qerror.h" +#include "qemu/error-report.h" +#include "trace.h" + +static QEMUBH *failover_bh; +static FailoverStatus failover_state; + +static void colo_failover_bh(void *opaque) +{ + int old_state; + + qemu_bh_delete(failover_bh); + failover_bh = NULL; + + old_state = failover_set_state(FAILOVER_STATUS_REQUIRE, + FAILOVER_STATUS_ACTIVE); + if (old_state != FAILOVER_STATUS_REQUIRE) { + error_report("Unknown error for failover, old_state = %s", + FailoverStatus_lookup[old_state]); + return; + } + + colo_do_failover(NULL); +} + +void failover_request_active(Error **errp) +{ + if (failover_set_state(FAILOVER_STATUS_NONE, + FAILOVER_STATUS_REQUIRE) != FAILOVER_STATUS_NONE) { + error_setg(errp, "COLO failover is already actived"); + return; + } + failover_bh = qemu_bh_new(colo_failover_bh, NULL); + qemu_bh_schedule(failover_bh); +} + +void failover_init_state(void) +{ + failover_state = FAILOVER_STATUS_NONE; +} + +FailoverStatus failover_set_state(FailoverStatus old_state, + FailoverStatus new_state) +{ + FailoverStatus old; + + old = atomic_cmpxchg(&failover_state, old_state, new_state); + if (old == old_state) { + trace_colo_failover_set_state(FailoverStatus_lookup[new_state]); + } + return old; +} + +FailoverStatus failover_get_state(void) +{ + return atomic_read(&failover_state); +} + +void qmp_x_colo_lost_heartbeat(Error **errp) +{ + if (get_colo_mode() == COLO_MODE_UNKNOWN) { + error_setg(errp, QERR_FEATURE_DISABLED, "colo"); + return; + } + + failover_request_active(errp); +} diff --git a/migration/colo.c b/migration/colo.c new file mode 100644 index 0000000000..93c85c538b --- /dev/null +++ b/migration/colo.c @@ -0,0 +1,529 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
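/*
 * failover_set_state() above performs its transition with a single
 * compare-and-swap so that two concurrent failover requests cannot both win.
 * The same idea using C11 atomics instead of QEMU's atomic_cmpxchg()
 * (a sketch; the state names are shortened versions of FAILOVER_STATUS_*).
 */
#include <stdatomic.h>
#include <stdio.h>

enum { ST_NONE, ST_REQUIRE, ST_ACTIVE, ST_COMPLETED };

static _Atomic int failover_state = ST_NONE;

/* Returns the value that was observed; the transition took place only if
 * that value equals old_state. */
static int set_state(int old_state, int new_state)
{
    int expected = old_state;

    atomic_compare_exchange_strong(&failover_state, &expected, new_state);
    return expected;
}

int main(void)
{
    if (set_state(ST_NONE, ST_REQUIRE) == ST_NONE) {
        printf("failover requested\n");
    }
    if (set_state(ST_NONE, ST_REQUIRE) != ST_NONE) {
        printf("second request rejected: failover already in progress\n");
    }
    return 0;
}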
+ */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "migration/colo.h" +#include "io/channel-buffer.h" +#include "trace.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "migration/failover.h" + +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) + +bool colo_supported(void) +{ + return true; +} + +bool migration_in_colo_state(void) +{ + MigrationState *s = migrate_get_current(); + + return (s->state == MIGRATION_STATUS_COLO); +} + +bool migration_incoming_in_colo_state(void) +{ + MigrationIncomingState *mis = migration_incoming_get_current(); + + return mis && (mis->state == MIGRATION_STATUS_COLO); +} + +static bool colo_runstate_is_stopped(void) +{ + return runstate_check(RUN_STATE_COLO) || !runstate_is_running(); +} + +static void secondary_vm_do_failover(void) +{ + int old_state; + MigrationIncomingState *mis = migration_incoming_get_current(); + + migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, + MIGRATION_STATUS_COMPLETED); + + if (!autostart) { + error_report("\"-S\" qemu option will be ignored in secondary side"); + /* recover runstate to normal migration finish state */ + autostart = true; + } + + old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, + FAILOVER_STATUS_COMPLETED); + if (old_state != FAILOVER_STATUS_ACTIVE) { + error_report("Incorrect state (%s) while doing failover for " + "secondary VM", FailoverStatus_lookup[old_state]); + return; + } + /* For Secondary VM, jump to incoming co */ + if (mis->migration_incoming_co) { + qemu_coroutine_enter(mis->migration_incoming_co); + } +} + +static void primary_vm_do_failover(void) +{ + MigrationState *s = migrate_get_current(); + int old_state; + + migrate_set_state(&s->state, MIGRATION_STATUS_COLO, + MIGRATION_STATUS_COMPLETED); + + old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, + FAILOVER_STATUS_COMPLETED); + if (old_state != FAILOVER_STATUS_ACTIVE) { + error_report("Incorrect state (%s) while doing failover for Primary VM", + FailoverStatus_lookup[old_state]); + return; + } +} + +void colo_do_failover(MigrationState *s) +{ + /* Make sure VM stopped while failover happened. 
*/ + if (!colo_runstate_is_stopped()) { + vm_stop_force_state(RUN_STATE_COLO); + } + + if (get_colo_mode() == COLO_MODE_PRIMARY) { + primary_vm_do_failover(); + } else { + secondary_vm_do_failover(); + } +} + +static void colo_send_message(QEMUFile *f, COLOMessage msg, + Error **errp) +{ + int ret; + + if (msg >= COLO_MESSAGE__MAX) { + error_setg(errp, "%s: Invalid message", __func__); + return; + } + qemu_put_be32(f, msg); + qemu_fflush(f); + + ret = qemu_file_get_error(f); + if (ret < 0) { + error_setg_errno(errp, -ret, "Can't send COLO message"); + } + trace_colo_send_message(COLOMessage_lookup[msg]); +} + +static void colo_send_message_value(QEMUFile *f, COLOMessage msg, + uint64_t value, Error **errp) +{ + Error *local_err = NULL; + int ret; + + colo_send_message(f, msg, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + qemu_put_be64(f, value); + qemu_fflush(f); + + ret = qemu_file_get_error(f); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to send value for message:%s", + COLOMessage_lookup[msg]); + } +} + +static COLOMessage colo_receive_message(QEMUFile *f, Error **errp) +{ + COLOMessage msg; + int ret; + + msg = qemu_get_be32(f); + ret = qemu_file_get_error(f); + if (ret < 0) { + error_setg_errno(errp, -ret, "Can't receive COLO message"); + return msg; + } + if (msg >= COLO_MESSAGE__MAX) { + error_setg(errp, "%s: Invalid message", __func__); + return msg; + } + trace_colo_receive_message(COLOMessage_lookup[msg]); + return msg; +} + +static void colo_receive_check_message(QEMUFile *f, COLOMessage expect_msg, + Error **errp) +{ + COLOMessage msg; + Error *local_err = NULL; + + msg = colo_receive_message(f, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + if (msg != expect_msg) { + error_setg(errp, "Unexpected COLO message %d, expected %d", + msg, expect_msg); + } +} + +static uint64_t colo_receive_message_value(QEMUFile *f, uint32_t expect_msg, + Error **errp) +{ + Error *local_err = NULL; + uint64_t value; + int ret; + + colo_receive_check_message(f, expect_msg, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return 0; + } + + value = qemu_get_be64(f); + ret = qemu_file_get_error(f); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to get value for COLO message: %s", + COLOMessage_lookup[expect_msg]); + } + return value; +} + +static int colo_do_checkpoint_transaction(MigrationState *s, + QIOChannelBuffer *bioc, + QEMUFile *fb) +{ + Error *local_err = NULL; + int ret = -1; + + colo_send_message(s->to_dst_file, COLO_MESSAGE_CHECKPOINT_REQUEST, + &local_err); + if (local_err) { + goto out; + } + + colo_receive_check_message(s->rp_state.from_dst_file, + COLO_MESSAGE_CHECKPOINT_REPLY, &local_err); + if (local_err) { + goto out; + } + /* Reset channel-buffer directly */ + qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); + bioc->usage = 0; + + qemu_mutex_lock_iothread(); + if (failover_get_state() != FAILOVER_STATUS_NONE) { + qemu_mutex_unlock_iothread(); + goto out; + } + vm_stop_force_state(RUN_STATE_COLO); + qemu_mutex_unlock_iothread(); + trace_colo_vm_state_change("run", "stop"); + /* + * Failover request bh could be called after vm_stop_force_state(), + * So we need check failover_request_is_active() again. 
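/*
 * The COLO control channel above is a small framed protocol: every message
 * is a 32-bit big-endian command, optionally followed by a 64-bit big-endian
 * value, and the receiver rejects unknown command numbers. A self-contained
 * sketch of that framing over a pipe (plain read/write instead of the
 * QEMUFile helpers; the message names are trimmed for brevity).
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

enum { MSG_CHECKPOINT_REQUEST, MSG_VMSTATE_SIZE, MSG__MAX };

static void send_msg_value(int fd, uint32_t msg, uint64_t value)
{
    uint32_t m = htonl(msg);
    uint32_t hi = htonl((uint32_t)(value >> 32));
    uint32_t lo = htonl((uint32_t)value);

    write(fd, &m, sizeof(m));
    write(fd, &hi, sizeof(hi));     /* the be64 value as two be32 halves */
    write(fd, &lo, sizeof(lo));
}

int main(void)
{
    int p[2];
    uint32_t m, hi, lo;

    pipe(p);
    send_msg_value(p[1], MSG_VMSTATE_SIZE, 4 * 1024 * 1024);
    read(p[0], &m, sizeof(m));
    read(p[0], &hi, sizeof(hi));
    read(p[0], &lo, sizeof(lo));
    if (ntohl(m) >= MSG__MAX) {
        fprintf(stderr, "invalid COLO message\n");
        return 1;
    }
    printf("message %u, value %llu\n", ntohl(m),
           ((unsigned long long)ntohl(hi) << 32) | ntohl(lo));
    return 0;
}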
+ */ + if (failover_get_state() != FAILOVER_STATUS_NONE) { + goto out; + } + + /* Disable block migration */ + s->params.blk = 0; + s->params.shared = 0; + qemu_savevm_state_header(fb); + qemu_savevm_state_begin(fb, &s->params); + qemu_mutex_lock_iothread(); + qemu_savevm_state_complete_precopy(fb, false); + qemu_mutex_unlock_iothread(); + + qemu_fflush(fb); + + colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); + if (local_err) { + goto out; + } + /* + * We need the size of the VMstate data in Secondary side, + * With which we can decide how much data should be read. + */ + colo_send_message_value(s->to_dst_file, COLO_MESSAGE_VMSTATE_SIZE, + bioc->usage, &local_err); + if (local_err) { + goto out; + } + + qemu_put_buffer(s->to_dst_file, bioc->data, bioc->usage); + qemu_fflush(s->to_dst_file); + ret = qemu_file_get_error(s->to_dst_file); + if (ret < 0) { + goto out; + } + + colo_receive_check_message(s->rp_state.from_dst_file, + COLO_MESSAGE_VMSTATE_RECEIVED, &local_err); + if (local_err) { + goto out; + } + + colo_receive_check_message(s->rp_state.from_dst_file, + COLO_MESSAGE_VMSTATE_LOADED, &local_err); + if (local_err) { + goto out; + } + + ret = 0; + + qemu_mutex_lock_iothread(); + vm_start(); + qemu_mutex_unlock_iothread(); + trace_colo_vm_state_change("stop", "run"); + +out: + if (local_err) { + error_report_err(local_err); + } + return ret; +} + +static void colo_process_checkpoint(MigrationState *s) +{ + QIOChannelBuffer *bioc; + QEMUFile *fb = NULL; + int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + Error *local_err = NULL; + int ret; + + failover_init_state(); + + s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file); + if (!s->rp_state.from_dst_file) { + error_report("Open QEMUFile from_dst_file failed"); + goto out; + } + + /* + * Wait for Secondary finish loading VM states and enter COLO + * restore. 
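/*
 * colo_process_checkpoint() above paces the checkpoint loop: if fewer than
 * x_checkpoint_delay milliseconds have passed since the previous checkpoint,
 * it sleeps for the remainder before starting the next transaction. The
 * pacing logic on its own (a sketch using CLOCK_MONOTONIC and usleep()
 * instead of QEMU_CLOCK_HOST and g_usleep()).
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int64_t now_ms(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000LL;
}

int main(void)
{
    const int64_t checkpoint_delay = 200;   /* DEFAULT_MIGRATE_X_CHECKPOINT_DELAY */
    int64_t checkpoint_time = now_ms();
    int i;

    for (i = 0; i < 3; i++) {
        int64_t elapsed = now_ms() - checkpoint_time;

        if (elapsed < checkpoint_delay) {
            usleep((checkpoint_delay - elapsed) * 1000);   /* ms -> us */
        }
        /* ... the checkpoint transaction would run here ... */
        printf("checkpoint %d after %lld ms\n", i,
               (long long)(now_ms() - checkpoint_time));
        checkpoint_time = now_ms();
    }
    return 0;
}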
+ */ + colo_receive_check_message(s->rp_state.from_dst_file, + COLO_MESSAGE_CHECKPOINT_READY, &local_err); + if (local_err) { + goto out; + } + bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); + fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); + object_unref(OBJECT(bioc)); + + qemu_mutex_lock_iothread(); + vm_start(); + qemu_mutex_unlock_iothread(); + trace_colo_vm_state_change("stop", "run"); + + while (s->state == MIGRATION_STATUS_COLO) { + if (failover_get_state() != FAILOVER_STATUS_NONE) { + error_report("failover request"); + goto out; + } + + current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + if (current_time - checkpoint_time < + s->parameters.x_checkpoint_delay) { + int64_t delay_ms; + + delay_ms = s->parameters.x_checkpoint_delay - + (current_time - checkpoint_time); + g_usleep(delay_ms * 1000); + } + ret = colo_do_checkpoint_transaction(s, bioc, fb); + if (ret < 0) { + goto out; + } + checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + } + +out: + /* Throw the unreported error message after exited from loop */ + if (local_err) { + error_report_err(local_err); + } + + if (fb) { + qemu_fclose(fb); + } + + if (s->rp_state.from_dst_file) { + qemu_fclose(s->rp_state.from_dst_file); + } +} + +void migrate_start_colo_process(MigrationState *s) +{ + qemu_mutex_unlock_iothread(); + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); + colo_process_checkpoint(s); + qemu_mutex_lock_iothread(); +} + +static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, + Error **errp) +{ + COLOMessage msg; + Error *local_err = NULL; + + msg = colo_receive_message(f, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + switch (msg) { + case COLO_MESSAGE_CHECKPOINT_REQUEST: + *checkpoint_request = 1; + break; + default: + *checkpoint_request = 0; + error_setg(errp, "Got unknown COLO message: %d", msg); + break; + } +} + +void *colo_process_incoming_thread(void *opaque) +{ + MigrationIncomingState *mis = opaque; + QEMUFile *fb = NULL; + QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ + uint64_t total_size; + uint64_t value; + Error *local_err = NULL; + + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); + + failover_init_state(); + + mis->to_src_file = qemu_file_get_return_path(mis->from_src_file); + if (!mis->to_src_file) { + error_report("COLO incoming thread: Open QEMUFile to_src_file failed"); + goto out; + } + /* + * Note: the communication between Primary side and Secondary side + * should be sequential, we set the fd to unblocked in migration incoming + * coroutine, and here we are in the COLO incoming thread, so it is ok to + * set the fd back to blocked. 
+ */ + qemu_file_set_blocking(mis->from_src_file, true); + + bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE); + fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); + object_unref(OBJECT(bioc)); + + colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, + &local_err); + if (local_err) { + goto out; + } + + while (mis->state == MIGRATION_STATUS_COLO) { + int request = 0; + + colo_wait_handle_message(mis->from_src_file, &request, &local_err); + if (local_err) { + goto out; + } + assert(request); + if (failover_get_state() != FAILOVER_STATUS_NONE) { + error_report("failover request"); + goto out; + } + + /* FIXME: This is unnecessary for periodic checkpoint mode */ + colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, + &local_err); + if (local_err) { + goto out; + } + + colo_receive_check_message(mis->from_src_file, + COLO_MESSAGE_VMSTATE_SEND, &local_err); + if (local_err) { + goto out; + } + + value = colo_receive_message_value(mis->from_src_file, + COLO_MESSAGE_VMSTATE_SIZE, &local_err); + if (local_err) { + goto out; + } + + /* + * Read VM device state data into channel buffer, + * It's better to re-use the memory allocated. + * Here we need to handle the channel buffer directly. + */ + if (value > bioc->capacity) { + bioc->capacity = value; + bioc->data = g_realloc(bioc->data, bioc->capacity); + } + total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); + if (total_size != value) { + error_report("Got %" PRIu64 " VMState data, less than expected" + " %" PRIu64, total_size, value); + goto out; + } + bioc->usage = total_size; + qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); + + colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, + &local_err); + if (local_err) { + goto out; + } + + qemu_mutex_lock_iothread(); + qemu_system_reset(VMRESET_SILENT); + if (qemu_loadvm_state(fb) < 0) { + error_report("COLO: loadvm failed"); + qemu_mutex_unlock_iothread(); + goto out; + } + qemu_mutex_unlock_iothread(); + + colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, + &local_err); + if (local_err) { + goto out; + } + } + +out: + /* Throw the unreported error message after exited from loop */ + if (local_err) { + error_report_err(local_err); + } + + if (fb) { + qemu_fclose(fb); + } + + if (mis->to_src_file) { + qemu_fclose(mis->to_src_file); + } + migration_incoming_exit_colo(); + + return NULL; +} diff --git a/migration/exec.c b/migration/exec.c index 2af63cced6..9157721dfe 100644 --- a/migration/exec.c +++ b/migration/exec.c @@ -38,6 +38,7 @@ void exec_start_outgoing_migration(MigrationState *s, const char *command, Error return; } + qio_channel_set_name(ioc, "migration-exec-outgoing"); migration_channel_connect(s, ioc, NULL); object_unref(OBJECT(ioc)); } @@ -64,6 +65,7 @@ void exec_start_incoming_migration(const char *command, Error **errp) return; } + qio_channel_set_name(ioc, "migration-exec-incoming"); qio_channel_add_watch(ioc, G_IO_IN, exec_accept_incoming_migration, diff --git a/migration/fd.c b/migration/fd.c index 84a10fd68f..58cb51a9e6 100644 --- a/migration/fd.c +++ b/migration/fd.c @@ -38,6 +38,7 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error ** return; } + qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-outgoing"); migration_channel_connect(s, ioc, NULL); object_unref(OBJECT(ioc)); } @@ -65,6 +66,7 @@ void fd_start_incoming_migration(const char *infd, Error **errp) return; } + qio_channel_set_name(QIO_CHANNEL(ioc), "migration-fd-incoming"); qio_channel_add_watch(ioc, G_IO_IN, 
fd_accept_incoming_migration, diff --git a/migration/migration.c b/migration/migration.c index 955d5ee38c..f498ab84f2 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -36,6 +36,7 @@ #include "exec/address-spaces.h" #include "io/channel-buffer.h" #include "io/channel-tls.h" +#include "migration/colo.h" #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ @@ -44,6 +45,10 @@ #define BUFFER_DELAY 100 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) +/* Time in milliseconds we are allowed to stop the source, + * for sending the last part */ +#define DEFAULT_MIGRATE_SET_DOWNTIME 300 + /* Default compression thread count */ #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 /* Default decompression thread count, usually decompression is at @@ -58,6 +63,11 @@ /* Migration XBZRLE default cache size */ #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) +/* The delay time (in ms) between two COLO checkpoints + * Note: Please change this default value to 10000 when we support hybrid mode. + */ +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 + static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -80,7 +90,6 @@ MigrationState *migrate_get_current(void) static bool once; static MigrationState current_migration = { .state = MIGRATION_STATUS_NONE, - .bandwidth_limit = MAX_THROTTLE, .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE, .mbps = -1, .parameters = { @@ -89,6 +98,9 @@ MigrationState *migrate_get_current(void) .decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, .cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL, .cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT, + .max_bandwidth = MAX_THROTTLE, + .downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME, + .x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY, }, }; @@ -401,6 +413,18 @@ static void process_incoming_migration_co(void *opaque) /* Else if something went wrong then just fall out of the normal exit */ } + /* we get COLO info, and know if we are in COLO mode */ + if (!ret && migration_incoming_enable_colo()) { + mis->migration_incoming_co = qemu_coroutine_self(); + qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", + colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); + mis->have_colo_incoming_thread = true; + qemu_coroutine_yield(); + + /* Wait checkpoint incoming thread exit before free resource */ + qemu_thread_join(&mis->colo_incoming_thread); + } + qemu_fclose(f); free_xbzrle_decoded_buf(); @@ -517,17 +541,6 @@ void migrate_send_rp_pong(MigrationIncomingState *mis, migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf); } -/* amount of nanoseconds we are willing to wait for migration to be down. - * the choice of nanoseconds is because it is the maximum resolution that - * get_clock() can achieve. It is an internal measure. 
All user-visible - * units must be in seconds */ -static uint64_t max_downtime = 300000000; - -uint64_t migrate_max_downtime(void) -{ - return max_downtime; -} - MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) { MigrationCapabilityStatusList *head = NULL; @@ -537,6 +550,9 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) caps = NULL; /* silence compiler warning */ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { + if (i == MIGRATION_CAPABILITY_X_COLO && !colo_supported()) { + continue; + } if (head == NULL) { head = g_malloc0(sizeof(*caps)); caps = head; @@ -559,13 +575,26 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) MigrationState *s = migrate_get_current(); params = g_malloc0(sizeof(*params)); + params->has_compress_level = true; params->compress_level = s->parameters.compress_level; + params->has_compress_threads = true; params->compress_threads = s->parameters.compress_threads; + params->has_decompress_threads = true; params->decompress_threads = s->parameters.decompress_threads; + params->has_cpu_throttle_initial = true; params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; + params->has_cpu_throttle_increment = true; params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; + params->has_tls_creds = !!s->parameters.tls_creds; params->tls_creds = g_strdup(s->parameters.tls_creds); + params->has_tls_hostname = !!s->parameters.tls_hostname; params->tls_hostname = g_strdup(s->parameters.tls_hostname); + params->has_max_bandwidth = true; + params->max_bandwidth = s->parameters.max_bandwidth; + params->has_downtime_limit = true; + params->downtime_limit = s->parameters.downtime_limit; + params->has_x_checkpoint_delay = true; + params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; return params; } @@ -686,6 +715,10 @@ MigrationInfo *qmp_query_migrate(Error **errp) get_xbzrle_cache_stats(info); break; + case MIGRATION_STATUS_COLO: + info->has_status = true; + /* TODO: display COLO specific information (checkpoint info etc.) 
*/ + break; case MIGRATION_STATUS_COMPLETED: get_xbzrle_cache_stats(info); @@ -728,6 +761,14 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, } for (cap = params; cap; cap = cap->next) { + if (cap->value->capability == MIGRATION_CAPABILITY_X_COLO) { + if (!colo_supported()) { + error_setg(errp, "COLO is not currently supported, please" + " configure with --enable-colo option in order to" + " support COLO feature"); + continue; + } + } s->enabled_capabilities[cap->value->capability] = cap->value->state; } @@ -759,78 +800,101 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, } } -void qmp_migrate_set_parameters(bool has_compress_level, - int64_t compress_level, - bool has_compress_threads, - int64_t compress_threads, - bool has_decompress_threads, - int64_t decompress_threads, - bool has_cpu_throttle_initial, - int64_t cpu_throttle_initial, - bool has_cpu_throttle_increment, - int64_t cpu_throttle_increment, - bool has_tls_creds, - const char *tls_creds, - bool has_tls_hostname, - const char *tls_hostname, - Error **errp) +void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp) { MigrationState *s = migrate_get_current(); - if (has_compress_level && (compress_level < 0 || compress_level > 9)) { + if (params->has_compress_level && + (params->compress_level < 0 || params->compress_level > 9)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", "is invalid, it should be in the range of 0 to 9"); return; } - if (has_compress_threads && - (compress_threads < 1 || compress_threads > 255)) { + if (params->has_compress_threads && + (params->compress_threads < 1 || params->compress_threads > 255)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_threads", "is invalid, it should be in the range of 1 to 255"); return; } - if (has_decompress_threads && - (decompress_threads < 1 || decompress_threads > 255)) { + if (params->has_decompress_threads && + (params->decompress_threads < 1 || params->decompress_threads > 255)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "decompress_threads", "is invalid, it should be in the range of 1 to 255"); return; } - if (has_cpu_throttle_initial && - (cpu_throttle_initial < 1 || cpu_throttle_initial > 99)) { + if (params->has_cpu_throttle_initial && + (params->cpu_throttle_initial < 1 || + params->cpu_throttle_initial > 99)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu_throttle_initial", "an integer in the range of 1 to 99"); + return; } - if (has_cpu_throttle_increment && - (cpu_throttle_increment < 1 || cpu_throttle_increment > 99)) { + if (params->has_cpu_throttle_increment && + (params->cpu_throttle_increment < 1 || + params->cpu_throttle_increment > 99)) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu_throttle_increment", "an integer in the range of 1 to 99"); + return; + } + if (params->has_max_bandwidth && + (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) { + error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the" + " range of 0 to %zu bytes/second", SIZE_MAX); + return; + } + if (params->has_downtime_limit && + (params->downtime_limit < 0 || params->downtime_limit > 2000000)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "downtime_limit", + "an integer in the range of 0 to 2000000 milliseconds"); + return; + } + if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_checkpoint_delay", + "is invalid, it should be positive"); } - if 
(has_compress_level) { - s->parameters.compress_level = compress_level; + if (params->has_compress_level) { + s->parameters.compress_level = params->compress_level; } - if (has_compress_threads) { - s->parameters.compress_threads = compress_threads; + if (params->has_compress_threads) { + s->parameters.compress_threads = params->compress_threads; } - if (has_decompress_threads) { - s->parameters.decompress_threads = decompress_threads; + if (params->has_decompress_threads) { + s->parameters.decompress_threads = params->decompress_threads; } - if (has_cpu_throttle_initial) { - s->parameters.cpu_throttle_initial = cpu_throttle_initial; + if (params->has_cpu_throttle_initial) { + s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; } - if (has_cpu_throttle_increment) { - s->parameters.cpu_throttle_increment = cpu_throttle_increment; + if (params->has_cpu_throttle_increment) { + s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; } - if (has_tls_creds) { + if (params->has_tls_creds) { g_free(s->parameters.tls_creds); - s->parameters.tls_creds = g_strdup(tls_creds); + s->parameters.tls_creds = g_strdup(params->tls_creds); } - if (has_tls_hostname) { + if (params->has_tls_hostname) { g_free(s->parameters.tls_hostname); - s->parameters.tls_hostname = g_strdup(tls_hostname); + s->parameters.tls_hostname = g_strdup(params->tls_hostname); + } + if (params->has_max_bandwidth) { + s->parameters.max_bandwidth = params->max_bandwidth; + if (s->to_dst_file) { + qemu_file_set_rate_limit(s->to_dst_file, + s->parameters.max_bandwidth / XFER_LIMIT_RATIO); + } + } + if (params->has_downtime_limit) { + s->parameters.downtime_limit = params->downtime_limit; + } + + if (params->has_x_checkpoint_delay) { + s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; } } @@ -903,7 +967,7 @@ static void migrate_fd_cleanup(void *opaque) void migrate_fd_error(MigrationState *s, const Error *error) { - trace_migrate_fd_error(error ? 
error_get_pretty(error) : ""); + trace_migrate_fd_error(error_get_pretty(error)); assert(s->to_dst_file == NULL); migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED); @@ -1082,7 +1146,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, params.shared = has_inc && inc; if (migration_is_setup_or_active(s->state) || - s->state == MIGRATION_STATUS_CANCELLING) { + s->state == MIGRATION_STATUS_CANCELLING || + s->state == MIGRATION_STATUS_COLO) { error_setg(errp, QERR_MIGRATION_ACTIVE); return; } @@ -1165,28 +1230,25 @@ int64_t qmp_query_migrate_cache_size(Error **errp) void qmp_migrate_set_speed(int64_t value, Error **errp) { - MigrationState *s; - - if (value < 0) { - value = 0; - } - if (value > SIZE_MAX) { - value = SIZE_MAX; - } + MigrationParameters p = { + .has_max_bandwidth = true, + .max_bandwidth = value, + }; - s = migrate_get_current(); - s->bandwidth_limit = value; - if (s->to_dst_file) { - qemu_file_set_rate_limit(s->to_dst_file, - s->bandwidth_limit / XFER_LIMIT_RATIO); - } + qmp_migrate_set_parameters(&p, errp); } void qmp_migrate_set_downtime(double value, Error **errp) { - value *= 1e9; - value = MAX(0, MIN(UINT64_MAX, value)); - max_downtime = (uint64_t)value; + value *= 1000; /* Convert to milliseconds */ + value = MAX(0, MIN(INT64_MAX, value)); + + MigrationParameters p = { + .has_downtime_limit = true, + .downtime_limit = value, + }; + + qmp_migrate_set_parameters(&p, errp); } bool migrate_postcopy_ram(void) @@ -1551,6 +1613,7 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running) * to do this we use a qemu_buf to hold the whole of the device state. */ bioc = qio_channel_buffer_new(4096); + qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer"); fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); @@ -1632,7 +1695,11 @@ static void migration_completion(MigrationState *s, int current_active_state, if (!ret) { ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - if (ret >= 0) { + /* + * Don't mark the image with BDRV_O_INACTIVE flag if + * we will go into COLO stage later. + */ + if (ret >= 0 && !migrate_colo_enabled()) { ret = bdrv_inactivate_all(); } if (ret >= 0) { @@ -1674,8 +1741,11 @@ static void migration_completion(MigrationState *s, int current_active_state, goto fail_invalidate; } - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_COMPLETED); + if (!migrate_colo_enabled()) { + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_COMPLETED); + } + return; fail_invalidate: @@ -1696,6 +1766,12 @@ static void migration_completion(MigrationState *s, int current_active_state, MIGRATION_STATUS_FAILED); } +bool migrate_colo_enabled(void) +{ + MigrationState *s = migrate_get_current(); + return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; +} + /* * Master migration thread on the source VM. * It drives the migration and pumps the data down the outgoing channel. 
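/*
 * qmp_migrate_set_downtime() above is now a thin wrapper: the value arrives
 * in (fractional) seconds, is converted to milliseconds and clamped, and is
 * then routed through qmp_migrate_set_parameters() like any other parameter.
 * The conversion step on its own, with a sample input of 0.3 seconds:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    double seconds = 0.3;             /* value given to migrate_set_downtime */
    double value = seconds * 1000;    /* convert to milliseconds */

    value = MAX(0, MIN(INT64_MAX, value));
    printf("downtime_limit = %" PRId64 " ms\n", (int64_t)value);
    return 0;
}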
@@ -1714,6 +1790,7 @@ static void *migration_thread(void *opaque) bool entered_postcopy = false; /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */ enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE; + bool enable_colo = migrate_colo_enabled(); rcu_register_thread(); @@ -1793,7 +1870,7 @@ static void *migration_thread(void *opaque) initial_bytes; uint64_t time_spent = current_time - initial_time; double bandwidth = (double)transferred_bytes / time_spent; - max_size = bandwidth * migrate_max_downtime() / 1000000; + max_size = bandwidth * s->parameters.downtime_limit; s->mbps = (((double) transferred_bytes * 8.0) / ((double) time_spent / 1000.0)) / 1000.0 / 1000.0; @@ -1822,7 +1899,13 @@ static void *migration_thread(void *opaque) end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); qemu_mutex_lock_iothread(); - qemu_savevm_state_cleanup(); + /* + * The resource has been allocated by migration will be reused in COLO + * process, so don't release them. + */ + if (!enable_colo) { + qemu_savevm_state_cleanup(); + } if (s->state == MIGRATION_STATUS_COMPLETED) { uint64_t transferred_bytes = qemu_ftell(s->to_dst_file); s->total_time = end_time - s->total_time; @@ -1835,6 +1918,15 @@ static void *migration_thread(void *opaque) } runstate_set(RUN_STATE_POSTMIGRATE); } else { + if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) { + migrate_start_colo_process(s); + qemu_savevm_state_cleanup(); + /* + * Fixme: we will run VM in COLO no matter its old running state. + * After exited COLO, we will keep running. + */ + old_vm_running = true; + } if (old_vm_running && !entered_postcopy) { vm_start(); } else { @@ -1852,13 +1944,12 @@ static void *migration_thread(void *opaque) void migrate_fd_connect(MigrationState *s) { - /* This is a best 1st approximation. ns to ms */ - s->expected_downtime = max_downtime/1000000; + s->expected_downtime = s->parameters.downtime_limit; s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); qemu_file_set_blocking(s->to_dst_file, true); qemu_file_set_rate_limit(s->to_dst_file, - s->bandwidth_limit / XFER_LIMIT_RATIO); + s->parameters.max_bandwidth / XFER_LIMIT_RATIO); /* Notify before starting migration thread */ notifier_list_notify(&migration_state_notifiers, s); diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9b0477835f..a40dddbaf6 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -84,6 +84,24 @@ static bool ufd_version_check(int ufd) return true; } +/* + * Check for things that postcopy won't support; returns 0 if the block + * is fine. 
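/*
 * In migration_thread() above, the amount of data that may still be dirty
 * when the guest is stopped is derived from the measured bandwidth and the
 * configured downtime_limit, both now in milliseconds (the old nanosecond
 * max_downtime is gone). The arithmetic in isolation, with made-up sample
 * numbers:
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t transferred_bytes = 512 * 1024 * 1024;  /* sent since the last sample */
    uint64_t time_spent = 1000;                      /* sampling interval, ms      */
    int64_t downtime_limit = 300;                    /* DEFAULT_MIGRATE_SET_DOWNTIME, ms */

    double bandwidth = (double)transferred_bytes / time_spent;  /* bytes per ms */
    double max_size = bandwidth * downtime_limit;               /* bytes        */

    printf("bandwidth %.0f bytes/ms, may leave %.0f bytes for the stop phase\n",
           bandwidth, max_size);
    return 0;
}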
+ */ +static int check_range(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + RAMBlock *rb = qemu_ram_block_by_name(block_name); + + if (qemu_ram_pagesize(rb) > getpagesize()) { + error_report("Postcopy doesn't support large page sizes yet (%s)", + block_name); + return -E2BIG; + } + + return 0; +} + /* * Note: This has the side effect of munlock'ing all of RAM, that's * normally fine since if the postcopy succeeds it gets turned back on at the @@ -104,6 +122,12 @@ bool postcopy_ram_supported_by_host(void) goto out; } + /* Check for anything about the RAMBlocks we don't support */ + if (qemu_ram_foreach_block(check_range, NULL)) { + /* check_range will have printed its own error */ + goto out; + } + ufd = syscall(__NR_userfaultfd, O_CLOEXEC); if (ufd == -1) { error_report("%s: userfaultfd not available: %s", __func__, diff --git a/migration/ram.c b/migration/ram.c index a3d70c4c62..a1c8089010 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -43,6 +43,7 @@ #include "trace.h" #include "exec/ram_addr.h" #include "qemu/rcu_queue.h" +#include "migration/colo.h" #ifdef DEBUG_MIGRATION_RAM #define DPRINTF(fmt, ...) \ @@ -69,11 +70,11 @@ static uint64_t bitmap_sync_count; /* 0x80 is reserved in migration.h start with 0x100 next */ #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 -static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE]; +static uint8_t *ZERO_TARGET_PAGE; static inline bool is_zero_range(uint8_t *p, uint64_t size) { - return buffer_find_nonzero_offset(p, size) == size; + return buffer_is_zero(p, size); } /* struct contains XBZRLE cache and a static page @@ -626,7 +627,7 @@ static void migration_bitmap_sync(void) } trace_migration_bitmap_sync_start(); - address_space_sync_dirty_bitmap(&address_space_memory); + memory_global_dirty_log_sync(); qemu_mutex_lock(&migration_bitmap_mutex); rcu_read_lock(); @@ -771,7 +772,9 @@ static int ram_save_page(QEMUFile *f, PageSearchStatus *pss, * page would be stale */ xbzrle_cache_zero_page(current_addr); - } else if (!ram_bulk_stage && migrate_use_xbzrle()) { + } else if (!ram_bulk_stage && + !migration_in_postcopy(migrate_get_current()) && + migrate_use_xbzrle()) { pages = save_xbzrle_page(f, &p, current_addr, block, offset, last_stage, bytes_transferred); if (!last_stage) { @@ -1429,6 +1432,7 @@ static void ram_migration_cleanup(void *opaque) cache_fini(XBZRLE.cache); g_free(XBZRLE.encoded_buf); g_free(XBZRLE.current_buf); + g_free(ZERO_TARGET_PAGE); XBZRLE.cache = NULL; XBZRLE.encoded_buf = NULL; XBZRLE.current_buf = NULL; @@ -1868,16 +1872,8 @@ int ram_discard_range(MigrationIncomingState *mis, return ret; } - -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has - * long-running RCU critical section. When rcu-reclaims in the code - * start to become numerous it will be necessary to reduce the - * granularity of these critical sections. 
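/*
 * check_range() above makes postcopy refuse any RAMBlock whose backing page
 * size exceeds the host's base page size, since userfaultfd cannot yet fill
 * huge pages. The comparison itself is only this (a sketch with a hard-coded
 * 2 MiB page size standing in for qemu_ram_pagesize()):
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    long host_page = sysconf(_SC_PAGESIZE);
    long block_page = 2 * 1024 * 1024;      /* e.g. a hugetlbfs-backed block */

    if (block_page > host_page) {
        fprintf(stderr, "Postcopy doesn't support large page sizes yet\n");
        return 1;                            /* the QEMU code returns -E2BIG */
    }
    printf("block page size %ld is fine (host page size %ld)\n",
           block_page, host_page);
    return 0;
}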
- */ - -static int ram_save_setup(QEMUFile *f, void *opaque) +static int ram_save_init_globals(void) { - RAMBlock *block; int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ dirty_rate_high_cnt = 0; @@ -1887,6 +1883,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) if (migrate_use_xbzrle()) { XBZRLE_cache_lock(); + ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE); XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); @@ -1943,6 +1940,29 @@ static int ram_save_setup(QEMUFile *f, void *opaque) migration_bitmap_sync(); qemu_mutex_unlock_ramlist(); qemu_mutex_unlock_iothread(); + rcu_read_unlock(); + + return 0; +} + +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has + * long-running RCU critical section. When rcu-reclaims in the code + * start to become numerous it will be necessary to reduce the + * granularity of these critical sections. + */ + +static int ram_save_setup(QEMUFile *f, void *opaque) +{ + RAMBlock *block; + + /* migration has already setup the bitmap, reuse it. */ + if (!migration_in_colo_state()) { + if (ram_save_init_globals() < 0) { + return -1; + } + } + + rcu_read_lock(); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); @@ -1967,7 +1987,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) int ret; int i; int64_t t0; - int pages_sent = 0; + int done = 0; rcu_read_lock(); if (ram_list.version != last_version) { @@ -1987,9 +2007,9 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) pages = ram_find_and_save_block(f, false, &bytes_transferred); /* no more pages to sent */ if (pages == 0) { + done = 1; break; } - pages_sent += pages; acct_info.iterations++; /* we want to check in the 1st loop, just in case it was the 1st time @@ -2024,7 +2044,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return ret; } - return pages_sent; + return done; } /* Called with iothread lock */ @@ -2044,7 +2064,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque) while (true) { int pages; - pages = ram_find_and_save_block(f, true, &bytes_transferred); + pages = ram_find_and_save_block(f, !migration_in_colo_state(), + &bytes_transferred); /* no more blocks to sent */ if (pages == 0) { break; diff --git a/migration/rdma.c b/migration/rdma.c index 5110ec828d..674ccab12e 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -350,6 +350,7 @@ typedef struct RDMAContext { */ int error_state; int error_reported; + int received_error; /* * Description of ram blocks used throughout the code. @@ -1676,6 +1677,9 @@ static int qemu_rdma_exchange_get_response(RDMAContext *rdma, ", but got: %s (%d), length: %d", control_desc[expecting], expecting, control_desc[head->type], head->type, head->len); + if (head->type == RDMA_CONTROL_ERROR) { + rdma->received_error = true; + } return -EIO; } if (head->len > RDMA_CONTROL_MAX_BUFFER - sizeof(*head)) { @@ -1934,10 +1938,7 @@ static int qemu_rdma_write_one(QEMUFile *f, RDMAContext *rdma, * memset() + madvise() the entire chunk without RDMA. 
*/ - if (can_use_buffer_find_nonzero_offset((void *)(uintptr_t)sge.addr, - length) - && buffer_find_nonzero_offset((void *)(uintptr_t)sge.addr, - length) == length) { + if (buffer_is_zero((void *)(uintptr_t)sge.addr, length)) { RDMACompress comp = { .offset = current_addr, .value = 0, @@ -2205,7 +2206,7 @@ static void qemu_rdma_cleanup(RDMAContext *rdma) int ret, idx; if (rdma->cm_id && rdma->connected) { - if (rdma->error_state) { + if (rdma->error_state && !rdma->received_error) { RDMAControlHeader head = { .len = 0, .type = RDMA_CONTROL_ERROR, .repeat = 1, @@ -2807,6 +2808,9 @@ static int qio_channel_rdma_close(QIOChannel *ioc, QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); trace_qemu_rdma_close(); if (rioc->rdma) { + if (!rioc->rdma->error_state) { + rioc->rdma->error_state = qemu_file_get_error(rioc->file); + } qemu_rdma_cleanup(rioc->rdma); g_free(rioc->rdma); rioc->rdma = NULL; diff --git a/migration/savevm.c b/migration/savevm.c index 33a2911ec2..0363372acc 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -265,6 +265,7 @@ typedef struct SaveState { bool skip_configuration; uint32_t len; const char *name; + uint32_t target_page_bits; } SaveState; static SaveState savevm_state = { @@ -286,6 +287,19 @@ static void configuration_pre_save(void *opaque) state->len = strlen(current_name); state->name = current_name; + state->target_page_bits = TARGET_PAGE_BITS; +} + +static int configuration_pre_load(void *opaque) +{ + SaveState *state = opaque; + + /* If there is no target-page-bits subsection it means the source + * predates the variable-target-page-bits support and is using the + * minimum possible value for this CPU. + */ + state->target_page_bits = TARGET_PAGE_BITS_MIN; + return 0; } static int configuration_post_load(void *opaque, int version_id) @@ -298,12 +312,43 @@ static int configuration_post_load(void *opaque, int version_id) (int) state->len, state->name, current_name); return -EINVAL; } + + if (state->target_page_bits != TARGET_PAGE_BITS) { + error_report("Received TARGET_PAGE_BITS is %d but local is %d", + state->target_page_bits, TARGET_PAGE_BITS); + return -EINVAL; + } + return 0; } +/* The target-page-bits subsection is present only if the + * target page size is not the same as the default (ie the + * minimum page size for a variable-page-size guest CPU). + * If it is present then it contains the actual target page + * bits for the machine, and migration will fail if the + * two ends don't agree about it. 
+ */ +static bool vmstate_target_page_bits_needed(void *opaque) +{ + return TARGET_PAGE_BITS > TARGET_PAGE_BITS_MIN; +} + +static const VMStateDescription vmstate_target_page_bits = { + .name = "configuration/target-page-bits", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmstate_target_page_bits_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT32(target_page_bits, SaveState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_configuration = { .name = "configuration", .version_id = 1, + .pre_load = configuration_pre_load, .post_load = configuration_post_load, .pre_save = configuration_pre_save, .fields = (VMStateField[]) { @@ -311,6 +356,10 @@ static const VMStateDescription vmstate_configuration = { VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len), VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription*[]) { + &vmstate_target_page_bits, + NULL + } }; static void dump_vmstate_vmsd(FILE *out_file, @@ -1582,6 +1631,7 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis) } bioc = qio_channel_buffer_new(length); + qio_channel_set_name(QIO_CHANNEL(bioc), "migration-loadvm-buffer"); ret = qemu_get_buffer(mis->from_src_file, bioc->data, length); @@ -1828,40 +1878,45 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis) static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) { uint8_t section_type; - int ret; + int ret = 0; while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) { - + ret = 0; trace_qemu_loadvm_state_section(section_type); switch (section_type) { case QEMU_VM_SECTION_START: case QEMU_VM_SECTION_FULL: ret = qemu_loadvm_section_start_full(f, mis); if (ret < 0) { - return ret; + goto out; } break; case QEMU_VM_SECTION_PART: case QEMU_VM_SECTION_END: ret = qemu_loadvm_section_part_end(f, mis); if (ret < 0) { - return ret; + goto out; } break; case QEMU_VM_COMMAND: ret = loadvm_process_command(f); trace_qemu_loadvm_state_section_command(ret); if ((ret < 0) || (ret & LOADVM_QUIT)) { - return ret; + goto out; } break; default: error_report("Unknown savevm section type %d", section_type); - return -EINVAL; + ret = -EINVAL; + goto out; } } - return 0; +out: + if (ret < 0) { + qemu_file_set_error(f, ret); + } + return ret; } int qemu_loadvm_state(QEMUFile *f) @@ -2068,6 +2123,7 @@ void qmp_xen_save_devices_state(const char *filename, Error **errp) if (!ioc) { goto the_end; } + qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-save-state"); f = qemu_fopen_channel_output(QIO_CHANNEL(ioc)); ret = qemu_save_device_state(f); qemu_fclose(f); @@ -2100,6 +2156,7 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp) if (!ioc) { return; } + qio_channel_set_name(QIO_CHANNEL(ioc), "migration-xen-load-state"); f = qemu_fopen_channel_input(QIO_CHANNEL(ioc)); migration_incoming_state_new(f); diff --git a/migration/socket.c b/migration/socket.c index 00de1fe127..11f80b119b 100644 --- a/migration/socket.c +++ b/migration/socket.c @@ -100,6 +100,7 @@ static void socket_start_outgoing_migration(MigrationState *s, data->hostname = g_strdup(saddr->u.inet.data->host); } + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-outgoing"); qio_channel_socket_connect_async(sioc, saddr, socket_outgoing_migration, @@ -112,8 +113,12 @@ void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp) { - SocketAddress *saddr = tcp_build_address(host_port, errp); - socket_start_outgoing_migration(s, saddr, errp); + Error *err = NULL; + SocketAddress 
*saddr = tcp_build_address(host_port, &err); + if (!err) { + socket_start_outgoing_migration(s, saddr, &err); + } + error_propagate(errp, err); } void unix_start_outgoing_migration(MigrationState *s, @@ -142,6 +147,7 @@ static gboolean socket_accept_incoming_migration(QIOChannel *ioc, trace_migration_socket_incoming_accepted(); + qio_channel_set_name(QIO_CHANNEL(sioc), "migration-socket-incoming"); migration_channel_process_incoming(migrate_get_current(), QIO_CHANNEL(sioc)); object_unref(OBJECT(sioc)); @@ -158,6 +164,9 @@ static void socket_start_incoming_migration(SocketAddress *saddr, { QIOChannelSocket *listen_ioc = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(listen_ioc), + "migration-socket-listener"); + if (qio_channel_socket_listen_sync(listen_ioc, saddr, errp) < 0) { object_unref(OBJECT(listen_ioc)); qapi_free_SocketAddress(saddr); @@ -174,8 +183,12 @@ static void socket_start_incoming_migration(SocketAddress *saddr, void tcp_start_incoming_migration(const char *host_port, Error **errp) { - SocketAddress *saddr = tcp_build_address(host_port, errp); - socket_start_incoming_migration(saddr, errp); + Error *err = NULL; + SocketAddress *saddr = tcp_build_address(host_port, &err); + if (!err) { + socket_start_incoming_migration(saddr, &err); + } + error_propagate(errp, err); } void unix_start_incoming_migration(const char *path, Error **errp) diff --git a/migration/tls.c b/migration/tls.c index 12c053d15f..49ca9a8930 100644 --- a/migration/tls.c +++ b/migration/tls.c @@ -99,6 +99,7 @@ void migration_tls_channel_process_incoming(MigrationState *s, } trace_migration_tls_incoming_handshake_start(); + qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-incoming"); qio_channel_tls_handshake(tioc, migration_tls_incoming_handshake, NULL, @@ -154,6 +155,7 @@ void migration_tls_channel_connect(MigrationState *s, } trace_migration_tls_outgoing_handshake_start(hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "migration-tls-outgoing"); qio_channel_tls_handshake(tioc, migration_tls_outgoing_handshake, s, diff --git a/migration/trace-events b/migration/trace-events index dfee75abf4..94134f700b 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -207,3 +207,9 @@ migration_tls_outgoing_handshake_complete(void) "" migration_tls_incoming_handshake_start(void) "" migration_tls_incoming_handshake_error(const char *err) "err=%s" migration_tls_incoming_handshake_complete(void) "" + +# migration/colo.c +colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'" +colo_send_message(const char *msg) "Send '%s' message" +colo_receive_message(const char *msg) "Receive '%s' message" +colo_failover_set_state(const char *new_state) "new state %s" diff --git a/migration/vmstate.c b/migration/vmstate.c index fc29acf74d..0bc9f35ef8 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -130,6 +130,8 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, } if (ret < 0) { qemu_file_set_error(f, ret); + error_report("Failed to load %s:%s", vmsd->name, + field->name); trace_vmstate_load_field_error(field->name, ret); return ret; } @@ -555,6 +557,7 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size) if (*v == v2) { return 0; } + error_report("%" PRIx32 " != %" PRIx32, *v, v2); return -EINVAL; } @@ -578,6 +581,9 @@ static int get_int32_le(QEMUFile *f, void *pv, size_t size) *cur = loaded; return 0; } + error_report("Invalid value %" PRId32 + " expecting positive value <= %" PRId32, + loaded, *cur); return -EINVAL; } @@ -683,6 +689,7 
@@ static int get_uint32_equal(QEMUFile *f, void *pv, size_t size) if (*v == v2) { return 0; } + error_report("%" PRIx32 " != %" PRIx32, *v, v2); return -EINVAL; } @@ -725,6 +732,7 @@ static int get_uint64_equal(QEMUFile *f, void *pv, size_t size) if (*v == v2) { return 0; } + error_report("%" PRIx64 " != %" PRIx64, *v, v2); return -EINVAL; } @@ -746,6 +754,7 @@ static int get_uint8_equal(QEMUFile *f, void *pv, size_t size) if (*v == v2) { return 0; } + error_report("%x != %x", *v, v2); return -EINVAL; } @@ -767,6 +776,7 @@ static int get_uint16_equal(QEMUFile *f, void *pv, size_t size) if (*v == v2) { return 0; } + error_report("%x != %x", *v, v2); return -EINVAL; } diff --git a/monitor.c b/monitor.c index c0a2fe7e84..8a57995459 100644 --- a/monitor.c +++ b/monitor.c @@ -59,7 +59,6 @@ #include "qapi/qmp/json-streamer.h" #include "qapi/qmp/json-parser.h" #include "qom/object_interfaces.h" -#include "cpu.h" #include "trace.h" #include "trace/control.h" #include "monitor/hmp-target.h" @@ -76,15 +75,9 @@ #include "qapi/qmp-event.h" #include "qapi-event.h" #include "qmp-introspect.h" -#include "sysemu/block-backend.h" #include "sysemu/qtest.h" #include "qemu/cutils.h" - -/* for hmp_info_irq/pic */ -#if defined(TARGET_SPARC) -#include "hw/sparc/sun4m.h" -#endif -#include "hw/lm32/lm32_pic.h" +#include "qapi/qmp/dispatch.h" #if defined(TARGET_S390X) #include "hw/s390x/storage-keys.h" @@ -133,13 +126,10 @@ typedef struct mon_cmd_t { const char *args_type; const char *params; const char *help; - union { - void (*cmd)(Monitor *mon, const QDict *qdict); - void (*cmd_new)(QDict *params, QObject **ret_data, Error **errp); - } mhandler; - /* @sub_table is a list of 2nd level of commands. If it do not exist, - * mhandler should be used. If it exist, sub_table[?].mhandler should be - * used, and mhandler of 1st level plays the role of help function. + void (*cmd)(Monitor *mon, const QDict *qdict); + /* @sub_table is a list of 2nd level of commands. If it does not exist, + * cmd should be used. If it exists, sub_table[?].cmd should be + * used, and cmd of 1st level plays the role of help function. */ struct mon_cmd_t *sub_table; void (*command_completion)(ReadLineState *rs, int nb_args, const char *str); @@ -172,7 +162,6 @@ struct MonFdset { }; typedef struct { - QObject *id; JSONMessageParser parser; /* * When a client connects, we're in capabilities negotiation mode. 
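/*
 * Illustrative sketch, not part of the patch: the next monitor.c hunks
 * replace the Monitor's CharDriverState pointer with an embedded
 * CharBackend driven through the qemu_chr_fe_*() helpers. A minimal
 * front end following the same pattern could look like the code below.
 * MyFrontEnd and the my_* callbacks are hypothetical names; only the
 * qemu_chr_fe_*() calls and their signatures are taken from the hunks
 * in this patch, and the header paths are assumptions for the QEMU
 * tree of this era.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "sysemu/char.h"      /* assumed location of CharBackend here */

typedef struct MyFrontEnd {
    CharBackend chr;          /* embedded by value, not a pointer */
} MyFrontEnd;

static int my_can_read(void *opaque)
{
    return 1;                 /* always willing to accept input */
}

static void my_read(void *opaque, const uint8_t *buf, int size)
{
    /* consume @size bytes received from the character device */
}

static void my_event(void *opaque, int event)
{
    /* react to CHR_EVENT_OPENED, CHR_EVENT_CLOSED, ... */
}

static void my_frontend_init(MyFrontEnd *f, CharDriverState *chr)
{
    /* Tie the backend to this front end; aborts on failure,
     * matching what monitor_init() does after this patch. */
    qemu_chr_fe_init(&f->chr, chr, &error_abort);
    /* Register callbacks; the trailing NULL and true arguments mirror
     * the qemu_chr_fe_set_handlers() call used by monitor_init(). */
    qemu_chr_fe_set_handlers(&f->chr, my_can_read, my_read, my_event,
                             f, NULL, true);
}

static void my_frontend_fini(MyFrontEnd *f)
{
    /* Detach handlers and release the backend, as monitor_data_destroy()
     * now does via qemu_chr_fe_deinit(). */
    qemu_chr_fe_deinit(&f->chr);
}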
@@ -199,7 +188,7 @@ typedef struct { } MonitorQAPIEventConf; struct Monitor { - CharDriverState *chr; + CharBackend chr; int reset_seen; int flags; int suspend_cnt; @@ -235,8 +224,6 @@ static int mon_refcount; static mon_cmd_t mon_cmds[]; static mon_cmd_t info_cmds[]; -static const mon_cmd_t qmp_cmds[]; - Monitor *cur_mon; static QEMUClockType event_clock_type = QEMU_CLOCK_REALTIME; @@ -312,7 +299,7 @@ static void monitor_flush_locked(Monitor *mon) len = qstring_get_length(mon->outbuf); if (len && !mon->mux_out) { - rc = qemu_chr_fe_write(mon->chr, (const uint8_t *) buf, len); + rc = qemu_chr_fe_write(&mon->chr, (const uint8_t *) buf, len); if ((rc < 0 && errno != EAGAIN) || (rc == len)) { /* all flushed or error */ QDECREF(mon->outbuf); @@ -326,8 +313,9 @@ static void monitor_flush_locked(Monitor *mon) mon->outbuf = tmp; } if (mon->out_watch == 0) { - mon->out_watch = qemu_chr_fe_add_watch(mon->chr, G_IO_OUT|G_IO_HUP, - monitor_unblocked, mon); + mon->out_watch = + qemu_chr_fe_add_watch(&mon->chr, G_IO_OUT | G_IO_HUP, + monitor_unblocked, mon); } } } @@ -407,49 +395,6 @@ static void monitor_json_emitter(Monitor *mon, const QObject *data) QDECREF(json); } -static QDict *build_qmp_error_dict(Error *err) -{ - QObject *obj; - - obj = qobject_from_jsonf("{ 'error': { 'class': %s, 'desc': %s } }", - QapiErrorClass_lookup[error_get_class(err)], - error_get_pretty(err)); - - return qobject_to_qdict(obj); -} - -static void monitor_protocol_emitter(Monitor *mon, QObject *data, - Error *err) -{ - QDict *qmp; - - trace_monitor_protocol_emitter(mon); - - if (!err) { - /* success response */ - qmp = qdict_new(); - if (data) { - qobject_incref(data); - qdict_put_obj(qmp, "return", data); - } else { - /* return an empty QDict by default */ - qdict_put(qmp, "return", qdict_new()); - } - } else { - /* error response */ - qmp = build_qmp_error_dict(err); - } - - if (mon->qmp.id) { - qdict_put_obj(qmp, "id", mon->qmp.id); - mon->qmp.id = NULL; - } - - monitor_json_emitter(mon, QOBJECT(qmp)); - QDECREF(qmp); -} - - static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { /* Limit guest-triggerable events to 1 per second */ [QAPI_EVENT_RTC_CHANGE] = { 1000 * SCALE_MS }, @@ -621,7 +566,7 @@ static void monitor_qapi_event_init(void) qmp_event_set_func_emit(monitor_qapi_event_queue); } -static void qmp_capabilities(QDict *params, QObject **ret_data, Error **errp) +void qmp_qmp_capabilities(Error **errp) { cur_mon->qmp.in_command_mode = true; } @@ -639,9 +584,7 @@ static void monitor_data_init(Monitor *mon) static void monitor_data_destroy(Monitor *mon) { - if (mon->chr) { - qemu_chr_add_handlers(mon->chr, NULL, NULL, NULL, NULL); - } + qemu_chr_fe_deinit(&mon->chr); if (monitor_is_qmp(mon)) { json_message_parser_destroy(&mon->qmp.parser); } @@ -960,21 +903,28 @@ static void hmp_info_help(Monitor *mon, const QDict *qdict) help_cmd(mon, "info"); } -CommandInfoList *qmp_query_commands(Error **errp) +static void query_commands_cb(QmpCommand *cmd, void *opaque) { - CommandInfoList *info, *cmd_list = NULL; - const mon_cmd_t *cmd; - - for (cmd = qmp_cmds; cmd->name != NULL; cmd++) { - info = g_malloc0(sizeof(*info)); - info->value = g_malloc0(sizeof(*info->value)); - info->value->name = g_strdup(cmd->name); + CommandInfoList *info, **list = opaque; - info->next = cmd_list; - cmd_list = info; + if (!cmd->enabled) { + return; } - return cmd_list; + info = g_malloc0(sizeof(*info)); + info->value = g_malloc0(sizeof(*info->value)); + info->value->name = g_strdup(cmd->name); + info->next = *list; + *list = 
info; +} + +CommandInfoList *qmp_query_commands(Error **errp) +{ + CommandInfoList *list = NULL; + + qmp_for_each_command(query_commands_cb, &list); + + return list; } EventInfoList *qmp_query_events(Error **errp) @@ -1002,7 +952,7 @@ EventInfoList *qmp_query_events(Error **errp) * directly into QObject instead of first parsing it with * visit_type_SchemaInfoList() into a SchemaInfoList, then marshal it * to QObject with generated output marshallers, every time. Instead, - * we do it in test-qmp-input-visitor.c, just to make sure + * we do it in test-qobject-input-visitor.c, just to make sure * qapi-introspect.py's output actually conforms to the schema. */ static void qmp_query_qmp_schema(QDict *qdict, QObject **ret_data, @@ -1011,6 +961,58 @@ static void qmp_query_qmp_schema(QDict *qdict, QObject **ret_data, *ret_data = qobject_from_json(qmp_schema_json); } +/* + * We used to define commands in qmp-commands.hx in addition to the + * QAPI schema. This permitted defining some of them only in certain + * configurations. query-commands has always reflected that (good, + * because it lets QMP clients figure out what's actually available), + * while query-qmp-schema never did (not so good). This function is a + * hack to keep the configuration-specific commands defined exactly as + * before, even though qmp-commands.hx is gone. + * + * FIXME Educate the QAPI schema on configuration-specific commands, + * and drop this hack. + */ +static void qmp_unregister_commands_hack(void) +{ +#ifndef CONFIG_SPICE + qmp_unregister_command("query-spice"); +#endif +#ifndef TARGET_I386 + qmp_unregister_command("rtc-reset-reinjection"); +#endif +#ifndef TARGET_S390X + qmp_unregister_command("dump-skeys"); +#endif +#ifndef TARGET_ARM + qmp_unregister_command("query-gic-capabilities"); +#endif +#if !defined(TARGET_S390X) + qmp_unregister_command("query-cpu-model-expansion"); + qmp_unregister_command("query-cpu-model-baseline"); + qmp_unregister_command("query-cpu-model-comparison"); +#endif +#if !defined(TARGET_PPC) && !defined(TARGET_ARM) && !defined(TARGET_I386) \ + && !defined(TARGET_S390X) + qmp_unregister_command("query-cpu-definitions"); +#endif +} + +static void qmp_init_marshal(void) +{ + qmp_register_command("query-qmp-schema", qmp_query_qmp_schema, + QCO_NO_OPTIONS); + qmp_register_command("device_add", qmp_device_add, + QCO_NO_OPTIONS); + qmp_register_command("netdev_add", qmp_netdev_add, + QCO_NO_OPTIONS); + + /* call it after the rest of qapi_init() */ + register_module_init(qmp_unregister_commands_hack, MODULE_INIT_QAPI); +} + +qapi_init(qmp_init_marshal); + /* set the current CPU defined by the user */ int monitor_set_cpu(int cpu_index) { @@ -1027,7 +1029,7 @@ int monitor_set_cpu(int cpu_index) CPUState *mon_get_cpu(void) { if (!cur_mon->mon_cpu) { - monitor_set_cpu(0); + monitor_set_cpu(first_cpu->cpu_index); } cpu_synchronize_state(cur_mon->mon_cpu); return cur_mon->mon_cpu; @@ -1748,7 +1750,7 @@ void qmp_getfd(const char *fdname, Error **errp) mon_fd_t *monfd; int fd; - fd = qemu_chr_fe_get_msgfd(cur_mon->chr); + fd = qemu_chr_fe_get_msgfd(&cur_mon->chr); if (fd == -1) { error_setg(errp, QERR_FD_NOT_SUPPLIED); return; @@ -1873,7 +1875,7 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, Monitor *mon = cur_mon; AddfdInfo *fdinfo; - fd = qemu_chr_fe_get_msgfd(mon->chr); + fd = qemu_chr_fe_get_msgfd(&mon->chr); if (fd == -1) { error_setg(errp, QERR_FD_NOT_SUPPLIED); goto error; @@ -2171,11 +2173,6 @@ static mon_cmd_t mon_cmds[] = { { NULL, NULL, }, }; -static const 
mon_cmd_t qmp_cmds[] = { -#include "qmp-commands-old.h" - { /* NULL */ }, -}; - /*******************************************************************/ static const char *pch; @@ -2526,11 +2523,6 @@ static const mon_cmd_t *search_dispatch_table(const mon_cmd_t *disp_table, return NULL; } -static const mon_cmd_t *qmp_find_cmd(const char *cmdname) -{ - return search_dispatch_table(qmp_cmds, cmdname); -} - /* * Parse command name from @cmdp according to command table @table. * If blank, return NULL. @@ -2972,7 +2964,7 @@ static void handle_hmp_command(Monitor *mon, const char *cmdline) return; } - cmd->mhandler.cmd(mon, qdict); + cmd->cmd(mon, qdict); QDECREF(qdict); } @@ -3353,13 +3345,14 @@ void info_trace_events_completion(ReadLineState *rs, int nb_args, const char *st len = strlen(str); readline_set_completion_index(rs, len); if (nb_args == 2) { - TraceEventID id; - for (id = 0; id < trace_event_count(); id++) { - const char *event_name = trace_event_get_name(trace_event_id(id)); - if (!strncmp(str, event_name, len)) { - readline_add_completion(rs, event_name); - } + TraceEventIter iter; + TraceEvent *ev; + char *pattern = g_strdup_printf("%s*", str); + trace_event_iter_init(&iter, pattern); + while ((ev = trace_event_iter_next(&iter)) != NULL) { + readline_add_completion(rs, trace_event_get_name(ev)); } + g_free(pattern); } } @@ -3370,13 +3363,14 @@ void trace_event_completion(ReadLineState *rs, int nb_args, const char *str) len = strlen(str); readline_set_completion_index(rs, len); if (nb_args == 2) { - TraceEventID id; - for (id = 0; id < trace_event_count(); id++) { - const char *event_name = trace_event_get_name(trace_event_id(id)); - if (!strncmp(str, event_name, len)) { - readline_add_completion(rs, event_name); - } + TraceEventIter iter; + TraceEvent *ev; + char *pattern = g_strdup_printf("%s*", str); + trace_event_iter_init(&iter, pattern); + while ((ev = trace_event_iter_next(&iter)) != NULL) { + readline_add_completion(rs, trace_event_get_name(ev)); } + g_free(pattern); } else if (nb_args == 3) { add_completion_option(rs, str, "on"); add_completion_option(rs, str, "off"); @@ -3671,219 +3665,26 @@ static int monitor_can_read(void *opaque) return (mon->suspend_cnt == 0) ? 1 : 0; } -static bool invalid_qmp_mode(const Monitor *mon, const mon_cmd_t *cmd, +static bool invalid_qmp_mode(const Monitor *mon, const char *cmd, Error **errp) { - bool is_cap = cmd->mhandler.cmd_new == qmp_capabilities; + bool is_cap = g_str_equal(cmd, "qmp_capabilities"); if (is_cap && mon->qmp.in_command_mode) { error_set(errp, ERROR_CLASS_COMMAND_NOT_FOUND, "Capabilities negotiation is already complete, command " - "'%s' ignored", cmd->name); + "'%s' ignored", cmd); return true; } if (!is_cap && !mon->qmp.in_command_mode) { error_set(errp, ERROR_CLASS_COMMAND_NOT_FOUND, "Expecting capabilities negotiation with " - "'qmp_capabilities' before command '%s'", cmd->name); + "'qmp_capabilities' before command '%s'", cmd); return true; } return false; } -/* - * Argument validation rules: - * - * 1. The argument must exist in cmd_args qdict - * 2. The argument type must be the expected one - * - * Special case: If the argument doesn't exist in cmd_args and - * the QMP_ACCEPT_UNKNOWNS flag is set, then the - * checking is skipped for it. 
- */ -static void check_client_args_type(const QDict *client_args, - const QDict *cmd_args, int flags, - Error **errp) -{ - const QDictEntry *ent; - - for (ent = qdict_first(client_args); ent;ent = qdict_next(client_args,ent)){ - QObject *obj; - QString *arg_type; - const QObject *client_arg = qdict_entry_value(ent); - const char *client_arg_name = qdict_entry_key(ent); - - obj = qdict_get(cmd_args, client_arg_name); - if (!obj) { - if (flags & QMP_ACCEPT_UNKNOWNS) { - /* handler accepts unknowns */ - continue; - } - /* client arg doesn't exist */ - error_setg(errp, QERR_INVALID_PARAMETER, client_arg_name); - return; - } - - arg_type = qobject_to_qstring(obj); - assert(arg_type != NULL); - - /* check if argument's type is correct */ - switch (qstring_get_str(arg_type)[0]) { - case 'F': - case 'B': - case 's': - if (qobject_type(client_arg) != QTYPE_QSTRING) { - error_setg(errp, QERR_INVALID_PARAMETER_TYPE, - client_arg_name, "string"); - return; - } - break; - case 'i': - case 'l': - case 'M': - case 'o': - if (qobject_type(client_arg) != QTYPE_QINT) { - error_setg(errp, QERR_INVALID_PARAMETER_TYPE, - client_arg_name, "int"); - return; - } - break; - case 'T': - if (qobject_type(client_arg) != QTYPE_QINT && - qobject_type(client_arg) != QTYPE_QFLOAT) { - error_setg(errp, QERR_INVALID_PARAMETER_TYPE, - client_arg_name, "number"); - return; - } - break; - case 'b': - case '-': - if (qobject_type(client_arg) != QTYPE_QBOOL) { - error_setg(errp, QERR_INVALID_PARAMETER_TYPE, - client_arg_name, "bool"); - return; - } - break; - case 'O': - assert(flags & QMP_ACCEPT_UNKNOWNS); - break; - case 'q': - /* Any QObject can be passed. */ - break; - case '/': - case '.': - /* - * These types are not supported by QMP and thus are not - * handled here. Fall through. - */ - default: - abort(); - } - } -} - -/* - * - Check if the client has passed all mandatory args - * - Set special flags for argument validation - */ -static void check_mandatory_args(const QDict *cmd_args, - const QDict *client_args, int *flags, - Error **errp) -{ - const QDictEntry *ent; - - for (ent = qdict_first(cmd_args); ent; ent = qdict_next(cmd_args, ent)) { - const char *cmd_arg_name = qdict_entry_key(ent); - QString *type = qobject_to_qstring(qdict_entry_value(ent)); - assert(type != NULL); - - if (qstring_get_str(type)[0] == 'O') { - assert((*flags & QMP_ACCEPT_UNKNOWNS) == 0); - *flags |= QMP_ACCEPT_UNKNOWNS; - } else if (qstring_get_str(type)[0] != '-' && - qstring_get_str(type)[1] != '?' && - !qdict_haskey(client_args, cmd_arg_name)) { - error_setg(errp, QERR_MISSING_PARAMETER, cmd_arg_name); - return; - } - } -} - -static QDict *qdict_from_args_type(const char *args_type) -{ - int i; - QDict *qdict; - QString *key, *type, *cur_qs; - - assert(args_type != NULL); - - qdict = qdict_new(); - - if (args_type == NULL || args_type[0] == '\0') { - /* no args, empty qdict */ - goto out; - } - - key = qstring_new(); - type = qstring_new(); - - cur_qs = key; - - for (i = 0;; i++) { - switch (args_type[i]) { - case ',': - case '\0': - qdict_put(qdict, qstring_get_str(key), type); - QDECREF(key); - if (args_type[i] == '\0') { - goto out; - } - type = qstring_new(); /* qdict has ref */ - cur_qs = key = qstring_new(); - break; - case ':': - cur_qs = type; - break; - default: - qstring_append_chr(cur_qs, args_type[i]); - break; - } - } - -out: - return qdict; -} - -/* - * Client argument checking rules: - * - * 1. Client must provide all mandatory arguments - * 2. Each argument provided by the client must be expected - * 3. 
Each argument provided by the client must have the type expected - * by the command - */ -static void qmp_check_client_args(const mon_cmd_t *cmd, QDict *client_args, - Error **errp) -{ - Error *err = NULL; - int flags; - QDict *cmd_args; - - cmd_args = qdict_from_args_type(cmd->args_type); - - flags = 0; - check_mandatory_args(cmd_args, client_args, &flags, &err); - if (err) { - goto out; - } - - check_client_args_type(client_args, cmd_args, flags, &err); - -out: - error_propagate(errp, err); - QDECREF(cmd_args); -} - /* * Input object checking rules * @@ -3942,65 +3743,58 @@ static QDict *qmp_check_input_obj(QObject *input_obj, Error **errp) static void handle_qmp_command(JSONMessageParser *parser, GQueue *tokens) { - Error *local_err = NULL; - QObject *obj, *data; - QDict *input, *args; - const mon_cmd_t *cmd; + QObject *req, *rsp = NULL, *id = NULL; + QDict *qdict = NULL; const char *cmd_name; Monitor *mon = cur_mon; + Error *err = NULL; - args = input = NULL; - data = NULL; - - obj = json_parser_parse(tokens, NULL); - if (!obj) { - // FIXME: should be triggered in json_parser_parse() - error_setg(&local_err, QERR_JSON_PARSING); + req = json_parser_parse_err(tokens, NULL, &err); + if (err || !req || qobject_type(req) != QTYPE_QDICT) { + if (!err) { + error_setg(&err, QERR_JSON_PARSING); + } goto err_out; } - input = qmp_check_input_obj(obj, &local_err); - if (!input) { - qobject_decref(obj); + qdict = qmp_check_input_obj(req, &err); + if (!qdict) { goto err_out; } - mon->qmp.id = qdict_get(input, "id"); - qobject_incref(mon->qmp.id); + id = qdict_get(qdict, "id"); + qobject_incref(id); + qdict_del(qdict, "id"); - cmd_name = qdict_get_str(input, "execute"); + cmd_name = qdict_get_str(qdict, "execute"); trace_handle_qmp_command(mon, cmd_name); - cmd = qmp_find_cmd(cmd_name); - if (!cmd) { - error_set(&local_err, ERROR_CLASS_COMMAND_NOT_FOUND, - "The command %s has not been found", cmd_name); - goto err_out; - } - if (invalid_qmp_mode(mon, cmd, &local_err)) { + + if (invalid_qmp_mode(mon, cmd_name, &err)) { goto err_out; } - obj = qdict_get(input, "arguments"); - if (!obj) { - args = qdict_new(); - } else { - args = qobject_to_qdict(obj); - QINCREF(args); - } + rsp = qmp_dispatch(req); - qmp_check_client_args(cmd, args, &local_err); - if (local_err) { - goto err_out; +err_out: + if (err) { + qdict = qdict_new(); + qdict_put_obj(qdict, "error", qmp_build_error_object(err)); + error_free(err); + rsp = QOBJECT(qdict); } - cmd->mhandler.cmd_new(args, &data, &local_err); + if (rsp) { + if (id) { + qdict_put_obj(qobject_to_qdict(rsp), "id", id); + id = NULL; + } -err_out: - monitor_protocol_emitter(mon, data, local_err); - qobject_decref(data); - error_free(local_err); - QDECREF(input); - QDECREF(args); + monitor_json_emitter(mon, rsp); + } + + qobject_decref(id); + qobject_decref(rsp); + qobject_decref(req); } static void monitor_qmp_read(void *opaque, const uint8_t *buf, int size) @@ -4065,7 +3859,9 @@ static QObject *get_qmp_greeting(void) QObject *ver = NULL; qmp_marshal_query_version(NULL, &ver, NULL); - return qobject_from_jsonf("{'QMP':{'version': %p,'capabilities': []}}",ver); + + return qobject_from_jsonf("{'QMP': {'version': %p, 'capabilities': []}}", + ver); } static void monitor_qmp_event(void *opaque, int event) @@ -4177,6 +3973,27 @@ static void monitor_readline_flush(void *opaque) monitor_flush(opaque); } +/* + * Print to current monitor if we have one, else to stderr. + * TODO should return int, so callers can calculate width, but that + * requires surgery to monitor_vprintf(). 
Left for another day. + */ +void error_vprintf(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } else { + vfprintf(stderr, fmt, ap); + } +} + +void error_vprintf_unless_qmp(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } +} + static void __attribute__((constructor)) monitor_lock_init(void) { qemu_mutex_init(&monitor_lock); @@ -4196,7 +4013,7 @@ void monitor_init(CharDriverState *chr, int flags) mon = g_malloc(sizeof(*mon)); monitor_data_init(mon); - mon->chr = chr; + qemu_chr_fe_init(&mon->chr, chr, &error_abort); mon->flags = flags; if (flags & MONITOR_USE_READLINE) { mon->rs = readline_init(monitor_readline_printf, @@ -4207,13 +4024,13 @@ void monitor_init(CharDriverState *chr, int flags) } if (monitor_is_qmp(mon)) { - qemu_chr_add_handlers(chr, monitor_can_read, monitor_qmp_read, - monitor_qmp_event, mon); - qemu_chr_fe_set_echo(chr, true); + qemu_chr_fe_set_handlers(&mon->chr, monitor_can_read, monitor_qmp_read, + monitor_qmp_event, mon, NULL, true); + qemu_chr_fe_set_echo(&mon->chr, true); json_message_parser_init(&mon->qmp.parser, handle_qmp_command); } else { - qemu_chr_add_handlers(chr, monitor_can_read, monitor_read, - monitor_event, mon); + qemu_chr_fe_set_handlers(&mon->chr, monitor_can_read, monitor_read, + monitor_event, mon, NULL, true); } qemu_mutex_lock(&monitor_lock); @@ -4314,7 +4131,7 @@ QemuOptsList qemu_mon_opts = { .name = "chardev", .type = QEMU_OPT_STRING, },{ - .name = "default", + .name = "default", /* deprecated */ .type = QEMU_OPT_BOOL, },{ .name = "pretty", diff --git a/nbd/client.c b/nbd/client.c index a92f1e2275..ffb0743bce 100644 --- a/nbd/client.c +++ b/nbd/client.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. * Copyright (C) 2005 Anthony Liguori * * Network Block Device Client Side @@ -22,23 +23,34 @@ static int nbd_errno_to_system_errno(int err) { + int ret; switch (err) { case NBD_SUCCESS: - return 0; + ret = 0; + break; case NBD_EPERM: - return EPERM; + ret = EPERM; + break; case NBD_EIO: - return EIO; + ret = EIO; + break; case NBD_ENOMEM: - return ENOMEM; + ret = ENOMEM; + break; case NBD_ENOSPC: - return ENOSPC; + ret = ENOSPC; + break; + case NBD_ESHUTDOWN: + ret = ESHUTDOWN; + break; default: TRACE("Squashing unexpected error %d to EINVAL", err); /* fallthrough */ case NBD_EINVAL: - return EINVAL; + ret = EINVAL; + break; } + return ret; } /* Definitions for opaque data types */ @@ -74,64 +86,181 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); */ +/* Discard length bytes from channel. Return -errno on failure, or + * the amount of bytes consumed. */ +static ssize_t drop_sync(QIOChannel *ioc, size_t size) +{ + ssize_t ret = 0; + char small[1024]; + char *buffer; + + buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size)); + while (size > 0) { + ssize_t count = read_sync(ioc, buffer, MIN(65536, size)); + + if (count <= 0) { + goto cleanup; + } + assert(count <= size); + size -= count; + ret += count; + } + + cleanup: + if (buffer != small) { + g_free(buffer); + } + return ret; +} + +/* Send an option request. + * + * The request is for option @opt, with @data containing @len bytes of + * additional payload for the request (@len may be -1 to treat @data as + * a C string; and @data may be NULL if @len is 0). + * Return 0 if successful, -1 with errp set if it is impossible to + * continue. 
*/ +static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, + uint32_t len, const char *data, + Error **errp) +{ + nbd_option req; + QEMU_BUILD_BUG_ON(sizeof(req) != 16); + + if (len == -1) { + req.length = len = strlen(data); + } + TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len); -/* If type represents success, return 1 without further action. - * If type represents an error reply, consume the rest of the packet on ioc. - * Then return 0 for unsupported (so the client can fall back to - * other approaches), or -1 with errp set for other errors. + stq_be_p(&req.magic, NBD_OPTS_MAGIC); + stl_be_p(&req.option, opt); + stl_be_p(&req.length, len); + + if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) { + error_setg(errp, "Failed to send option request header"); + return -1; + } + + if (len && write_sync(ioc, (char *) data, len) != len) { + error_setg(errp, "Failed to send option request data"); + return -1; + } + + return 0; +} + +/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are + * not going to attempt further negotiation. */ +static void nbd_send_opt_abort(QIOChannel *ioc) +{ + /* Technically, a compliant server is supposed to reply to us; but + * older servers disconnected instead. At any rate, we're allowed + * to disconnect without waiting for the server reply, so we don't + * even care if the request makes it to the server, let alone + * waiting around for whether the server replies. */ + nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL); +} + + +/* Receive the header of an option reply, which should match the given + * opt. Read through the length field, but NOT the length bytes of + * payload. Return 0 if successful, -1 with errp set if it is + * impossible to continue. */ +static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, + nbd_opt_reply *reply, Error **errp) +{ + QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); + if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) { + error_setg(errp, "failed to read option reply"); + nbd_send_opt_abort(ioc); + return -1; + } + be64_to_cpus(&reply->magic); + be32_to_cpus(&reply->option); + be32_to_cpus(&reply->type); + be32_to_cpus(&reply->length); + + TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32, + reply->option, reply->type, reply->length); + + if (reply->magic != NBD_REP_MAGIC) { + error_setg(errp, "Unexpected option reply magic"); + nbd_send_opt_abort(ioc); + return -1; + } + if (reply->option != opt) { + error_setg(errp, "Unexpected option type %x expected %x", + reply->option, opt); + nbd_send_opt_abort(ioc); + return -1; + } + return 0; +} + +/* If reply represents success, return 1 without further action. + * If reply represents an error, consume the optional payload of + * the packet on ioc. Then return 0 for unsupported (so the client + * can fall back to other approaches), or -1 with errp set for other + * errors. 
*/ -static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, +static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply, Error **errp) { - uint32_t len; char *msg = NULL; int result = -1; - if (!(type & (1 << 31))) { + if (!(reply->type & (1 << 31))) { return 1; } - if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { - error_setg(errp, "failed to read option length"); - return -1; - } - len = be32_to_cpu(len); - if (len) { - if (len > NBD_MAX_BUFFER_SIZE) { + if (reply->length) { + if (reply->length > NBD_MAX_BUFFER_SIZE) { error_setg(errp, "server's error message is too long"); goto cleanup; } - msg = g_malloc(len + 1); - if (read_sync(ioc, msg, len) != len) { + msg = g_malloc(reply->length + 1); + if (read_sync(ioc, msg, reply->length) != reply->length) { error_setg(errp, "failed to read option error message"); goto cleanup; } - msg[len] = '\0'; + msg[reply->length] = '\0'; } - switch (type) { + switch (reply->type) { case NBD_REP_ERR_UNSUP: TRACE("server doesn't understand request %" PRIx32 - ", attempting fallback", opt); + ", attempting fallback", reply->option); result = 0; goto cleanup; case NBD_REP_ERR_POLICY: - error_setg(errp, "Denied by server for option %" PRIx32, opt); + error_setg(errp, "Denied by server for option %" PRIx32, + reply->option); break; case NBD_REP_ERR_INVALID: - error_setg(errp, "Invalid data length for option %" PRIx32, opt); + error_setg(errp, "Invalid data length for option %" PRIx32, + reply->option); + break; + + case NBD_REP_ERR_PLATFORM: + error_setg(errp, "Server lacks support for option %" PRIx32, + reply->option); break; case NBD_REP_ERR_TLS_REQD: error_setg(errp, "TLS negotiation required before option %" PRIx32, - opt); + reply->option); + break; + + case NBD_REP_ERR_SHUTDOWN: + error_setg(errp, "Server shutting down before option %" PRIx32, + reply->option); break; default: error_setg(errp, "Unknown error code when asking for option %" PRIx32, - opt); + reply->option); break; } @@ -141,244 +270,160 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type, cleanup: g_free(msg); + if (result < 0) { + nbd_send_opt_abort(ioc); + } return result; } -static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp) +/* Process another portion of the NBD_OPT_LIST reply. Set *@match if + * the current reply matches @want or if the server does not support + * NBD_OPT_LIST, otherwise leave @match alone. Return 0 if iteration + * is complete, positive if more replies are expected, or negative + * with @errp set if an unrecoverable error occurred. */ +static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match, + Error **errp) { - uint64_t magic; - uint32_t opt; - uint32_t type; + nbd_opt_reply reply; uint32_t len; uint32_t namelen; + char name[NBD_MAX_NAME_SIZE + 1]; int error; - *name = NULL; - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "failed to read list option magic"); + if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) { return -1; } - magic = be64_to_cpu(magic); - if (magic != NBD_REP_MAGIC) { - error_setg(errp, "Unexpected option list magic"); - return -1; - } - if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "failed to read list option"); - return -1; + error = nbd_handle_reply_err(ioc, &reply, errp); + if (error <= 0) { + /* The server did not support NBD_OPT_LIST, so set *match on + * the assumption that any name will be accepted. 
*/ + *match = true; + return error; } - opt = be32_to_cpu(opt); - if (opt != NBD_OPT_LIST) { - error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", - opt, NBD_OPT_LIST); + len = reply.length; + + if (reply.type == NBD_REP_ACK) { + if (len != 0) { + error_setg(errp, "length too long for option end"); + nbd_send_opt_abort(ioc); + return -1; + } + return 0; + } else if (reply.type != NBD_REP_SERVER) { + error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", + reply.type, NBD_REP_SERVER); + nbd_send_opt_abort(ioc); return -1; } - if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { - error_setg(errp, "failed to read list option type"); + if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { + error_setg(errp, "incorrect option length %" PRIu32, len); + nbd_send_opt_abort(ioc); return -1; } - type = be32_to_cpu(type); - error = nbd_handle_reply_err(ioc, opt, type, errp); - if (error <= 0) { - return error; + if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { + error_setg(errp, "failed to read option name length"); + nbd_send_opt_abort(ioc); + return -1; } - - if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) { - error_setg(errp, "failed to read option length"); + namelen = be32_to_cpu(namelen); + len -= sizeof(namelen); + if (len < namelen) { + error_setg(errp, "incorrect option name length"); + nbd_send_opt_abort(ioc); return -1; } - len = be32_to_cpu(len); - - if (type == NBD_REP_ACK) { - if (len != 0) { - error_setg(errp, "length too long for option end"); - return -1; - } - } else if (type == NBD_REP_SERVER) { - if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { - error_setg(errp, "incorrect option length"); - return -1; - } - if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) { - error_setg(errp, "failed to read option name length"); - return -1; - } - namelen = be32_to_cpu(namelen); - len -= sizeof(namelen); - if (len < namelen) { - error_setg(errp, "incorrect option name length"); - return -1; - } - if (namelen > NBD_MAX_NAME_SIZE) { - error_setg(errp, "export name length too long %" PRIu32, namelen); + if (namelen != strlen(want)) { + if (drop_sync(ioc, len) != len) { + error_setg(errp, "failed to skip export name with wrong length"); + nbd_send_opt_abort(ioc); return -1; } + return 1; + } - *name = g_new0(char, namelen + 1); - if (read_sync(ioc, *name, namelen) != namelen) { - error_setg(errp, "failed to read export name"); - g_free(*name); - *name = NULL; - return -1; - } - (*name)[namelen] = '\0'; - len -= namelen; - if (len) { - char *buf = g_malloc(len + 1); - if (read_sync(ioc, buf, len) != len) { - error_setg(errp, "failed to read export description"); - g_free(*name); - g_free(buf); - *name = NULL; - return -1; - } - buf[len] = '\0'; - TRACE("Ignoring export description: %s", buf); - g_free(buf); - } - } else { - error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x", - type, NBD_REP_SERVER); + assert(namelen < sizeof(name)); + if (read_sync(ioc, name, namelen) != namelen) { + error_setg(errp, "failed to read export name"); + nbd_send_opt_abort(ioc); return -1; } + name[namelen] = '\0'; + len -= namelen; + if (drop_sync(ioc, len) != len) { + error_setg(errp, "failed to read export description"); + nbd_send_opt_abort(ioc); + return -1; + } + if (!strcmp(name, want)) { + *match = true; + } return 1; } +/* Return -1 on failure, 0 if wantname is an available export. 
*/ static int nbd_receive_query_exports(QIOChannel *ioc, const char *wantname, Error **errp) { - uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); - uint32_t opt = cpu_to_be32(NBD_OPT_LIST); - uint32_t length = 0; bool foundExport = false; - TRACE("Querying export list"); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send list option magic"); - return -1; - } - - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send list option number"); - return -1; - } - - if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "Failed to send list option length"); + TRACE("Querying export list for '%s'", wantname); + if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) { return -1; } TRACE("Reading available export names"); while (1) { - char *name = NULL; - int ret = nbd_receive_list(ioc, &name, errp); + int ret = nbd_receive_list(ioc, wantname, &foundExport, errp); if (ret < 0) { - g_free(name); - name = NULL; + /* Server gave unexpected reply */ return -1; + } else if (ret == 0) { + /* Done iterating. */ + if (!foundExport) { + error_setg(errp, "No export with name '%s' available", + wantname); + nbd_send_opt_abort(ioc); + return -1; + } + TRACE("Found desired export name '%s'", wantname); + return 0; } - if (ret == 0) { - /* Server doesn't support export listing, so - * we will just assume an export with our - * wanted name exists */ - foundExport = true; - break; - } - if (name == NULL) { - TRACE("End of export name list"); - break; - } - if (g_str_equal(name, wantname)) { - foundExport = true; - TRACE("Found desired export name '%s'", name); - } else { - TRACE("Ignored export name '%s'", name); - } - g_free(name); - } - - if (!foundExport) { - error_setg(errp, "No export with name '%s' available", wantname); - return -1; } - - return 0; } static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, const char *hostname, Error **errp) { - uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC); - uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS); - uint32_t length = 0; - uint32_t type; + nbd_opt_reply reply; QIOChannelTLS *tioc; struct NBDTLSHandshakeData data = { 0 }; TRACE("Requesting TLS from server"); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send option magic"); - return NULL; - } - - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send option number"); - return NULL; - } - - if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "Failed to send option length"); - return NULL; - } - - TRACE("Getting TLS reply from server1"); - if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "failed to read option magic"); - return NULL; - } - magic = be64_to_cpu(magic); - if (magic != NBD_REP_MAGIC) { - error_setg(errp, "Unexpected option magic"); - return NULL; - } - TRACE("Getting TLS reply from server2"); - if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "failed to read option"); - return NULL; - } - opt = be32_to_cpu(opt); - if (opt != NBD_OPT_STARTTLS) { - error_setg(errp, "Unexpected option type %" PRIx32 " expected %x", - opt, NBD_OPT_STARTTLS); + if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) { return NULL; } TRACE("Getting TLS reply from server"); - if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) { - error_setg(errp, "failed to read option type"); + if 
(nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) { return NULL; } - type = be32_to_cpu(type); - if (type != NBD_REP_ACK) { + + if (reply.type != NBD_REP_ACK) { error_setg(errp, "Server rejected request to start TLS %" PRIx32, - type); + reply.type); + nbd_send_opt_abort(ioc); return NULL; } - TRACE("Getting TLS reply from server"); - if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) { - error_setg(errp, "failed to read option length"); - return NULL; - } - length = be32_to_cpu(length); - if (length != 0) { + if (reply.length != 0) { error_setg(errp, "Start TLS response was not zero %" PRIu32, - length); + reply.length); + nbd_send_opt_abort(ioc); return NULL; } @@ -387,6 +432,7 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, if (!tioc) { return NULL; } + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls"); data.loop = g_main_loop_new(g_main_context_default(), FALSE); TRACE("Starting TLS handshake"); qio_channel_tls_handshake(tioc, @@ -416,6 +462,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, char buf[256]; uint64_t magic, s; int rc; + bool zeroes = true; TRACE("Receiving negotiation tlscreds=%p hostname=%s.", tlscreds, hostname ? hostname : ""); @@ -465,8 +512,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, if (magic == NBD_OPTS_MAGIC) { uint32_t clientflags = 0; - uint32_t opt; - uint32_t namesize; uint16_t globalflags; bool fixedNewStyle = false; @@ -482,6 +527,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, TRACE("Server supports fixed new style"); clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; } + if (globalflags & NBD_FLAG_NO_ZEROES) { + zeroes = false; + TRACE("Server supports no zeroes"); + clientflags |= NBD_FLAG_C_NO_ZEROES; + } /* client requested flags */ clientflags = cpu_to_be32(clientflags); if (write_sync(ioc, &clientflags, sizeof(clientflags)) != @@ -516,28 +566,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, goto fail; } } - /* write the export name */ - magic = cpu_to_be64(magic); - if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - error_setg(errp, "Failed to send export name magic"); - goto fail; - } - opt = cpu_to_be32(NBD_OPT_EXPORT_NAME); - if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - error_setg(errp, "Failed to send export name option number"); - goto fail; - } - namesize = cpu_to_be32(strlen(name)); - if (write_sync(ioc, &namesize, sizeof(namesize)) != - sizeof(namesize)) { - error_setg(errp, "Failed to send export name length"); - goto fail; - } - if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) { - error_setg(errp, "Failed to send export name"); + /* write the export name request */ + if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name, + errp) < 0) { goto fail; } + /* Read the response */ if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) { error_setg(errp, "Failed to read export length"); goto fail; @@ -584,7 +619,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags, } TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags); - if (read_sync(ioc, &buf, 124) != 124) { + if (zeroes && drop_sync(ioc, 124) != 124) { error_setg(errp, "Failed to read reserved block"); goto fail; } @@ -706,18 +741,20 @@ int nbd_disconnect(int fd) } #endif -ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) +ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request) { uint8_t 
buf[NBD_REQUEST_SIZE]; ssize_t ret; TRACE("Sending request to server: " "{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 - ", .type=%" PRIu32 " }", - request->from, request->len, request->handle, request->type); + ", .flags = %" PRIx16 ", .type = %" PRIu16 " }", + request->from, request->len, request->handle, + request->flags, request->type); stl_be_p(buf, NBD_REQUEST_MAGIC); - stl_be_p(buf + 4, request->type); + stw_be_p(buf + 4, request->flags); + stw_be_p(buf + 6, request->type); stq_be_p(buf + 8, request->handle); stq_be_p(buf + 16, request->from); stl_be_p(buf + 24, request->len); @@ -734,7 +771,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request) return 0; } -ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) +ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; uint32_t magic; @@ -762,6 +799,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply) reply->error = nbd_errno_to_system_errno(reply->error); + if (reply->error == ESHUTDOWN) { + /* This works even on mingw which lacks a native ESHUTDOWN */ + LOG("server shutting down"); + return -EINVAL; + } TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }", magic, reply->error, reply->handle); diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 93a6ca8549..eee20abc25 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -53,16 +53,16 @@ /* This is all part of the "official" NBD API. * * The most up-to-date documentation is available at: - * https://github.com/yoe/nbd/blob/master/doc/proto.txt + * https://github.com/yoe/nbd/blob/master/doc/proto.md */ -#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4) +#define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4) #define NBD_REPLY_SIZE (4 + 4 + 8) #define NBD_REQUEST_MAGIC 0x25609513 #define NBD_REPLY_MAGIC 0x67446698 #define NBD_OPTS_MAGIC 0x49484156454F5054LL #define NBD_CLIENT_MAGIC 0x0000420281861253LL -#define NBD_REP_MAGIC 0x3e889045565a9LL +#define NBD_REP_MAGIC 0x0003e889045565a9LL #define NBD_SET_SOCK _IO(0xab, 0) #define NBD_SET_BLKSIZE _IO(0xab, 1) @@ -92,6 +92,7 @@ #define NBD_ENOMEM 12 #define NBD_EINVAL 22 #define NBD_ENOSPC 28 +#define NBD_ESHUTDOWN 108 static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) { @@ -104,9 +105,10 @@ static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size) return nbd_wr_syncv(ioc, &iov, 1, size, true); } -static inline ssize_t write_sync(QIOChannel *ioc, void *buffer, size_t size) +static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer, + size_t size) { - struct iovec iov = { .iov_base = buffer, .iov_len = size }; + struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size }; return nbd_wr_syncv(ioc, &iov, 1, size, false); } diff --git a/nbd/server.c b/nbd/server.c index 80fbb4da1d..5b76261666 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1,4 +1,5 @@ /* + * Copyright (C) 2016 Red Hat, Inc. 
* Copyright (C) 2005 Anthony Liguori * * Network Block Device Server Side @@ -38,6 +39,8 @@ static int system_errno_to_nbd_errno(int err) case EFBIG: case ENOSPC: return NBD_ENOSPC; + case ESHUTDOWN: + return NBD_ESHUTDOWN; case EINVAL: default: return NBD_EINVAL; @@ -46,10 +49,10 @@ static int system_errno_to_nbd_errno(int err) /* Definitions for opaque data types */ -typedef struct NBDRequest NBDRequest; +typedef struct NBDRequestData NBDRequestData; -struct NBDRequest { - QSIMPLEQ_ENTRY(NBDRequest) entry; +struct NBDRequestData { + QSIMPLEQ_ENTRY(NBDRequestData) entry; NBDClient *client; uint8_t *data; bool complete; @@ -61,6 +64,7 @@ struct NBDExport { BlockBackend *blk; char *name; + char *description; off_t dev_offset; off_t size; uint16_t nbdflags; @@ -69,6 +73,7 @@ struct NBDExport { AioContext *ctx; + BlockBackend *eject_notifier_blk; Notifier eject_notifier; }; @@ -78,6 +83,7 @@ struct NBDClient { int refcount; void (*close)(NBDClient *client); + bool no_zeroes; NBDExport *exp; QCryptoTLSCreds *tlscreds; char *tlsaclname; @@ -128,7 +134,8 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size) } -static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size) +static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer, + size_t size) { ssize_t ret; guint watch; @@ -192,12 +199,15 @@ static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size) */ -static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) +/* Send a reply header, including length, but no payload. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type, + uint32_t opt, uint32_t len) { uint64_t magic; - uint32_t len; - TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt); + TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32, + type, opt, len); magic = cpu_to_be64(NBD_REP_MAGIC); if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { @@ -214,7 +224,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) LOG("write failed (rep type)"); return -EINVAL; } - len = cpu_to_be32(0); + len = cpu_to_be32(len); if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { LOG("write failed (rep data length)"); return -EINVAL; @@ -222,45 +232,82 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) return 0; } -static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) +/* Send a reply header with default 0 length. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt) { - uint64_t magic, name_len; - uint32_t opt, type, len; + return nbd_negotiate_send_rep_len(ioc, type, opt, 0); +} - TRACE("Advertising export name '%s'", exp->name ? exp->name : ""); - name_len = strlen(exp->name); - magic = cpu_to_be64(NBD_REP_MAGIC); - if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) { - LOG("write failed (magic)"); - return -EINVAL; - } - opt = cpu_to_be32(NBD_OPT_LIST); - if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) { - LOG("write failed (opt)"); - return -EINVAL; +/* Send an error reply. + * Return -errno on error, 0 on success. */ +static int GCC_FMT_ATTR(4, 5) +nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type, + uint32_t opt, const char *fmt, ...) 
+{ + va_list va; + char *msg; + int ret; + size_t len; + + va_start(va, fmt); + msg = g_strdup_vprintf(fmt, va); + va_end(va); + len = strlen(msg); + assert(len < 4096); + TRACE("sending error message \"%s\"", msg); + ret = nbd_negotiate_send_rep_len(ioc, type, opt, len); + if (ret < 0) { + goto out; } - type = cpu_to_be32(NBD_REP_SERVER); - if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) { - LOG("write failed (reply type)"); - return -EINVAL; + if (nbd_negotiate_write(ioc, msg, len) != len) { + LOG("write failed (error message)"); + ret = -EIO; + } else { + ret = 0; } - len = cpu_to_be32(name_len + sizeof(len)); - if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { - LOG("write failed (length)"); - return -EINVAL; +out: + g_free(msg); + return ret; +} + +/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. + * Return -errno on error, 0 on success. */ +static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp) +{ + size_t name_len, desc_len; + uint32_t len; + const char *name = exp->name ? exp->name : ""; + const char *desc = exp->description ? exp->description : ""; + int rc; + + TRACE("Advertising export name '%s' description '%s'", name, desc); + name_len = strlen(name); + desc_len = strlen(desc); + len = name_len + desc_len + sizeof(len); + rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len); + if (rc < 0) { + return rc; } + len = cpu_to_be32(name_len); if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) { - LOG("write failed (length)"); + LOG("write failed (name length)"); return -EINVAL; } - if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) { - LOG("write failed (buffer)"); + if (nbd_negotiate_write(ioc, name, name_len) != name_len) { + LOG("write failed (name buffer)"); + return -EINVAL; + } + if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) { + LOG("write failed (description buffer)"); return -EINVAL; } return 0; } +/* Process the NBD_OPT_LIST command, with a potential series of replies. + * Return -errno on error, 0 on success. */ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) { NBDExport *exp; @@ -269,8 +316,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length) if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - return nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, NBD_OPT_LIST); + return nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, NBD_OPT_LIST, + "OPT_LIST should not have length"); } /* For each export, send a NBD_REP_SERVER reply. */ @@ -317,7 +365,8 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length) return rc; } - +/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, uint32_t length) { @@ -331,7 +380,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, if (nbd_negotiate_drop_sync(ioc, length) != length) { return NULL; } - nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS); + nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS, + "OPT_STARTTLS should not have length"); return NULL; } @@ -348,6 +398,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, return NULL; } + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); TRACE("Starting TLS handshake"); data.loop = g_main_loop_new(g_main_context_default(), FALSE); qio_channel_tls_handshake(tioc, @@ -369,6 +420,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, } +/* Process all NBD_OPT_* client option commands. + * Return -errno on error, 0 on success. */ static int nbd_negotiate_options(NBDClient *client) { uint32_t flags; @@ -400,6 +453,11 @@ static int nbd_negotiate_options(NBDClient *client) fixedNewstyle = true; flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE; } + if (flags & NBD_FLAG_C_NO_ZEROES) { + TRACE("Client supports no zeroes at handshake end"); + client->no_zeroes = true; + flags &= ~NBD_FLAG_C_NO_ZEROES; + } if (flags != 0) { TRACE("Unknown client flags 0x%" PRIx32 " received", flags); return -EIO; @@ -459,16 +517,22 @@ static int nbd_negotiate_options(NBDClient *client) return -EINVAL; default: - TRACE("Option 0x%" PRIx32 " not permitted before TLS", - clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_TLS_REQD, + clientflags, + "Option 0x%" PRIx32 + "not permitted before TLS", + clientflags); if (ret < 0) { return ret; } + /* Let the client keep trying, unless they asked to quit */ + if (clientflags == NBD_OPT_ABORT) { + return -EINVAL; + } break; } } else if (fixedNewstyle) { @@ -481,6 +545,10 @@ static int nbd_negotiate_options(NBDClient *client) break; case NBD_OPT_ABORT: + /* NBD spec says we must try to reply before + * disconnecting, but that we must also tolerate + * guests that don't wait for our reply. 
*/ + nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags); return -EINVAL; case NBD_OPT_EXPORT_NAME: @@ -491,27 +559,30 @@ static int nbd_negotiate_options(NBDClient *client) return -EIO; } if (client->tlscreds) { - TRACE("TLS already enabled"); - ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_INVALID, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_INVALID, + clientflags, + "TLS already enabled"); } else { - TRACE("TLS not configured"); - ret = nbd_negotiate_send_rep(client->ioc, - NBD_REP_ERR_POLICY, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_POLICY, + clientflags, + "TLS not configured"); } if (ret < 0) { return ret; } break; default: - TRACE("Unsupported option 0x%" PRIx32, clientflags); if (nbd_negotiate_drop_sync(client->ioc, length) != length) { return -EIO; } - ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP, - clientflags); + ret = nbd_negotiate_send_rep_err(client->ioc, + NBD_REP_ERR_UNSUP, + clientflags, + "Unsupported option 0x%" + PRIx32, + clientflags); if (ret < 0) { return ret; } @@ -545,8 +616,10 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) char buf[8 + 8 + 8 + 128]; int rc; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | - NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA); + NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | + NBD_FLAG_SEND_WRITE_ZEROES); bool oldStyle; + size_t len; /* Old style negotiation header without options [ 0 .. 7] passwd ("NBDMAGIC") @@ -563,7 +636,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) ....options sent.... [18 .. 25] size [26 .. 27] export flags - [28 .. 151] reserved (0) + [28 .. 151] reserved (0, omit if no_zeroes) */ qio_channel_set_blocking(client->ioc, false, NULL); @@ -582,7 +655,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) stw_be_p(buf + 26, client->exp->nbdflags | myflags); } else { stq_be_p(buf + 8, NBD_OPTS_MAGIC); - stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE); + stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES); } if (oldStyle) { @@ -609,8 +682,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) client->exp->size, client->exp->nbdflags | myflags); stq_be_p(buf + 18, client->exp->size); stw_be_p(buf + 26, client->exp->nbdflags | myflags); - if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) != - sizeof(buf) - 18) { + len = client->no_zeroes ? 10 : sizeof(buf) - 18; + if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) { LOG("write failed"); goto fail; } @@ -622,7 +695,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data) return rc; } -static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) +static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request) { uint8_t buf[NBD_REQUEST_SIZE]; uint32_t magic; @@ -640,21 +713,23 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) /* Request [ 0 .. 3] magic (NBD_REQUEST_MAGIC) - [ 4 .. 7] type (0 == READ, 1 == WRITE) + [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...) + [ 6 .. 7] type (NBD_CMD_READ, ...) [ 8 .. 15] handle [16 .. 23] from [24 .. 
27] len */ magic = ldl_be_p(buf); - request->type = ldl_be_p(buf + 4); + request->flags = lduw_be_p(buf + 4); + request->type = lduw_be_p(buf + 6); request->handle = ldq_be_p(buf + 8); request->from = ldq_be_p(buf + 16); request->len = ldl_be_p(buf + 24); - TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32 - ", from = %" PRIu64 " , len = %" PRIu32 " }", - magic, request->type, request->from, request->len); + TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16 + ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }", + magic, request->flags, request->type, request->from, request->len); if (magic != NBD_REQUEST_MAGIC) { LOG("invalid magic (got 0x%" PRIx32 ")", magic); @@ -663,7 +738,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request) return 0; } -static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply) +static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply) { uint8_t buf[NBD_REPLY_SIZE]; ssize_t ret; @@ -745,21 +820,21 @@ static void client_close(NBDClient *client) } } -static NBDRequest *nbd_request_get(NBDClient *client) +static NBDRequestData *nbd_request_get(NBDClient *client) { - NBDRequest *req; + NBDRequestData *req; assert(client->nb_requests <= MAX_NBD_REQUESTS - 1); client->nb_requests++; nbd_update_can_read(client); - req = g_new0(NBDRequest, 1); + req = g_new0(NBDRequestData, 1); nbd_client_get(client); req->client = client; return req; } -static void nbd_request_put(NBDRequest *req) +static void nbd_request_put(NBDRequestData *req) { NBDClient *client = req->client; @@ -807,11 +882,18 @@ static void nbd_eject_notifier(Notifier *n, void *data) nbd_export_close(exp); } -NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, +NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size, uint16_t nbdflags, void (*close)(NBDExport *), + bool writethrough, BlockBackend *on_eject_blk, Error **errp) { + BlockBackend *blk; NBDExport *exp = g_malloc0(sizeof(NBDExport)); + + blk = blk_new(); + blk_insert_bs(blk, bs); + blk_set_enable_write_cache(blk, !writethrough); + exp->refcount = 1; QTAILQ_INIT(&exp->clients); exp->blk = blk; @@ -827,11 +909,14 @@ NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, exp->close = close; exp->ctx = blk_get_aio_context(blk); - blk_ref(blk); blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp); - exp->eject_notifier.notify = nbd_eject_notifier; - blk_add_remove_bs_notifier(blk, &exp->eject_notifier); + if (on_eject_blk) { + blk_ref(on_eject_blk); + exp->eject_notifier_blk = on_eject_blk; + exp->eject_notifier.notify = nbd_eject_notifier; + blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier); + } /* * NBD exports are used for non-shared storage migration. 
Make sure @@ -844,6 +929,7 @@ NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, return exp; fail: + blk_unref(blk); g_free(exp); return NULL; } @@ -881,6 +967,12 @@ void nbd_export_set_name(NBDExport *exp, const char *name) nbd_export_put(exp); } +void nbd_export_set_description(NBDExport *exp, const char *description) +{ + g_free(exp->description); + exp->description = g_strdup(description); +} + void nbd_export_close(NBDExport *exp) { NBDClient *client, *next; @@ -890,6 +982,7 @@ void nbd_export_close(NBDExport *exp) client_close(client); } nbd_export_set_name(exp, NULL); + nbd_export_set_description(exp, NULL); nbd_export_put(exp); } @@ -908,13 +1001,17 @@ void nbd_export_put(NBDExport *exp) if (--exp->refcount == 0) { assert(exp->name == NULL); + assert(exp->description == NULL); if (exp->close) { exp->close(exp); } if (exp->blk) { - notifier_remove(&exp->eject_notifier); + if (exp->eject_notifier_blk) { + notifier_remove(&exp->eject_notifier); + blk_unref(exp->eject_notifier_blk); + } blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, exp); blk_unref(exp->blk); @@ -939,7 +1036,7 @@ void nbd_export_close_all(void) } } -static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, +static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len) { NBDClient *client = req->client; @@ -975,11 +1072,10 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply, * and any other negative value to report an error to the client * (although the caller may still need to disconnect after reporting * the error). */ -static ssize_t nbd_co_receive_request(NBDRequest *req, - struct nbd_request *request) +static ssize_t nbd_co_receive_request(NBDRequestData *req, + NBDRequest *request) { NBDClient *client = req->client; - uint32_t command; ssize_t rc; g_assert(qemu_in_coroutine()); @@ -996,13 +1092,12 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, TRACE("Decoding type"); - command = request->type & NBD_CMD_MASK_COMMAND; - if (command != NBD_CMD_WRITE) { + if (request->type != NBD_CMD_WRITE) { /* No payload, we are ready to read the next request. */ req->complete = true; } - if (command == NBD_CMD_DISC) { + if (request->type == NBD_CMD_DISC) { /* Special case: we're going to disconnect without a reply, * whether or not flags, from, or len are bogus */ TRACE("Request type is DISCONNECT"); @@ -1019,7 +1114,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, goto out; } - if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { if (request->len > NBD_MAX_BUFFER_SIZE) { LOG("len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); @@ -1033,7 +1128,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, goto out; } } - if (command == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_WRITE) { TRACE("Reading %" PRIu32 " byte(s)", request->len); if (read_sync(client->ioc, req->data, request->len) != request->len) { @@ -1049,12 +1144,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32 ", Size: %" PRIu64, request->from, request->len, (uint64_t)client->exp->size); - rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL; + rc = request->type == NBD_CMD_WRITE ? 
-ENOSPC : -EINVAL; goto out; } - if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) { - LOG("unsupported flags (got 0x%x)", - request->type & ~NBD_CMD_MASK_COMMAND); + if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { + LOG("unsupported flags (got 0x%x)", request->flags); + rc = -EINVAL; + goto out; + } + if (request->type != NBD_CMD_WRITE_ZEROES && + (request->flags & NBD_CMD_FLAG_NO_HOLE)) { + LOG("unexpected flags (got 0x%x)", request->flags); rc = -EINVAL; goto out; } @@ -1072,11 +1172,10 @@ static void nbd_trip(void *opaque) { NBDClient *client = opaque; NBDExport *exp = client->exp; - NBDRequest *req; - struct nbd_request request; - struct nbd_reply reply; + NBDRequestData *req; + NBDRequest request; + NBDReply reply; ssize_t ret; - uint32_t command; int flags; TRACE("Reading request."); @@ -1100,7 +1199,6 @@ static void nbd_trip(void *opaque) reply.error = -ret; goto error_reply; } - command = request.type & NBD_CMD_MASK_COMMAND; if (client->closing) { /* @@ -1110,11 +1208,12 @@ static void nbd_trip(void *opaque) goto done; } - switch (command) { + switch (request.type) { case NBD_CMD_READ: TRACE("Request type is READ"); - if (request.type & NBD_CMD_FLAG_FUA) { + /* XXX: NBD Protocol only documents use of FUA with WRITE */ + if (request.flags & NBD_CMD_FLAG_FUA) { ret = blk_co_flush(exp->blk); if (ret < 0) { LOG("flush failed"); @@ -1147,7 +1246,7 @@ static void nbd_trip(void *opaque) TRACE("Writing to device"); flags = 0; - if (request.type & NBD_CMD_FLAG_FUA) { + if (request.flags & NBD_CMD_FLAG_FUA) { flags |= BDRV_REQ_FUA; } ret = blk_pwrite(exp->blk, request.from + exp->dev_offset, @@ -1163,6 +1262,37 @@ static void nbd_trip(void *opaque) } break; + case NBD_CMD_WRITE_ZEROES: + TRACE("Request type is WRITE_ZEROES"); + + if (exp->nbdflags & NBD_FLAG_READ_ONLY) { + TRACE("Server is read-only, return error"); + reply.error = EROFS; + goto error_reply; + } + + TRACE("Writing to device"); + + flags = 0; + if (request.flags & NBD_CMD_FLAG_FUA) { + flags |= BDRV_REQ_FUA; + } + if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) { + flags |= BDRV_REQ_MAY_UNMAP; + } + ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset, + request.len, flags); + if (ret < 0) { + LOG("writing to file failed"); + reply.error = -ret; + goto error_reply; + } + + if (nbd_co_send_reply(req, &reply, 0) < 0) { + goto out; + } + break; + case NBD_CMD_DISC: /* unreachable, thanks to special case in nbd_co_receive_request() */ abort(); diff --git a/net/Makefile.objs b/net/Makefile.objs index b7c22fddbf..2a80df5fa7 100644 --- a/net/Makefile.objs +++ b/net/Makefile.objs @@ -16,3 +16,6 @@ common-obj-$(CONFIG_NETMAP) += netmap.o common-obj-y += filter.o common-obj-y += filter-buffer.o common-obj-y += filter-mirror.o +common-obj-y += colo-compare.o +common-obj-y += colo.o +common-obj-y += filter-rewriter.o diff --git a/net/colo-compare.c b/net/colo-compare.c new file mode 100644 index 0000000000..9bfc736f55 --- /dev/null +++ b/net/colo-compare.c @@ -0,0 +1,737 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * Author: Zhang Chen + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "qemu-common.h" +#include "qapi/qmp/qerror.h" +#include "qapi/error.h" +#include "net/net.h" +#include "net/eth.h" +#include "qom/object_interfaces.h" +#include "qemu/iov.h" +#include "qom/object.h" +#include "qemu/typedefs.h" +#include "net/queue.h" +#include "sysemu/char.h" +#include "qemu/sockets.h" +#include "qapi-visit.h" +#include "net/colo.h" + +#define TYPE_COLO_COMPARE "colo-compare" +#define COLO_COMPARE(obj) \ + OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) + +#define COMPARE_READ_LEN_MAX NET_BUFSIZE +#define MAX_QUEUE_SIZE 1024 + +/* TODO: Should be configurable */ +#define REGULAR_PACKET_CHECK_MS 3000 + +/* + + CompareState ++ + | | + +---------------+ +---------------+ +---------------+ + |conn list +--->conn +--------->conn | + +---------------+ +---------------+ +---------------+ + | | | | | | + +---------------+ +---v----+ +---v----+ +---v----+ +---v----+ + |primary | |secondary |primary | |secondary + |packet | |packet + |packet | |packet + + +--------+ +--------+ +--------+ +--------+ + | | | | + +---v----+ +---v----+ +---v----+ +---v----+ + |primary | |secondary |primary | |secondary + |packet | |packet + |packet | |packet + + +--------+ +--------+ +--------+ +--------+ + | | | | + +---v----+ +---v----+ +---v----+ +---v----+ + |primary | |secondary |primary | |secondary + |packet | |packet + |packet | |packet + + +--------+ +--------+ +--------+ +--------+ +*/ +typedef struct CompareState { + Object parent; + + char *pri_indev; + char *sec_indev; + char *outdev; + CharBackend chr_pri_in; + CharBackend chr_sec_in; + CharBackend chr_out; + SocketReadState pri_rs; + SocketReadState sec_rs; + + /* connection list: the connections belonged to this NIC could be found + * in this list. 
+ * element type: Connection + */ + GQueue conn_list; + /* hashtable to save connection */ + GHashTable *connection_track_table; + /* compare thread, a thread for each NIC */ + QemuThread thread; + /* Timer used on the primary to find packets that are never matched */ + QEMUTimer *timer; + QemuMutex timer_check_lock; +} CompareState; + +typedef struct CompareClass { + ObjectClass parent_class; +} CompareClass; + +enum { + PRIMARY_IN = 0, + SECONDARY_IN, +}; + +static int compare_chr_send(CharBackend *out, + const uint8_t *buf, + uint32_t size); + +/* + * Return 0 on success, if return -1 means the pkt + * is unsupported(arp and ipv6) and will be sent later + */ +static int packet_enqueue(CompareState *s, int mode) +{ + ConnectionKey key; + Packet *pkt = NULL; + Connection *conn; + + if (mode == PRIMARY_IN) { + pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len); + } else { + pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len); + } + + if (parse_packet_early(pkt)) { + packet_destroy(pkt, NULL); + pkt = NULL; + return -1; + } + fill_connection_key(pkt, &key); + + conn = connection_get(s->connection_track_table, + &key, + &s->conn_list); + + if (!conn->processing) { + g_queue_push_tail(&s->conn_list, conn); + conn->processing = true; + } + + if (mode == PRIMARY_IN) { + if (g_queue_get_length(&conn->primary_list) <= + MAX_QUEUE_SIZE) { + g_queue_push_tail(&conn->primary_list, pkt); + } else { + error_report("colo compare primary queue size too big," + "drop packet"); + } + } else { + if (g_queue_get_length(&conn->secondary_list) <= + MAX_QUEUE_SIZE) { + g_queue_push_tail(&conn->secondary_list, pkt); + } else { + error_report("colo compare secondary queue size too big," + "drop packet"); + } + } + + return 0; +} + +/* + * The IP packets sent by primary and secondary + * will be compared in here + * TODO support ip fragment, Out-Of-Order + * return: 0 means packet same + * > 0 || < 0 means packet different + */ +static int colo_packet_compare(Packet *ppkt, Packet *spkt) +{ + trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), + inet_ntoa(ppkt->ip->ip_dst), spkt->size, + inet_ntoa(spkt->ip->ip_src), + inet_ntoa(spkt->ip->ip_dst)); + + if (ppkt->size == spkt->size) { + return memcmp(ppkt->data, spkt->data, spkt->size); + } else { + return -1; + } +} + +/* + * Called from the compare thread on the primary + * for compare tcp packet + * compare_tcp copied from Dr. David Alan Gilbert's branch + */ +static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) +{ + struct tcphdr *ptcp, *stcp; + int res; + + trace_colo_compare_main("compare tcp"); + if (ppkt->size != spkt->size) { + if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { + trace_colo_compare_main("pkt size not same"); + } + return -1; + } + + ptcp = (struct tcphdr *)ppkt->transport_header; + stcp = (struct tcphdr *)spkt->transport_header; + + /* + * The 'identification' field in the IP header is *very* random + * it almost never matches. Fudge this by ignoring differences in + * unfragmented packets; they'll normally sort themselves out if different + * anyway, and it should recover at the TCP level. 
+ * An alternative would be to get both the primary and secondary to rewrite + * somehow; but that would need some sync traffic to sync the state + */ + if (ntohs(ppkt->ip->ip_off) & IP_DF) { + spkt->ip->ip_id = ppkt->ip->ip_id; + /* and the sum will be different if the IDs were different */ + spkt->ip->ip_sum = ppkt->ip->ip_sum; + } + + res = memcmp(ppkt->data + ETH_HLEN, spkt->data + ETH_HLEN, + (spkt->size - ETH_HLEN)); + + if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) { + trace_colo_compare_pkt_info_src(inet_ntoa(ppkt->ip->ip_src), + ntohl(stcp->th_seq), + ntohl(stcp->th_ack), + res, stcp->th_flags, + spkt->size); + + trace_colo_compare_pkt_info_dst(inet_ntoa(ppkt->ip->ip_dst), + ntohl(ptcp->th_seq), + ntohl(ptcp->th_ack), + res, ptcp->th_flags, + ppkt->size); + + qemu_hexdump((char *)ppkt->data, stderr, + "colo-compare ppkt", ppkt->size); + qemu_hexdump((char *)spkt->data, stderr, + "colo-compare spkt", spkt->size); + } + + return res; +} + +/* + * Called from the compare thread on the primary + * for compare udp packet + */ +static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) +{ + int ret; + + trace_colo_compare_main("compare udp"); + ret = colo_packet_compare(ppkt, spkt); + + if (ret) { + trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size); + qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", ppkt->size); + trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size); + qemu_hexdump((char *)spkt->data, stderr, "colo-compare", spkt->size); + } + + return ret; +} + +/* + * Called from the compare thread on the primary + * for compare icmp packet + */ +static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) +{ + int network_length; + + trace_colo_compare_main("compare icmp"); + network_length = ppkt->ip->ip_hl * 4; + if (ppkt->size != spkt->size || + ppkt->size < network_length + ETH_HLEN) { + return -1; + } + + if (colo_packet_compare(ppkt, spkt)) { + trace_colo_compare_icmp_miscompare("primary pkt size", + ppkt->size); + qemu_hexdump((char *)ppkt->data, stderr, "colo-compare", + ppkt->size); + trace_colo_compare_icmp_miscompare("Secondary pkt size", + spkt->size); + qemu_hexdump((char *)spkt->data, stderr, "colo-compare", + spkt->size); + return -1; + } else { + return 0; + } +} + +/* + * Called from the compare thread on the primary + * for compare other packet + */ +static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) +{ + trace_colo_compare_main("compare other"); + trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src), + inet_ntoa(ppkt->ip->ip_dst), spkt->size, + inet_ntoa(spkt->ip->ip_src), + inet_ntoa(spkt->ip->ip_dst)); + return colo_packet_compare(ppkt, spkt); +} + +static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time) +{ + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_HOST); + + if ((now - pkt->creation_ms) > (*check_time)) { + trace_colo_old_packet_check_found(pkt->creation_ms); + return 0; + } else { + return 1; + } +} + +static void colo_old_packet_check_one_conn(void *opaque, + void *user_data) +{ + Connection *conn = opaque; + GList *result = NULL; + int64_t check_time = REGULAR_PACKET_CHECK_MS; + + result = g_queue_find_custom(&conn->primary_list, + &check_time, + (GCompareFunc)colo_old_packet_check_one); + + if (result) { + /* do checkpoint will flush old packet */ + /* TODO: colo_notify_checkpoint();*/ + } +} + +/* + * Look for old packets that the secondary hasn't matched, + * if we have some then we have to checkpoint to wake + * the secondary up. 
+ */ +static void colo_old_packet_check(void *opaque) +{ + CompareState *s = opaque; + + g_queue_foreach(&s->conn_list, colo_old_packet_check_one_conn, NULL); +} + +/* + * Called from the compare thread on the primary + * for compare connection + */ +static void colo_compare_connection(void *opaque, void *user_data) +{ + CompareState *s = user_data; + Connection *conn = opaque; + Packet *pkt = NULL; + GList *result = NULL; + int ret; + + while (!g_queue_is_empty(&conn->primary_list) && + !g_queue_is_empty(&conn->secondary_list)) { + qemu_mutex_lock(&s->timer_check_lock); + pkt = g_queue_pop_tail(&conn->primary_list); + qemu_mutex_unlock(&s->timer_check_lock); + switch (conn->ip_proto) { + case IPPROTO_TCP: + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_tcp); + break; + case IPPROTO_UDP: + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_udp); + break; + case IPPROTO_ICMP: + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_icmp); + break; + default: + result = g_queue_find_custom(&conn->secondary_list, + pkt, (GCompareFunc)colo_packet_compare_other); + break; + } + + if (result) { + ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size); + if (ret < 0) { + error_report("colo_send_primary_packet failed"); + } + trace_colo_compare_main("packet same and release packet"); + g_queue_remove(&conn->secondary_list, result->data); + packet_destroy(pkt, NULL); + } else { + /* + * If one packet arrive late, the secondary_list or + * primary_list will be empty, so we can't compare it + * until next comparison. + */ + trace_colo_compare_main("packet different"); + qemu_mutex_lock(&s->timer_check_lock); + g_queue_push_tail(&conn->primary_list, pkt); + qemu_mutex_unlock(&s->timer_check_lock); + /* TODO: colo_notify_checkpoint();*/ + break; + } + } +} + +static int compare_chr_send(CharBackend *out, + const uint8_t *buf, + uint32_t size) +{ + int ret = 0; + uint32_t len = htonl(size); + + if (!size) { + return 0; + } + + ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len)); + if (ret != sizeof(len)) { + goto err; + } + + ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size); + if (ret != size) { + goto err; + } + + return 0; + +err: + return ret < 0 ? ret : -EIO; +} + +static int compare_chr_can_read(void *opaque) +{ + return COMPARE_READ_LEN_MAX; +} + +/* + * Called from the main thread on the primary for packets + * arriving over the socket from the primary. + */ +static void compare_pri_chr_in(void *opaque, const uint8_t *buf, int size) +{ + CompareState *s = COLO_COMPARE(opaque); + int ret; + + ret = net_fill_rstate(&s->pri_rs, buf, size); + if (ret == -1) { + qemu_chr_fe_set_handlers(&s->chr_pri_in, NULL, NULL, NULL, + NULL, NULL, true); + error_report("colo-compare primary_in error"); + } +} + +/* + * Called from the main thread on the primary for packets + * arriving over the socket from the secondary. 
+ */ +static void compare_sec_chr_in(void *opaque, const uint8_t *buf, int size) +{ + CompareState *s = COLO_COMPARE(opaque); + int ret; + + ret = net_fill_rstate(&s->sec_rs, buf, size); + if (ret == -1) { + qemu_chr_fe_set_handlers(&s->chr_sec_in, NULL, NULL, NULL, + NULL, NULL, true); + error_report("colo-compare secondary_in error"); + } +} + +static void *colo_compare_thread(void *opaque) +{ + GMainContext *worker_context; + GMainLoop *compare_loop; + CompareState *s = opaque; + + worker_context = g_main_context_new(); + + qemu_chr_fe_set_handlers(&s->chr_pri_in, compare_chr_can_read, + compare_pri_chr_in, NULL, s, worker_context, true); + qemu_chr_fe_set_handlers(&s->chr_sec_in, compare_chr_can_read, + compare_sec_chr_in, NULL, s, worker_context, true); + + compare_loop = g_main_loop_new(worker_context, FALSE); + + g_main_loop_run(compare_loop); + + g_main_loop_unref(compare_loop); + g_main_context_unref(worker_context); + return NULL; +} + +static char *compare_get_pri_indev(Object *obj, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + return g_strdup(s->pri_indev); +} + +static void compare_set_pri_indev(Object *obj, const char *value, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + g_free(s->pri_indev); + s->pri_indev = g_strdup(value); +} + +static char *compare_get_sec_indev(Object *obj, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + return g_strdup(s->sec_indev); +} + +static void compare_set_sec_indev(Object *obj, const char *value, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + g_free(s->sec_indev); + s->sec_indev = g_strdup(value); +} + +static char *compare_get_outdev(Object *obj, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + return g_strdup(s->outdev); +} + +static void compare_set_outdev(Object *obj, const char *value, Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + + g_free(s->outdev); + s->outdev = g_strdup(value); +} + +static void compare_pri_rs_finalize(SocketReadState *pri_rs) +{ + CompareState *s = container_of(pri_rs, CompareState, pri_rs); + + if (packet_enqueue(s, PRIMARY_IN)) { + trace_colo_compare_main("primary: unsupported packet in"); + compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len); + } else { + /* compare connection */ + g_queue_foreach(&s->conn_list, colo_compare_connection, s); + } +} + +static void compare_sec_rs_finalize(SocketReadState *sec_rs) +{ + CompareState *s = container_of(sec_rs, CompareState, sec_rs); + + if (packet_enqueue(s, SECONDARY_IN)) { + trace_colo_compare_main("secondary: unsupported packet in"); + } else { + /* compare connection */ + g_queue_foreach(&s->conn_list, colo_compare_connection, s); + } +} + + +/* + * Return 0 is success. + * Return 1 is failed. + */ +static int find_and_check_chardev(CharDriverState **chr, + char *chr_name, + Error **errp) +{ + *chr = qemu_chr_find(chr_name); + if (*chr == NULL) { + error_setg(errp, "Device '%s' not found", + chr_name); + return 1; + } + + if (!qemu_chr_has_feature(*chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) { + error_setg(errp, "chardev \"%s\" is not reconnectable", + chr_name); + return 1; + } + + return 0; +} + +/* + * Check old packet regularly so it can watch for any packets + * that the secondary hasn't produced equivalents of. 
+ */ +static void check_old_packet_regular(void *opaque) +{ + CompareState *s = opaque; + + timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + REGULAR_PACKET_CHECK_MS); + /* if have old packet we will notify checkpoint */ + /* + * TODO: Make timer handler run in compare thread + * like qemu_chr_add_handlers_full. + */ + qemu_mutex_lock(&s->timer_check_lock); + colo_old_packet_check(s); + qemu_mutex_unlock(&s->timer_check_lock); +} + +/* + * Called from the main thread on the primary + * to setup colo-compare. + */ +static void colo_compare_complete(UserCreatable *uc, Error **errp) +{ + CompareState *s = COLO_COMPARE(uc); + CharDriverState *chr; + char thread_name[64]; + static int compare_id; + + if (!s->pri_indev || !s->sec_indev || !s->outdev) { + error_setg(errp, "colo compare needs 'primary_in' ," + "'secondary_in','outdev' property set"); + return; + } else if (!strcmp(s->pri_indev, s->outdev) || + !strcmp(s->sec_indev, s->outdev) || + !strcmp(s->pri_indev, s->sec_indev)) { + error_setg(errp, "'indev' and 'outdev' could not be same " + "for compare module"); + return; + } + + if (find_and_check_chardev(&chr, s->pri_indev, errp) || + !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) { + return; + } + + if (find_and_check_chardev(&chr, s->sec_indev, errp) || + !qemu_chr_fe_init(&s->chr_sec_in, chr, errp)) { + return; + } + + if (find_and_check_chardev(&chr, s->outdev, errp) || + !qemu_chr_fe_init(&s->chr_out, chr, errp)) { + return; + } + + net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize); + net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize); + + g_queue_init(&s->conn_list); + qemu_mutex_init(&s->timer_check_lock); + + s->connection_track_table = g_hash_table_new_full(connection_key_hash, + connection_key_equal, + g_free, + connection_destroy); + + sprintf(thread_name, "colo-compare %d", compare_id); + qemu_thread_create(&s->thread, thread_name, + colo_compare_thread, s, + QEMU_THREAD_JOINABLE); + compare_id++; + + /* A regular timer to kick any packets that the secondary doesn't match */ + s->timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, /* Only when guest runs */ + check_old_packet_regular, s); + timer_mod(s->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + REGULAR_PACKET_CHECK_MS); + + return; +} + +static void colo_compare_class_init(ObjectClass *oc, void *data) +{ + UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); + + ucc->complete = colo_compare_complete; +} + +static void colo_compare_init(Object *obj) +{ + object_property_add_str(obj, "primary_in", + compare_get_pri_indev, compare_set_pri_indev, + NULL); + object_property_add_str(obj, "secondary_in", + compare_get_sec_indev, compare_set_sec_indev, + NULL); + object_property_add_str(obj, "outdev", + compare_get_outdev, compare_set_outdev, + NULL); +} + +static void colo_compare_finalize(Object *obj) +{ + CompareState *s = COLO_COMPARE(obj); + + qemu_chr_fe_deinit(&s->chr_pri_in); + qemu_chr_fe_deinit(&s->chr_sec_in); + qemu_chr_fe_deinit(&s->chr_out); + + g_queue_free(&s->conn_list); + + if (qemu_thread_is_self(&s->thread)) { + /* compare connection */ + g_queue_foreach(&s->conn_list, colo_compare_connection, s); + qemu_thread_join(&s->thread); + } + + if (s->timer) { + timer_del(s->timer); + } + + qemu_mutex_destroy(&s->timer_check_lock); + + g_free(s->pri_indev); + g_free(s->sec_indev); + g_free(s->outdev); +} + +static const TypeInfo colo_compare_info = { + .name = TYPE_COLO_COMPARE, + .parent = TYPE_OBJECT, + .instance_size = sizeof(CompareState), + .instance_init = colo_compare_init, + .instance_finalize = 
colo_compare_finalize, + .class_size = sizeof(CompareClass), + .class_init = colo_compare_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static void register_types(void) +{ + type_register_static(&colo_compare_info); +} + +type_init(register_types); diff --git a/net/colo.c b/net/colo.c new file mode 100644 index 0000000000..6a6eacd2dc --- /dev/null +++ b/net/colo.c @@ -0,0 +1,211 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * Author: Zhang Chen + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "net/colo.h" + +uint32_t connection_key_hash(const void *opaque) +{ + const ConnectionKey *key = opaque; + uint32_t a, b, c; + + /* Jenkins hash */ + a = b = c = JHASH_INITVAL + sizeof(*key); + a += key->src.s_addr; + b += key->dst.s_addr; + c += (key->src_port | key->dst_port << 16); + __jhash_mix(a, b, c); + + a += key->ip_proto; + __jhash_final(a, b, c); + + return c; +} + +int connection_key_equal(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(ConnectionKey)) == 0; +} + +int parse_packet_early(Packet *pkt) +{ + int network_length; + static const uint8_t vlan[] = {0x81, 0x00}; + uint8_t *data = pkt->data; + uint16_t l3_proto; + ssize_t l2hdr_len = eth_get_l2_hdr_length(data); + + if (pkt->size < ETH_HLEN) { + trace_colo_proxy_main("pkt->size < ETH_HLEN"); + return 1; + } + + /* + * TODO: support vlan. + */ + if (!memcmp(&data[12], vlan, sizeof(vlan))) { + trace_colo_proxy_main("COLO-proxy don't support vlan"); + return 1; + } + + pkt->network_header = data + l2hdr_len; + + const struct iovec l2vec = { + .iov_base = (void *) data, + .iov_len = l2hdr_len + }; + l3_proto = eth_get_l3_proto(&l2vec, 1, l2hdr_len); + + if (l3_proto != ETH_P_IP) { + return 1; + } + + network_length = pkt->ip->ip_hl * 4; + if (pkt->size < l2hdr_len + network_length) { + trace_colo_proxy_main("pkt->size < network_header + network_length"); + return 1; + } + pkt->transport_header = pkt->network_header + network_length; + + return 0; +} + +void fill_connection_key(Packet *pkt, ConnectionKey *key) +{ + uint32_t tmp_ports; + + memset(key, 0, sizeof(*key)); + key->ip_proto = pkt->ip->ip_p; + + switch (key->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + tmp_ports = *(uint32_t *)(pkt->transport_header); + key->src = pkt->ip->ip_src; + key->dst = pkt->ip->ip_dst; + key->src_port = ntohs(tmp_ports & 0xffff); + key->dst_port = ntohs(tmp_ports >> 16); + break; + case IPPROTO_AH: + tmp_ports = *(uint32_t *)(pkt->transport_header + 4); + key->src = pkt->ip->ip_src; + key->dst = pkt->ip->ip_dst; + key->src_port = ntohs(tmp_ports & 0xffff); + key->dst_port = ntohs(tmp_ports >> 16); + break; + default: + break; + } +} + +void reverse_connection_key(ConnectionKey *key) +{ + struct in_addr tmp_ip; + uint16_t tmp_port; + + tmp_ip = key->src; + key->src = key->dst; + key->dst = tmp_ip; + + tmp_port = key->src_port; + key->src_port = key->dst_port; + key->dst_port = tmp_port; +} + +Connection *connection_new(ConnectionKey *key) +{ + Connection *conn = g_slice_new(Connection); + + conn->ip_proto = 
key->ip_proto; + conn->processing = false; + conn->offset = 0; + conn->syn_flag = 0; + g_queue_init(&conn->primary_list); + g_queue_init(&conn->secondary_list); + + return conn; +} + +void connection_destroy(void *opaque) +{ + Connection *conn = opaque; + + g_queue_foreach(&conn->primary_list, packet_destroy, NULL); + g_queue_free(&conn->primary_list); + g_queue_foreach(&conn->secondary_list, packet_destroy, NULL); + g_queue_free(&conn->secondary_list); + g_slice_free(Connection, conn); +} + +Packet *packet_new(const void *data, int size) +{ + Packet *pkt = g_slice_new(Packet); + + pkt->data = g_memdup(data, size); + pkt->size = size; + pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); + + return pkt; +} + +void packet_destroy(void *opaque, void *user_data) +{ + Packet *pkt = opaque; + + g_free(pkt->data); + g_slice_free(Packet, pkt); +} + +/* + * Clear hashtable, stop this hash growing really huge + */ +void connection_hashtable_reset(GHashTable *connection_track_table) +{ + g_hash_table_remove_all(connection_track_table); +} + +/* if not found, create a new connection and add to hash table */ +Connection *connection_get(GHashTable *connection_track_table, + ConnectionKey *key, + GQueue *conn_list) +{ + Connection *conn = g_hash_table_lookup(connection_track_table, key); + + if (conn == NULL) { + ConnectionKey *new_key = g_memdup(key, sizeof(*key)); + + conn = connection_new(key); + + if (g_hash_table_size(connection_track_table) > HASHTABLE_MAX_SIZE) { + trace_colo_proxy_main("colo proxy connection hashtable full," + " clear it"); + connection_hashtable_reset(connection_track_table); + /* + * clear the conn_list + */ + while (!g_queue_is_empty(conn_list)) { + connection_destroy(g_queue_pop_head(conn_list)); + } + } + + g_hash_table_insert(connection_track_table, new_key, conn); + } + + return conn; +} diff --git a/net/colo.h b/net/colo.h new file mode 100644 index 0000000000..7c524f3a1c --- /dev/null +++ b/net/colo.h @@ -0,0 +1,88 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * Author: Zhang Chen + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. 
+ */ + +#ifndef QEMU_COLO_PROXY_H +#define QEMU_COLO_PROXY_H + +#include "slirp/slirp.h" +#include "qemu/jhash.h" +#include "qemu/timer.h" + +#define HASHTABLE_MAX_SIZE 16384 + +#ifndef IPPROTO_DCCP +#define IPPROTO_DCCP 33 +#endif + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif + +#ifndef IPPROTO_UDPLITE +#define IPPROTO_UDPLITE 136 +#endif + +typedef struct Packet { + void *data; + union { + uint8_t *network_header; + struct ip *ip; + }; + uint8_t *transport_header; + int size; + /* Time of packet creation, in wall clock ms */ + int64_t creation_ms; +} Packet; + +typedef struct ConnectionKey { + /* (src, dst) must be grouped, in the same way than in IP header */ + struct in_addr src; + struct in_addr dst; + uint16_t src_port; + uint16_t dst_port; + uint8_t ip_proto; +} QEMU_PACKED ConnectionKey; + +typedef struct Connection { + /* connection primary send queue: element type: Packet */ + GQueue primary_list; + /* connection secondary send queue: element type: Packet */ + GQueue secondary_list; + /* flag to enqueue unprocessed_connections */ + bool processing; + uint8_t ip_proto; + /* offset = secondary_seq - primary_seq */ + tcp_seq offset; + /* + * we use this flag update offset func + * run once in independent tcp connection + */ + int syn_flag; +} Connection; + +uint32_t connection_key_hash(const void *opaque); +int connection_key_equal(const void *opaque1, const void *opaque2); +int parse_packet_early(Packet *pkt); +void fill_connection_key(Packet *pkt, ConnectionKey *key); +void reverse_connection_key(ConnectionKey *key); +Connection *connection_new(ConnectionKey *key); +void connection_destroy(void *opaque); +Connection *connection_get(GHashTable *connection_track_table, + ConnectionKey *key, + GQueue *conn_list); +void connection_hashtable_reset(GHashTable *connection_track_table); +Packet *packet_new(const void *data, int size); +void packet_destroy(void *opaque, void *user_data); + +#endif /* QEMU_COLO_PROXY_H */ diff --git a/net/filter-mirror.c b/net/filter-mirror.c index 35df37451d..b7d645617c 100644 --- a/net/filter-mirror.c +++ b/net/filter-mirror.c @@ -38,12 +38,12 @@ typedef struct MirrorState { NetFilterState parent_obj; char *indev; char *outdev; - CharDriverState *chr_in; - CharDriverState *chr_out; + CharBackend chr_in; + CharBackend chr_out; SocketReadState rs; } MirrorState; -static int filter_mirror_send(CharDriverState *chr_out, +static int filter_mirror_send(CharBackend *chr_out, const struct iovec *iov, int iovcnt) { @@ -110,7 +110,8 @@ static void redirector_chr_read(void *opaque, const uint8_t *buf, int size) ret = net_fill_rstate(&s->rs, buf, size); if (ret == -1) { - qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); + qemu_chr_fe_set_handlers(&s->chr_in, NULL, NULL, NULL, + NULL, NULL, true); } } @@ -121,7 +122,8 @@ static void redirector_chr_event(void *opaque, int event) switch (event) { case CHR_EVENT_CLOSED: - qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); + qemu_chr_fe_set_handlers(&s->chr_in, NULL, NULL, NULL, + NULL, NULL, true); break; default: break; @@ -138,7 +140,7 @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf, MirrorState *s = FILTER_MIRROR(nf); int ret; - ret = filter_mirror_send(s->chr_out, iov, iovcnt); + ret = filter_mirror_send(&s->chr_out, iov, iovcnt); if (ret) { error_report("filter_mirror_send failed(%s)", strerror(-ret)); } @@ -160,8 +162,8 @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf, MirrorState *s = FILTER_REDIRECTOR(nf); int ret; - if (s->chr_out) { - ret = 
filter_mirror_send(s->chr_out, iov, iovcnt); + if (qemu_chr_fe_get_driver(&s->chr_out)) { + ret = filter_mirror_send(&s->chr_out, iov, iovcnt); if (ret) { error_report("filter_mirror_send failed(%s)", strerror(-ret)); } @@ -175,45 +177,36 @@ static void filter_mirror_cleanup(NetFilterState *nf) { MirrorState *s = FILTER_MIRROR(nf); - if (s->chr_out) { - qemu_chr_fe_release(s->chr_out); - } + qemu_chr_fe_deinit(&s->chr_out); } static void filter_redirector_cleanup(NetFilterState *nf) { MirrorState *s = FILTER_REDIRECTOR(nf); - if (s->chr_in) { - qemu_chr_add_handlers(s->chr_in, NULL, NULL, NULL, NULL); - qemu_chr_fe_release(s->chr_in); - } - if (s->chr_out) { - qemu_chr_fe_release(s->chr_out); - } + qemu_chr_fe_deinit(&s->chr_in); + qemu_chr_fe_deinit(&s->chr_out); } static void filter_mirror_setup(NetFilterState *nf, Error **errp) { MirrorState *s = FILTER_MIRROR(nf); + CharDriverState *chr; if (!s->outdev) { - error_setg(errp, "filter filter mirror needs 'outdev' " + error_setg(errp, "filter mirror needs 'outdev' " "property set"); return; } - s->chr_out = qemu_chr_find(s->outdev); - if (s->chr_out == NULL) { + chr = qemu_chr_find(s->outdev); + if (chr == NULL) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", s->outdev); return; } - if (qemu_chr_fe_claim(s->chr_out) != 0) { - error_setg(errp, QERR_DEVICE_IN_USE, s->outdev); - return; - } + qemu_chr_fe_init(&s->chr_out, chr, errp); } static void redirector_rs_finalize(SocketReadState *rs) @@ -227,6 +220,7 @@ static void redirector_rs_finalize(SocketReadState *rs) static void filter_redirector_setup(NetFilterState *nf, Error **errp) { MirrorState *s = FILTER_REDIRECTOR(nf); + CharDriverState *chr; if (!s->indev && !s->outdev) { error_setg(errp, "filter redirector needs 'indev' or " @@ -243,26 +237,32 @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) net_socket_rs_init(&s->rs, redirector_rs_finalize); if (s->indev) { - s->chr_in = qemu_chr_find(s->indev); - if (s->chr_in == NULL) { + chr = qemu_chr_find(s->indev); + if (chr == NULL) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "IN Device '%s' not found", s->indev); return; } - qemu_chr_fe_claim_no_fail(s->chr_in); - qemu_chr_add_handlers(s->chr_in, redirector_chr_can_read, - redirector_chr_read, redirector_chr_event, nf); + if (!qemu_chr_fe_init(&s->chr_in, chr, errp)) { + return; + } + + qemu_chr_fe_set_handlers(&s->chr_in, redirector_chr_can_read, + redirector_chr_read, redirector_chr_event, + nf, NULL, true); } if (s->outdev) { - s->chr_out = qemu_chr_find(s->outdev); - if (s->chr_out == NULL) { + chr = qemu_chr_find(s->outdev); + if (chr == NULL) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "OUT Device '%s' not found", s->outdev); return; } - qemu_chr_fe_claim_no_fail(s->chr_out); + if (!qemu_chr_fe_init(&s->chr_out, chr, errp)) { + return; + } } } @@ -315,7 +315,7 @@ filter_mirror_set_outdev(Object *obj, const char *value, Error **errp) g_free(s->outdev); s->outdev = g_strdup(value); if (!s->outdev) { - error_setg(errp, "filter filter mirror needs 'outdev' " + error_setg(errp, "filter mirror needs 'outdev' " "property set"); return; } diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c new file mode 100644 index 0000000000..c4ab91cdee --- /dev/null +++ b/net/filter-rewriter.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 
+ * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * Author: Zhang Chen + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "trace.h" +#include "net/colo.h" +#include "net/filter.h" +#include "net/net.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qapi-visit.h" +#include "qom/object.h" +#include "qemu/main-loop.h" +#include "qemu/iov.h" +#include "net/checksum.h" + +#define FILTER_COLO_REWRITER(obj) \ + OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER) + +#define TYPE_FILTER_REWRITER "filter-rewriter" + +typedef struct RewriterState { + NetFilterState parent_obj; + NetQueue *incoming_queue; + /* hashtable to save connection */ + GHashTable *connection_track_table; +} RewriterState; + +static void filter_rewriter_flush(NetFilterState *nf) +{ + RewriterState *s = FILTER_COLO_REWRITER(nf); + + if (!qemu_net_queue_flush(s->incoming_queue)) { + /* Unable to empty the queue, purge remaining packets */ + qemu_net_queue_purge(s->incoming_queue, nf->netdev); + } +} + +/* + * Return 1 on success, if return 0 means the pkt + * is not TCP packet + */ +static int is_tcp_packet(Packet *pkt) +{ + if (!parse_packet_early(pkt) && + pkt->ip->ip_p == IPPROTO_TCP) { + return 1; + } else { + return 0; + } +} + +/* handle tcp packet from primary guest */ +static int handle_primary_tcp_pkt(NetFilterState *nf, + Connection *conn, + Packet *pkt) +{ + struct tcphdr *tcp_pkt; + + tcp_pkt = (struct tcphdr *)pkt->transport_header; + if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { + trace_colo_filter_rewriter_pkt_info(__func__, + inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst), + ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), + tcp_pkt->th_flags); + trace_colo_filter_rewriter_conn_offset(conn->offset); + } + + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN)) { + /* + * we use this flag update offset func + * run once in independent tcp connection + */ + conn->syn_flag = 1; + } + + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK)) { + if (conn->syn_flag) { + /* + * offset = secondary_seq - primary seq + * ack packet sent by guest from primary node, + * so we use th_ack - 1 get primary_seq + */ + conn->offset -= (ntohl(tcp_pkt->th_ack) - 1); + conn->syn_flag = 0; + } + /* handle packets to the secondary from the primary */ + tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset); + + net_checksum_calculate((uint8_t *)pkt->data, pkt->size); + } + + return 0; +} + +/* handle tcp packet from secondary guest */ +static int handle_secondary_tcp_pkt(NetFilterState *nf, + Connection *conn, + Packet *pkt) +{ + struct tcphdr *tcp_pkt; + + tcp_pkt = (struct tcphdr *)pkt->transport_header; + + if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { + trace_colo_filter_rewriter_pkt_info(__func__, + inet_ntoa(pkt->ip->ip_src), inet_ntoa(pkt->ip->ip_dst), + ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), + tcp_pkt->th_flags); + trace_colo_filter_rewriter_conn_offset(conn->offset); + } + + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN))) { + /* + * save offset = secondary_seq and then + * in handle_primary_tcp_pkt make offset + * = secondary_seq - primary_seq + */ + conn->offset = ntohl(tcp_pkt->th_seq); + } + + if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) { + /* handle packets to the primary from the secondary*/ + tcp_pkt->th_seq = 
htonl(ntohl(tcp_pkt->th_seq) - conn->offset); + + net_checksum_calculate((uint8_t *)pkt->data, pkt->size); + } + + return 0; +} + +static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, + NetClientState *sender, + unsigned flags, + const struct iovec *iov, + int iovcnt, + NetPacketSent *sent_cb) +{ + RewriterState *s = FILTER_COLO_REWRITER(nf); + Connection *conn; + ConnectionKey key; + Packet *pkt; + ssize_t size = iov_size(iov, iovcnt); + char *buf = g_malloc0(size); + + iov_to_buf(iov, iovcnt, 0, buf, size); + pkt = packet_new(buf, size); + g_free(buf); + + /* + * if we get tcp packet + * we will rewrite it to make secondary guest's + * connection established successfully + */ + if (pkt && is_tcp_packet(pkt)) { + + fill_connection_key(pkt, &key); + + if (sender == nf->netdev) { + /* + * We need make tcp TX and RX packet + * into one connection. + */ + reverse_connection_key(&key); + } + conn = connection_get(s->connection_track_table, + &key, + NULL); + + if (sender == nf->netdev) { + /* NET_FILTER_DIRECTION_TX */ + if (!handle_primary_tcp_pkt(nf, conn, pkt)) { + qemu_net_queue_send(s->incoming_queue, sender, 0, + (const uint8_t *)pkt->data, pkt->size, NULL); + packet_destroy(pkt, NULL); + pkt = NULL; + /* + * We block the packet here,after rewrite pkt + * and will send it + */ + return 1; + } + } else { + /* NET_FILTER_DIRECTION_RX */ + if (!handle_secondary_tcp_pkt(nf, conn, pkt)) { + qemu_net_queue_send(s->incoming_queue, sender, 0, + (const uint8_t *)pkt->data, pkt->size, NULL); + packet_destroy(pkt, NULL); + pkt = NULL; + /* + * We block the packet here,after rewrite pkt + * and will send it + */ + return 1; + } + } + } + + packet_destroy(pkt, NULL); + pkt = NULL; + return 0; +} + +static void colo_rewriter_cleanup(NetFilterState *nf) +{ + RewriterState *s = FILTER_COLO_REWRITER(nf); + + /* flush packets */ + if (s->incoming_queue) { + filter_rewriter_flush(nf); + g_free(s->incoming_queue); + } +} + +static void colo_rewriter_setup(NetFilterState *nf, Error **errp) +{ + RewriterState *s = FILTER_COLO_REWRITER(nf); + + s->connection_track_table = g_hash_table_new_full(connection_key_hash, + connection_key_equal, + g_free, + connection_destroy); + s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); +} + +static void colo_rewriter_class_init(ObjectClass *oc, void *data) +{ + NetFilterClass *nfc = NETFILTER_CLASS(oc); + + nfc->setup = colo_rewriter_setup; + nfc->cleanup = colo_rewriter_cleanup; + nfc->receive_iov = colo_rewriter_receive_iov; +} + +static const TypeInfo colo_rewriter_info = { + .name = TYPE_FILTER_REWRITER, + .parent = TYPE_NETFILTER, + .class_init = colo_rewriter_class_init, + .instance_size = sizeof(RewriterState), +}; + +static void register_types(void) +{ + type_register_static(&colo_rewriter_info); +} + +type_init(register_types); diff --git a/net/filter.c b/net/filter.c index 888fe6dd93..1dfd2caa23 100644 --- a/net/filter.c +++ b/net/filter.c @@ -239,7 +239,7 @@ static void netfilter_finalize(Object *obj) } if (nf->netdev && !QTAILQ_EMPTY(&nf->netdev->filters) && - nf->next.tqe_prev) { + QTAILQ_IN_USE(nf, next)) { QTAILQ_REMOVE(&nf->netdev->filters, nf, next); } g_free(nf->netdev_id); diff --git a/net/net.c b/net/net.c index d51cb29882..939fe3193a 100644 --- a/net/net.c +++ b/net/net.c @@ -690,9 +690,13 @@ static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov, buffer = iov[0].iov_base; offset = iov[0].iov_len; } else { - buf = g_new(uint8_t, NET_BUFSIZE); + offset = iov_size(iov, iovcnt); + if (offset > NET_BUFSIZE) 
{ + return -1; + } + buf = g_malloc(offset); buffer = buf; - offset = iov_to_buf(iov, iovcnt, 0, buf, NET_BUFSIZE); + offset = iov_to_buf(iov, iovcnt, 0, buf, offset); } if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) { @@ -1179,6 +1183,7 @@ void hmp_host_net_remove(Monitor *mon, const QDict *qdict) qemu_del_net_client(nc->peer); qemu_del_net_client(nc); + qemu_opts_del(qemu_opts_find(qemu_find_opts("net"), device)); } void netdev_add(QemuOpts *opts, Error **errp) @@ -1648,9 +1653,8 @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) if (rs->index >= rs->packet_len) { rs->index = 0; rs->state = 0; - if (rs->finalize) { - rs->finalize(rs); - } + assert(rs->finalize); + rs->finalize(rs); } break; } diff --git a/net/slirp.c b/net/slirp.c index b60893f9c5..bcd1c5f57d 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -40,6 +40,7 @@ #include "sysemu/char.h" #include "sysemu/sysemu.h" #include "qemu/cutils.h" +#include "qapi/error.h" static int get_str_sep(char *buf, int buf_size, const char **pp, int sep) { @@ -682,7 +683,7 @@ int net_slirp_smb(const char *exported_dir) #endif /* !defined(_WIN32) */ struct GuestFwd { - CharDriverState *hd; + CharBackend hd; struct in_addr server; int port; Slirp *slirp; @@ -746,15 +747,23 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str, return -1; } } else { - fwd = g_new(struct GuestFwd, 1); - fwd->hd = qemu_chr_new(buf, p, NULL); - if (!fwd->hd) { + Error *err = NULL; + CharDriverState *chr = qemu_chr_new(buf, p); + + if (!chr) { error_report("could not open guest forwarding device '%s'", buf); + return -1; + } + + fwd = g_new(struct GuestFwd, 1); + qemu_chr_fe_init(&fwd->hd, chr, &err); + if (err) { + error_report_err(err); g_free(fwd); return -1; } - if (slirp_add_exec(s->slirp, 3, fwd->hd, &server, port) < 0) { + if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) { error_report("conflicting/invalid host:port in guest forwarding " "rule '%s'", config_str); g_free(fwd); @@ -764,9 +773,8 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str, fwd->port = port; fwd->slirp = s->slirp; - qemu_chr_fe_claim_no_fail(fwd->hd); - qemu_chr_add_handlers(fwd->hd, guestfwd_can_read, guestfwd_read, - NULL, fwd); + qemu_chr_fe_set_handlers(&fwd->hd, guestfwd_can_read, guestfwd_read, + NULL, fwd, NULL, true); } return 0; diff --git a/net/socket.c b/net/socket.c index 3f98eefb34..fe3547b018 100644 --- a/net/socket.c +++ b/net/socket.c @@ -489,90 +489,106 @@ static int net_socket_listen_init(NetClientState *peer, { NetClientState *nc; NetSocketState *s; - struct sockaddr_in saddr; - int fd, ret; - - if (parse_host_port(&saddr, host_str) < 0) - return -1; + SocketAddress *saddr; + int ret; + Error *local_error = NULL; - fd = qemu_socket(PF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("socket"); + saddr = socket_parse(host_str, &local_error); + if (saddr == NULL) { + error_report_err(local_error); return -1; } - qemu_set_nonblock(fd); - - socket_set_fast_reuse(fd); - ret = bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)); - if (ret < 0) { - perror("bind"); - closesocket(fd); - return -1; - } - ret = listen(fd, 0); + ret = socket_listen(saddr, &local_error); if (ret < 0) { - perror("listen"); - closesocket(fd); + qapi_free_SocketAddress(saddr); + error_report_err(local_error); return -1; } nc = qemu_new_net_client(&net_socket_info, peer, model, name); s = DO_UPCAST(NetSocketState, nc, nc); s->fd = -1; - s->listen_fd = fd; + s->listen_fd = ret; s->nc.link_down = true; + net_socket_rs_init(&s->rs, 
net_socket_rs_finalize); qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); + qapi_free_SocketAddress(saddr); return 0; } +typedef struct { + NetClientState *peer; + SocketAddress *saddr; + char *model; + char *name; +} socket_connect_data; + +static void socket_connect_data_free(socket_connect_data *c) +{ + qapi_free_SocketAddress(c->saddr); + g_free(c->model); + g_free(c->name); + g_free(c); +} + +static void net_socket_connected(int fd, Error *err, void *opaque) +{ + socket_connect_data *c = opaque; + NetSocketState *s; + char *addr_str = NULL; + Error *local_error = NULL; + + addr_str = socket_address_to_string(c->saddr, &local_error); + if (addr_str == NULL) { + error_report_err(local_error); + closesocket(fd); + goto end; + } + + s = net_socket_fd_init(c->peer, c->model, c->name, fd, true); + if (!s) { + closesocket(fd); + goto end; + } + + snprintf(s->nc.info_str, sizeof(s->nc.info_str), + "socket: connect to %s", addr_str); + +end: + g_free(addr_str); + socket_connect_data_free(c); +} + static int net_socket_connect_init(NetClientState *peer, const char *model, const char *name, const char *host_str) { - NetSocketState *s; - int fd, connected, ret; - struct sockaddr_in saddr; + socket_connect_data *c = g_new0(socket_connect_data, 1); + int fd = -1; + Error *local_error = NULL; - if (parse_host_port(&saddr, host_str) < 0) - return -1; + c->peer = peer; + c->model = g_strdup(model); + c->name = g_strdup(name); + c->saddr = socket_parse(host_str, &local_error); + if (c->saddr == NULL) { + goto err; + } - fd = qemu_socket(PF_INET, SOCK_STREAM, 0); + fd = socket_connect(c->saddr, &local_error, net_socket_connected, c); if (fd < 0) { - perror("socket"); - return -1; + goto err; } - qemu_set_nonblock(fd); - connected = 0; - for(;;) { - ret = connect(fd, (struct sockaddr *)&saddr, sizeof(saddr)); - if (ret < 0) { - if (errno == EINTR || errno == EWOULDBLOCK) { - /* continue */ - } else if (errno == EINPROGRESS || - errno == EALREADY || - errno == EINVAL) { - break; - } else { - perror("connect"); - closesocket(fd); - return -1; - } - } else { - connected = 1; - break; - } - } - s = net_socket_fd_init(peer, model, name, fd, connected); - if (!s) - return -1; - snprintf(s->nc.info_str, sizeof(s->nc.info_str), - "socket: connect to %s:%d", - inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port)); return 0; + +err: + error_report_err(local_error); + socket_connect_data_free(c); + return -1; } static int net_socket_mcast_init(NetClientState *peer, diff --git a/net/tap-bsd.c b/net/tap-bsd.c index c506ac31d6..6c9692263d 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -35,6 +35,10 @@ #include #endif +#if defined(__OpenBSD__) +#include +#endif + #ifndef __FreeBSD__ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required, int mq_required, Error **errp) @@ -55,7 +59,7 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, if (*ifname) { snprintf(dname, sizeof dname, "/dev/%s", ifname); } else { -#if defined(__OpenBSD__) +#if defined(__OpenBSD__) && OpenBSD < 201605 snprintf(dname, sizeof dname, "/dev/tun%d", i); #else snprintf(dname, sizeof dname, "/dev/tap%d", i); diff --git a/net/tap.c b/net/tap.c index 6abb962efd..b6896a7b7c 100644 --- a/net/tap.c +++ b/net/tap.c @@ -857,7 +857,9 @@ int net_init_tap(const Netdev *netdev, const char *name, return -1; } - fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE, + fd = net_bridge_run_helper(tap->helper, + tap->has_br ? 
+ tap->br : DEFAULT_BRIDGE_INTERFACE, errp); if (fd == -1) { return -1; diff --git a/net/trace-events b/net/trace-events index 65c46a48fb..35198bc742 100644 --- a/net/trace-events +++ b/net/trace-events @@ -2,3 +2,21 @@ # net/vhost-user.c vhost_user_event(const char *chr, int event) "chr: %s got event: %d" + +# net/colo.c +colo_proxy_main(const char *chr) ": %s" + +# net/colo-compare.c +colo_compare_main(const char *chr) ": %s" +colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" +colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" +colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" +colo_old_packet_check_found(int64_t old_time) "%" PRId64 +colo_compare_miscompare(void) "" +colo_compare_pkt_info_src(const char *src, uint32_t sseq, uint32_t sack, int res, uint32_t sflag, int ssize) "src/dst: %s s: seq/ack=%u/%u res=%d flags=%x spkt_size: %d\n" +colo_compare_pkt_info_dst(const char *dst, uint32_t dseq, uint32_t dack, int res, uint32_t dflag, int dsize) "src/dst: %s d: seq/ack=%u/%u res=%d flags=%x dpkt_size: %d\n" + +# net/filter-rewriter.c +colo_filter_rewriter_debug(void) "" +colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u flags=%x\n" +colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u\n" diff --git a/net/vhost-user.c b/net/vhost-user.c index b0595f8781..7aff77ee4a 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -20,18 +20,13 @@ typedef struct VhostUserState { NetClientState nc; - CharDriverState *chr; + CharBackend chr; /* only queue index 0 */ VHostNetState *vhost_net; guint watch; uint64_t acked_features; bool started; } VhostUserState; -typedef struct VhostUserChardevProps { - bool is_socket; - bool is_unix; -} VhostUserChardevProps; - VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) { VhostUserState *s = DO_UPCAST(VhostUserState, nc, nc); @@ -67,7 +62,7 @@ static void vhost_user_stop(int queues, NetClientState *ncs[]) } } -static int vhost_user_start(int queues, NetClientState *ncs[]) +static int vhost_user_start(int queues, NetClientState *ncs[], CharBackend *be) { VhostNetOptions options; struct vhost_net *net = NULL; @@ -83,7 +78,7 @@ static int vhost_user_start(int queues, NetClientState *ncs[]) s = DO_UPCAST(VhostUserState, nc, ncs[i]); options.net_backend = ncs[i]; - options.opaque = s->chr; + options.opaque = be; options.busyloop_timeout = 0; net = vhost_net_init(&options); if (!net) { @@ -155,10 +150,8 @@ static void vhost_user_cleanup(NetClientState *nc) g_free(s->vhost_net); s->vhost_net = NULL; } - if (s->chr) { - qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL); - qemu_chr_fe_release(s->chr); - s->chr = NULL; + if (nc->queue_index == 0) { + qemu_chr_fe_deinit(&s->chr); } qemu_purge_queued_packets(nc); @@ -192,7 +185,7 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond, { VhostUserState *s = opaque; - qemu_chr_disconnect(s->chr); + qemu_chr_fe_disconnect(&s->chr); return FALSE; } @@ -202,6 +195,7 @@ static void net_vhost_user_event(void *opaque, int event) const char *name = opaque; NetClientState *ncs[MAX_QUEUE_NUM]; VhostUserState *s; + CharDriverState *chr; Error *err = NULL; int queues; @@ -211,13 +205,14 @@ static void net_vhost_user_event(void *opaque, int event) assert(queues < MAX_QUEUE_NUM); s = DO_UPCAST(VhostUserState, 
nc, ncs[0]); - trace_vhost_user_event(s->chr->label, event); + chr = qemu_chr_fe_get_driver(&s->chr); + trace_vhost_user_event(chr->label, event); switch (event) { case CHR_EVENT_OPENED: - s->watch = qemu_chr_fe_add_watch(s->chr, G_IO_HUP, + s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, net_vhost_user_watch, s); - if (vhost_user_start(queues, ncs) < 0) { - qemu_chr_disconnect(s->chr); + if (vhost_user_start(queues, ncs, &s->chr) < 0) { + qemu_chr_fe_disconnect(&s->chr); return; } qmp_set_link(name, true, &err); @@ -240,6 +235,7 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, const char *name, CharDriverState *chr, int queues) { + Error *err = NULL; NetClientState *nc, *nc0 = NULL; VhostUserState *s; int i; @@ -249,28 +245,28 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, for (i = 0; i < queues; i++) { nc = qemu_new_net_client(&net_vhost_user_info, peer, device, name); - if (!nc0) { - nc0 = nc; - } - snprintf(nc->info_str, sizeof(nc->info_str), "vhost-user%d to %s", i, chr->label); - nc->queue_index = i; + if (!nc0) { + nc0 = nc; + s = DO_UPCAST(VhostUserState, nc, nc); + if (!qemu_chr_fe_init(&s->chr, chr, &err)) { + error_report_err(err); + return -1; + } + } - s = DO_UPCAST(VhostUserState, nc, nc); - s->chr = chr; } s = DO_UPCAST(VhostUserState, nc, nc0); do { - Error *err = NULL; - if (qemu_chr_wait_connected(chr, &err) < 0) { + if (qemu_chr_fe_wait_connected(&s->chr, &err) < 0) { error_report_err(err); return -1; } - qemu_chr_add_handlers(chr, NULL, NULL, - net_vhost_user_event, nc0->name); + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, + net_vhost_user_event, nc0->name, NULL, true); } while (!s->started); assert(s->vhost_net); @@ -278,51 +274,27 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, return 0; } -static int net_vhost_chardev_opts(void *opaque, - const char *name, const char *value, - Error **errp) -{ - VhostUserChardevProps *props = opaque; - - if (strcmp(name, "backend") == 0 && strcmp(value, "socket") == 0) { - props->is_socket = true; - } else if (strcmp(name, "path") == 0) { - props->is_unix = true; - } else if (strcmp(name, "server") == 0) { - } else { - error_setg(errp, - "vhost-user does not support a chardev with option %s=%s", - name, value); - return -1; - } - return 0; -} - -static CharDriverState *net_vhost_parse_chardev( +static CharDriverState *net_vhost_claim_chardev( const NetdevVhostUserOptions *opts, Error **errp) { CharDriverState *chr = qemu_chr_find(opts->chardev); - VhostUserChardevProps props; if (chr == NULL) { error_setg(errp, "chardev \"%s\" not found", opts->chardev); return NULL; } - /* inspect chardev opts */ - memset(&props, 0, sizeof(props)); - if (qemu_opt_foreach(chr->opts, net_vhost_chardev_opts, &props, errp)) { + if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE)) { + error_setg(errp, "chardev \"%s\" is not reconnectable", + opts->chardev); return NULL; } - - if (!props.is_socket || !props.is_unix) { - error_setg(errp, "chardev \"%s\" is not a unix socket", + if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_FD_PASS)) { + error_setg(errp, "chardev \"%s\" does not support FD passing", opts->chardev); return NULL; } - qemu_chr_fe_claim_no_fail(chr); - return chr; } @@ -357,7 +329,7 @@ int net_init_vhost_user(const Netdev *netdev, const char *name, assert(netdev->type == NET_CLIENT_DRIVER_VHOST_USER); vhost_user_opts = &netdev->u.vhost_user; - chr = net_vhost_parse_chardev(vhost_user_opts, errp); + chr = 
net_vhost_claim_chardev(vhost_user_opts, errp); if (!chr) { return -1; } diff --git a/numa.c b/numa.c index 6289f469bd..9c09e45e7d 100644 --- a/numa.c +++ b/numa.c @@ -550,3 +550,15 @@ MemdevList *qmp_query_memdev(Error **errp) object_child_foreach(obj, query_memdev, &list); return list; } + +int numa_get_node_for_cpu(int idx) +{ + int i; + + for (i = 0; i < nb_numa_nodes; i++) { + if (test_bit(idx, numa_info[i].node_cpu)) { + break; + } + } + return i; +} diff --git a/pc-bios/README b/pc-bios/README index 5a8a93193e..47a913f9c7 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/aik/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20160223. + built from git tag qemu-slof-20161019. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as @@ -42,3 +42,8 @@ it was compiled using the qemu-ppce500 target. A git mirror is available at: git://git.qemu-project.org/u-boot.git The hash used to compile the current version is: 2072e72 + +- Skiboot (https://github.com/open-power/skiboot/) is an OPAL + (OpenPower Abstraction Layer) firmware for OpenPOWER systems. It can + run an hypervisor OS or simply a host OS on the "baremetal" + platform, also known as the PowerNV (Non-Virtualized) platform. diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin index 57fb4d88b1..229b5af986 100644 Binary files a/pc-bios/bios-256k.bin and b/pc-bios/bios-256k.bin differ diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin index 8a6869ff1b..9a9b0f0106 100644 Binary files a/pc-bios/bios.bin and b/pc-bios/bios.bin differ diff --git a/pc-bios/efi-e1000.rom b/pc-bios/efi-e1000.rom index 4e61f9b2df..675992428d 100644 Binary files a/pc-bios/efi-e1000.rom and b/pc-bios/efi-e1000.rom differ diff --git a/pc-bios/efi-e1000e.rom b/pc-bios/efi-e1000e.rom index 192a43729d..145896c219 100644 Binary files a/pc-bios/efi-e1000e.rom and b/pc-bios/efi-e1000e.rom differ diff --git a/pc-bios/efi-eepro100.rom b/pc-bios/efi-eepro100.rom index 66c52269cd..ff2793f974 100644 Binary files a/pc-bios/efi-eepro100.rom and b/pc-bios/efi-eepro100.rom differ diff --git a/pc-bios/efi-ne2k_pci.rom b/pc-bios/efi-ne2k_pci.rom index 8c3e5fd6d1..c832ec017e 100644 Binary files a/pc-bios/efi-ne2k_pci.rom and b/pc-bios/efi-ne2k_pci.rom differ diff --git a/pc-bios/efi-pcnet.rom b/pc-bios/efi-pcnet.rom index 802e225cb5..4d803d30bc 100644 Binary files a/pc-bios/efi-pcnet.rom and b/pc-bios/efi-pcnet.rom differ diff --git a/pc-bios/efi-rtl8139.rom b/pc-bios/efi-rtl8139.rom index 8827181f31..83488cd54b 100644 Binary files a/pc-bios/efi-rtl8139.rom and b/pc-bios/efi-rtl8139.rom differ diff --git a/pc-bios/efi-virtio.rom b/pc-bios/efi-virtio.rom index 2fc0497958..3563776dbd 100644 Binary files a/pc-bios/efi-virtio.rom and b/pc-bios/efi-virtio.rom differ diff --git a/pc-bios/efi-vmxnet3.rom b/pc-bios/efi-vmxnet3.rom index 3d42635d16..e22275253b 100644 Binary files a/pc-bios/efi-vmxnet3.rom and b/pc-bios/efi-vmxnet3.rom differ diff --git a/pc-bios/linuxboot_dma.bin b/pc-bios/linuxboot_dma.bin index 238a195d38..218d3ab4a2 100644 Binary files a/pc-bios/linuxboot_dma.bin and b/pc-bios/linuxboot_dma.bin differ diff --git a/pc-bios/openbios-ppc b/pc-bios/openbios-ppc index d913fd007f..95f1167261 100644 Binary files a/pc-bios/openbios-ppc and b/pc-bios/openbios-ppc differ diff --git 
a/pc-bios/openbios-sparc32 b/pc-bios/openbios-sparc32 index c8c6fcbe77..675968ea62 100644 Binary files a/pc-bios/openbios-sparc32 and b/pc-bios/openbios-sparc32 differ diff --git a/pc-bios/openbios-sparc64 b/pc-bios/openbios-sparc64 index 73f03a04a3..d4b95326fe 100644 Binary files a/pc-bios/openbios-sparc64 and b/pc-bios/openbios-sparc64 differ diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile index afa48f1cf1..fa53d9e58e 100644 --- a/pc-bios/optionrom/Makefile +++ b/pc-bios/optionrom/Makefile @@ -43,16 +43,16 @@ build-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin %.o: %.S - $(call quiet-command,$(CPP) $(QEMU_INCLUDES) $(QEMU_DGFLAGS) -c -o - $< | $(AS) $(ASFLAGS) -o $@," AS $(TARGET_DIR)$@") + $(call quiet-command,$(CPP) $(QEMU_INCLUDES) $(QEMU_DGFLAGS) -c -o - $< | $(AS) $(ASFLAGS) -o $@,"AS","$(TARGET_DIR)$@") %.img: %.o - $(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -m $(LD_I386_EMULATION) -T $(SRC_PATH)/pc-bios/optionrom/flat.lds -s -o $@ $<," Building $(TARGET_DIR)$@") + $(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -m $(LD_I386_EMULATION) -T $(SRC_PATH)/pc-bios/optionrom/flat.lds -s -o $@ $<,"BUILD","$(TARGET_DIR)$@") %.raw: %.img - $(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@," Building $(TARGET_DIR)$@") + $(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@,"BUILD","$(TARGET_DIR)$@") %.bin: %.raw - $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/signrom.py $< $@," Signing $(TARGET_DIR)$@") + $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/signrom.py $< $@,"SIGN","$(TARGET_DIR)$@") clean: rm -f *.o *.d *.raw *.img *.bin *~ diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c index 7549797732..4754282ad7 100644 --- a/pc-bios/optionrom/linuxboot_dma.c +++ b/pc-bios/optionrom/linuxboot_dma.c @@ -122,24 +122,14 @@ static inline void writel_es(uint16_t offset, uint32_t val) static inline uint32_t bswap32(uint32_t x) { - return - ((x & 0x000000ffU) << 24) | - ((x & 0x0000ff00U) << 8) | - ((x & 0x00ff0000U) >> 8) | - ((x & 0xff000000U) >> 24); + asm("bswapl %0" : "=r" (x) : "0" (x)); + return x; } static inline uint64_t bswap64(uint64_t x) { - return - ((x & 0x00000000000000ffULL) << 56) | - ((x & 0x000000000000ff00ULL) << 40) | - ((x & 0x0000000000ff0000ULL) << 24) | - ((x & 0x00000000ff000000ULL) << 8) | - ((x & 0x000000ff00000000ULL) >> 8) | - ((x & 0x0000ff0000000000ULL) >> 24) | - ((x & 0x00ff000000000000ULL) >> 40) | - ((x & 0xff00000000000000ULL) >> 56); + asm("bswapl %%eax; bswapl %%edx; xchg %%eax, %%edx" : "=A" (x) : "0" (x)); + return x; } static inline uint64_t cpu_to_be64(uint64_t x) diff --git a/pc-bios/palcode-clipper b/pc-bios/palcode-clipper index 9956340cd1..1df377a0fd 100755 Binary files a/pc-bios/palcode-clipper and b/pc-bios/palcode-clipper differ diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img index 089f6ba5e9..cf05bf0be2 100644 Binary files a/pc-bios/s390-ccw.img and b/pc-bios/s390-ccw.img differ diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile index 0ab25388a4..0339c24789 100644 --- a/pc-bios/s390-ccw/Makefile +++ b/pc-bios/s390-ccw/Makefile @@ -19,10 +19,10 @@ LDFLAGS += -Wl,-pie -nostdlib build-all: s390-ccw.img s390-ccw.elf: $(OBJECTS) - $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $(OBJECTS)," Building $(TARGET_DIR)$@") + $(call quiet-command,$(CC) $(LDFLAGS) -o $@ $(OBJECTS),"BUILD","$(TARGET_DIR)$@") s390-ccw.img: s390-ccw.elf - $(call quiet-command,strip --strip-unneeded $< -o $@," Stripping $(TARGET_DIR)$@") + $(call quiet-command,strip 
--strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") $(OBJECTS): Makefile diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c index 1d34e8c1aa..b333734955 100644 --- a/pc-bios/s390-ccw/virtio.c +++ b/pc-bios/s390-ccw/virtio.c @@ -97,7 +97,8 @@ static int run_ccw(VDev *vdev, int cmd, void *ptr, int len) /* start command processing */ stsch_err(vdev->schid, &schib); - schib.scsw.ctrl = SCSW_FCTL_START_FUNC; + /* enable the subchannel for IPL device */ + schib.pmcw.ena = 1; msch(vdev->schid, &schib); /* start subchannel command */ diff --git a/pc-bios/skiboot.lid b/pc-bios/skiboot.lid new file mode 100644 index 0000000000..0e59a8280d Binary files /dev/null and b/pc-bios/skiboot.lid differ diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin index f2384939ef..30ce7ac384 100644 Binary files a/pc-bios/slof.bin and b/pc-bios/slof.bin differ diff --git a/pc-bios/spapr-rtas/Makefile b/pc-bios/spapr-rtas/Makefile index dc8b23e3ce..f26dd428b7 100644 --- a/pc-bios/spapr-rtas/Makefile +++ b/pc-bios/spapr-rtas/Makefile @@ -15,10 +15,10 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/spapr-rtas) build-all: spapr-rtas.bin %.img: %.o - $(call quiet-command,$(CC) -nostdlib -o $@ $<," Building $(TARGET_DIR)$@") + $(call quiet-command,$(CC) -nostdlib -o $@ $<,"Building","$(TARGET_DIR)$@") %.bin: %.img - $(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@," Building $(TARGET_DIR)$@") + $(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@,"Building","$(TARGET_DIR)$@") clean: rm -f *.o *.d *.img *.bin *~ diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin index 3f4bb30fe3..9dadce2345 100644 Binary files a/pc-bios/vgabios-cirrus.bin and b/pc-bios/vgabios-cirrus.bin differ diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin index 38d31b6b5b..a89725c81c 100644 Binary files a/pc-bios/vgabios-qxl.bin and b/pc-bios/vgabios-qxl.bin differ diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin index e469c107a7..ea041412a2 100644 Binary files a/pc-bios/vgabios-stdvga.bin and b/pc-bios/vgabios-stdvga.bin differ diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin index d42b02873a..71e22fc868 100644 Binary files a/pc-bios/vgabios-virtio.bin and b/pc-bios/vgabios-virtio.bin differ diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin index 26bc0b704c..ad239cbfe8 100644 Binary files a/pc-bios/vgabios-vmware.bin and b/pc-bios/vgabios-vmware.bin differ diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin index 2d1a7c6c95..9947c2c26f 100644 Binary files a/pc-bios/vgabios.bin and b/pc-bios/vgabios.bin differ diff --git a/po/Makefile b/po/Makefile index 7bab09dce2..cc630363de 100644 --- a/po/Makefile +++ b/po/Makefile @@ -10,7 +10,7 @@ all: .PHONY: all build clean install update %.mo: %.po - $(call quiet-command, msgfmt -o $@ $<, " GEN $@") + $(call quiet-command, msgfmt -o $@ $<,"GEN","$@") -include ../config-host.mak include $(SRC_PATH)/rules.mak @@ -46,7 +46,7 @@ $(PO_PATH)/messages.po: $(SRC_PATH)/ui/gtk.c xgettext -o - --from-code=UTF-8 --foreign-user \ --package-name=QEMU --package-version=$(VERSION) \ --msgid-bugs-address=qemu-devel@nongnu.org -k_ -C ui/gtk.c | \ - sed -e s/CHARSET/UTF-8/) >$@, " GEN $@") + sed -e s/CHARSET/UTF-8/) >$@,"GEN","$@") $(PO_PATH)/%.po: $(PO_PATH)/messages.po - $(call quiet-command, msgmerge -q $@ $< > $@.bak && mv $@.bak $@, " GEN $@") + $(call quiet-command, msgmerge -q $@ $< > $@.bak && mv $@.bak $@,"GEN","$@") diff --git a/po/bg.po b/po/bg.po index 50478616ee..279d1b864a 100644 --- a/po/bg.po +++ 
b/po/bg.po @@ -6,8 +6,8 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 2.6.50\n" -"Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2016-06-26 10:16+0300\n" +"Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2016-06-09 15:54+0300\n" "Last-Translator: Alexander Shopov \n" "Language-Team: Bulgarian \n" @@ -17,74 +17,74 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" -#: ui/gtk.c:274 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr " — натиснете Ctrl+Alt+G, за да освободите фокуса" -#: ui/gtk.c:278 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [пауза]" -#: ui/gtk.c:1906 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "_Пауза" -#: ui/gtk.c:1912 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "_Рестартиране" -#: ui/gtk.c:1915 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "_Изключване" -#: ui/gtk.c:1921 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "_Спиране на програмата" -#: ui/gtk.c:2013 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "На _цял екран" -#: ui/gtk.c:2016 +#: ui/gtk.c:2032 msgid "_Copy" msgstr "_Копиране" -#: ui/gtk.c:2032 +#: ui/gtk.c:2048 msgid "Zoom _In" msgstr "_Увеличаване" -#: ui/gtk.c:2039 +#: ui/gtk.c:2055 msgid "Zoom _Out" msgstr "_Намаляване" -#: ui/gtk.c:2046 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "По_местване" -#: ui/gtk.c:2053 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "Напас_ване" -#: ui/gtk.c:2059 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "Прихващане при посо_чване" -#: ui/gtk.c:2062 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "Прихващане на _фокуса" -#: ui/gtk.c:2091 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "Подпро_зорци" -#: ui/gtk.c:2094 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "Към самостоятелен подпрозорец" -#: ui/gtk.c:2106 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "_Машина" -#: ui/gtk.c:2111 +#: ui/gtk.c:2127 msgid "_View" msgstr "_Изглед" diff --git a/po/de_DE.po b/po/de_DE.po index 97ae655169..de27fcf174 100644 --- a/po/de_DE.po +++ b/po/de_DE.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 1.4.50\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2012-02-28 16:00+0100\n" "Last-Translator: Kevin Wolf \n" "Language-Team: Deutsch \n" @@ -16,70 +16,74 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=(n!=1);\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr " - Strg+Alt+G drücken, um Eingabegeräte freizugeben" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [Angehalten]" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "_Angehalten" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "_Reset" -#: ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "_Herunterfahren" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "_Beenden" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "_Vollbild" -#: ui/gtk.c:1954 +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "_Kopieren" + +#: ui/gtk.c:2048 msgid "Zoom _In" msgstr "_Heranzoomen" -#: ui/gtk.c:1961 +#: ui/gtk.c:2055 msgid "Zoom _Out" msgstr "_Wegzoomen" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "_Einpassen" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "Auf _Fenstergröße skalieren" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "Tastatur _automatisch einfangen" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid 
"_Grab Input" msgstr "_Eingabegeräte einfangen" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "Reiter anzeigen" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "Reiter abtrennen" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "_Maschine" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "_Ansicht" diff --git a/po/fr_FR.po b/po/fr_FR.po index e9fb0402db..94f4a94f5c 100644 --- a/po/fr_FR.po +++ b/po/fr_FR.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 1.4.50\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2014-07-28 23:25+0200\n" "Last-Translator: Aurelien Jarno \n" "Language-Team: French \n" @@ -17,70 +17,74 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: Lokalize 1.4\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr "- Appuyer sur Ctrl+Alt+G pour arrêter la capture" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [En pause]" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "_Pause" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "_Réinitialiser" -#: ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "_Éteindre" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "_Quitter" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "Mode _plein écran" -#: ui/gtk.c:1954 +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "_Copier" + +#: ui/gtk.c:2048 msgid "Zoom _In" msgstr "Zoom _avant" -#: ui/gtk.c:1961 +#: ui/gtk.c:2055 msgid "Zoom _Out" msgstr "_Zoom arrière" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "Zoom _idéal" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "Zoomer pour a_juster" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "Capturer en _survolant" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "_Capturer les entrées" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "Montrer les _onglets" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "_Détacher l'onglet" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "_Machine" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "_Vue" diff --git a/po/hu.po b/po/hu.po index 1149adf26b..86f78e92b9 100644 --- a/po/hu.po +++ b/po/hu.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 1.4.50\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2013-05-06 20:42+0200\n" "Last-Translator: Ákos Kovács \n" "Language-Team: Hungarian \n" @@ -15,73 +15,77 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr " - Nyomj Ctrl+Alt+G-t a bemeneti eszközök elengedéséhez" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [Megállítva]" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "_Megállítás" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "Új_raindítás" -#: ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "_Leállítás" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "" -#: ui/gtk.c:1954 +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "" + +#: ui/gtk.c:2048 #, fuzzy msgid "Zoom _In" msgstr "Ablakmérethez _igazítás" -#: 
ui/gtk.c:1961 +#: ui/gtk.c:2055 #, fuzzy msgid "Zoom _Out" msgstr "Ablakmérethez _igazítás" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "Ablakmérethez _igazítás" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "Automatikus _elfogás" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "_Bemeneti eszközök megragadása" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "_Fülek megjelenítése" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "_Gép" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "_Nézet" diff --git a/po/it.po b/po/it.po index a2c8e558c5..bfae84e797 100644 --- a/po/it.po +++ b/po/it.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 1.4.50\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2014-07-29 08:25+0200\n" "Last-Translator: Paolo Bonzini \n" "Language-Team: Italian \n" @@ -16,70 +16,74 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=n != 1;\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr " - Premere Ctrl+Alt+G per rilasciare l'input" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [Pausa]" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "_Pausa" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "_Reset" -#: ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "_Spegni" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "_Esci" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" -msgstr "" +msgstr "A t_utto schermo" + +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "_Copia" -#: ui/gtk.c:1954 +#: ui/gtk.c:2048 msgid "Zoom _In" msgstr "_Aumenta zoom" -#: ui/gtk.c:1961 +#: ui/gtk.c:2055 msgid "Zoom _Out" msgstr "_Riduci zoom" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "A_nnulla zoom" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "Adatta alla _finestra" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "Cattura _automatica input" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "_Cattura input" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "Mostra _tab" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "_Sposta in una nuova finestra" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "_Macchina virtuale" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "_Visualizza" diff --git a/po/messages.po b/po/messages.po index e8f9ccd389..e0a98c16a9 100644 --- a/po/messages.po +++ b/po/messages.po @@ -5,9 +5,9 @@ #, fuzzy msgid "" msgstr "" -"Project-Id-Version: QEMU 2.5.92\n" +"Project-Id-Version: QEMU 2.7.93\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -16,70 +16,74 @@ msgstr "" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr "" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr "" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "" -#: 
ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "" -#: ui/gtk.c:1954 +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "" + +#: ui/gtk.c:2048 msgid "Zoom _In" msgstr "" -#: ui/gtk.c:1961 +#: ui/gtk.c:2055 msgid "Zoom _Out" msgstr "" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "" diff --git a/po/tr.po b/po/tr.po index ec99127793..af34b52d52 100644 --- a/po/tr.po +++ b/po/tr.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 1.4.50\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2013-04-22 18:35+0300\n" "Last-Translator: Ozan Çağlayan \n" "Language-Team: Türkçe <>\n" @@ -17,72 +17,76 @@ msgstr "" "Plural-Forms: nplurals=1; plural=0;\n" "X-Generator: Gtranslator 2.91.6\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr " - Yakalamayı durdurmak için Ctrl+Alt+G tuşlarına basın" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [Duraklatıldı]" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" msgstr "_Duraklat" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "_Sıfırla" -#: ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "_Kapat" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "" -#: ui/gtk.c:1954 +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "" + +#: ui/gtk.c:2048 #, fuzzy msgid "Zoom _In" msgstr "Yakınlaş ve Sığ_dır" -#: ui/gtk.c:1961 +#: ui/gtk.c:2055 #, fuzzy msgid "Zoom _Out" msgstr "Yakınlaş ve Sığ_dır" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "Yakınlaş ve Sığ_dır" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "Ü_zerindeyken Yakala" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "Girdiyi _Yakala" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "Se_kmeleri Göster" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "_Makine" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "_Görüntüle" diff --git a/po/zh_CN.po b/po/zh_CN.po index b55b6f375a..d20b6c6981 100644 --- a/po/zh_CN.po +++ b/po/zh_CN.po @@ -6,7 +6,7 @@ msgid "" msgstr "" "Project-Id-Version: QEMU 2.2\n" "Report-Msgid-Bugs-To: qemu-devel@nongnu.org\n" -"POT-Creation-Date: 2016-04-18 14:50+0100\n" +"POT-Creation-Date: 2016-12-13 21:46+0000\n" "PO-Revision-Date: 2014-07-31 10:00+0800\n" "Last-Translator: Fam Zheng \n" "Language-Team: Chinese \n" @@ -17,70 +17,74 @@ msgstr "" "Plural-Forms: nplurals=2; plural=n != 1;\n" "X-Generator: Lokalize 1.4\n" -#: ui/gtk.c:273 +#: ui/gtk.c:275 msgid " - Press Ctrl+Alt+G to release grab" msgstr " - 按下 Ctrl+Alt+G 取消捕获" -#: ui/gtk.c:277 +#: ui/gtk.c:279 msgid " [Paused]" msgstr " [已暂停]" -#: ui/gtk.c:1833 +#: ui/gtk.c:1922 msgid "_Pause" 
msgstr "暂停(_P)" -#: ui/gtk.c:1839 +#: ui/gtk.c:1928 msgid "_Reset" msgstr "重置(_R)" -#: ui/gtk.c:1842 +#: ui/gtk.c:1931 msgid "Power _Down" msgstr "关闭电源(_D)" -#: ui/gtk.c:1848 +#: ui/gtk.c:1937 msgid "_Quit" msgstr "退出(_Q)" -#: ui/gtk.c:1940 +#: ui/gtk.c:2029 msgid "_Fullscreen" msgstr "全屏(_F)" -#: ui/gtk.c:1954 +#: ui/gtk.c:2032 +msgid "_Copy" +msgstr "复制(_C)" + +#: ui/gtk.c:2048 msgid "Zoom _In" msgstr "放大(_I)" -#: ui/gtk.c:1961 +#: ui/gtk.c:2055 msgid "Zoom _Out" msgstr "缩小(_O)" -#: ui/gtk.c:1968 +#: ui/gtk.c:2062 msgid "Best _Fit" msgstr "最合适大小(_F)" -#: ui/gtk.c:1975 +#: ui/gtk.c:2069 msgid "Zoom To _Fit" msgstr "缩放以适应大小(_F)" -#: ui/gtk.c:1981 +#: ui/gtk.c:2075 msgid "Grab On _Hover" msgstr "鼠标经过时捕获(_H)" -#: ui/gtk.c:1984 +#: ui/gtk.c:2078 msgid "_Grab Input" msgstr "捕获输入(_G)" -#: ui/gtk.c:2013 +#: ui/gtk.c:2107 msgid "Show _Tabs" msgstr "显示标签页(_T)" -#: ui/gtk.c:2016 +#: ui/gtk.c:2110 msgid "Detach Tab" msgstr "分离标签页" -#: ui/gtk.c:2028 +#: ui/gtk.c:2122 msgid "_Machine" msgstr "虚拟机(_M)" -#: ui/gtk.c:2033 +#: ui/gtk.c:2127 msgid "_View" msgstr "视图(_V)" diff --git a/qapi-schema.json b/qapi-schema.json index 5658723b37..a0d3b5d7c5 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -20,6 +20,27 @@ # QAPI introspection { 'include': 'qapi/introspect.json' } +## +# @qmp_capabilities: +# +# Enable QMP capabilities. +# +# Arguments: None. +# +# Example: +# +# -> { "execute": "qmp_capabilities" } +# <- { "return": {} } +# +# Notes: This command is valid exactly when first connecting: it must be +# issued before any other command will be accepted, and will fail once the +# monitor is accepting other commands. (see qemu docs/qmp-spec.txt) +# +# Since: 0.13 +# +## +{ 'command': 'qmp_capabilities' } + ## # @LostTickPolicy: # @@ -44,7 +65,8 @@ { 'enum': 'LostTickPolicy', 'data': ['discard', 'delay', 'merge', 'slew' ] } -# @add_client +## +# @add_client: # # Allow client connections for VNC, Spice and socket based # character devices to be passed in to QEMU via SCM_RIGHTS. @@ -75,7 +97,7 @@ # # @name: #optional The name of the guest # -# Since 0.14.0 +# Since: 0.14.0 ## { 'struct': 'NameInfo', 'data': {'*name': 'str'} } @@ -86,7 +108,7 @@ # # Returns: @NameInfo of the guest # -# Since 0.14.0 +# Since: 0.14.0 ## { 'command': 'query-name', 'returns': 'NameInfo' } @@ -115,7 +137,7 @@ { 'command': 'query-kvm', 'returns': 'KvmInfo' } ## -# @RunState +# @RunState: # # An enumeration of VM run states. # @@ -154,12 +176,16 @@ # @watchdog: the watchdog action is configured to pause and has been triggered # # @guest-panicked: guest has been panicked as a result of guest OS panic +# +# @colo: guest is paused to save/restore VM state under colo checkpoint, +# VM can not get into this state unless colo capability is enabled +# for migration. (since 2.8) ## { 'enum': 'RunState', 'data': [ 'debug', 'inmigrate', 'internal-error', 'io-error', 'paused', 'postmigrate', 'prelaunch', 'finish-migrate', 'restore-vm', 'running', 'save-vm', 'shutdown', 'suspended', 'watchdog', - 'guest-panicked' ] } + 'guest-panicked', 'colo' ] } ## # @StatusInfo: @@ -210,7 +236,7 @@ # # Returns: The @UuidInfo for the guest # -# Since 0.14.0 +# Since: 0.14.0 ## { 'command': 'query-uuid', 'returns': 'UuidInfo' } @@ -357,7 +383,7 @@ { 'command': 'query-events', 'returns': ['EventInfo'] } ## -# @MigrationStats +# @MigrationStats: # # Detailed migration status. 
# @@ -371,7 +397,7 @@ # # @skipped: number of skipped zero pages (since 1.5) # -# @normal : number of normal pages (since 1.2) +# @normal: number of normal pages (since 1.2) # # @normal-bytes: number of normal bytes sent (since 1.2) # @@ -395,7 +421,7 @@ 'postcopy-requests' : 'int' } } ## -# @XBZRLECacheStats +# @XBZRLECacheStats: # # Detailed XBZRLE migration cache statistics # @@ -418,6 +444,7 @@ 'cache-miss': 'int', 'cache-miss-rate': 'number', 'overflow': 'int' } } +## # @MigrationStatus: # # An enumeration of migration status. @@ -438,15 +465,18 @@ # # @failed: some error occurred during migration process. # +# @colo: VM is in the process of fault tolerance, VM can not get into this +# state unless colo capability is enabled for migration. (since 2.8) +# # Since: 2.3 # ## { 'enum': 'MigrationStatus', 'data': [ 'none', 'setup', 'cancelling', 'cancelled', - 'active', 'postcopy-active', 'completed', 'failed' ] } + 'active', 'postcopy-active', 'completed', 'failed', 'colo' ] } ## -# @MigrationInfo +# @MigrationInfo: # # Information about current migration process. # @@ -506,7 +536,7 @@ '*error-desc': 'str'} } ## -# @query-migrate +# @query-migrate: # # Returns information about current migration process. # @@ -517,7 +547,7 @@ { 'command': 'query-migrate', 'returns': 'MigrationInfo' } ## -# @MigrationCapability +# @MigrationCapability: # # Migration capabilities enumeration # @@ -553,14 +583,19 @@ # been migrated, pulling the remaining pages along as needed. NOTE: If # the migration fails during postcopy the VM will fail. (since 2.6) # +# @x-colo: If enabled, migration will never end, and the state of the VM on the +# primary side will be migrated continuously to the VM on secondary +# side, this process is called COarse-Grain LOck Stepping (COLO) for +# Non-stop Service. (since 2.8) +# # Since: 1.2 ## { 'enum': 'MigrationCapability', 'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks', - 'compress', 'events', 'postcopy-ram'] } + 'compress', 'events', 'postcopy-ram', 'x-colo'] } ## -# @MigrationCapabilityStatus +# @MigrationCapabilityStatus: # # Migration capability information # @@ -574,7 +609,7 @@ 'data': { 'capability' : 'MigrationCapability', 'state' : 'bool' } } ## -# @migrate-set-capabilities +# @migrate-set-capabilities: # # Enable/Disable the following migration capabilities (like xbzrle) # @@ -586,7 +621,7 @@ 'data': { 'capabilities': ['MigrationCapabilityStatus'] } } ## -# @query-migrate-capabilities +# @query-migrate-capabilities: # # Returns information about the current migration capabilities status # @@ -596,7 +631,8 @@ ## { 'command': 'query-migrate-capabilities', 'returns': ['MigrationCapabilityStatus']} -# @MigrationParameter +## +# @MigrationParameter: # # Migration parameters enumeration # @@ -637,102 +673,94 @@ # hostname must be provided so that the server's x509 # certificate identity can be validated. (Since 2.7) # +# @max-bandwidth: to set maximum speed for migration. maximum speed in +# bytes per second. (Since 2.8) +# +# @downtime-limit: set maximum tolerated downtime for migration. maximum +# downtime in milliseconds (Since 2.8) +# +# @x-checkpoint-delay: The delay time (in ms) between two COLO checkpoints in +# periodic mode. 
(Since 2.8) +# # Since: 2.4 ## { 'enum': 'MigrationParameter', 'data': ['compress-level', 'compress-threads', 'decompress-threads', 'cpu-throttle-initial', 'cpu-throttle-increment', - 'tls-creds', 'tls-hostname'] } + 'tls-creds', 'tls-hostname', 'max-bandwidth', + 'downtime-limit', 'x-checkpoint-delay' ] } +## +# @migrate-set-parameters: # -# @migrate-set-parameters +# Set various migration parameters. See MigrationParameters for details. # -# Set the following migration parameters +# Since: 2.4 +## +{ 'command': 'migrate-set-parameters', 'boxed': true, + 'data': 'MigrationParameters' } + +## +# @MigrationParameters: # -# @compress-level: compression level +# Optional members can be omitted on input ('migrate-set-parameters') +# but most members will always be present on output +# ('query-migrate-parameters'), with the exception of tls-creds and +# tls-hostname. # -# @compress-threads: compression thread count +# @compress-level: #optional compression level # -# @decompress-threads: decompression thread count +# @compress-threads: #optional compression thread count # -# @cpu-throttle-initial: Initial percentage of time guest cpus are throttled -# when migration auto-converge is activated. The -# default value is 20. (Since 2.7) +# @decompress-threads: #optional decompression thread count # -# @cpu-throttle-increment: throttle percentage increase each time +# @cpu-throttle-initial: #optional Initial percentage of time guest cpus are +# throttledwhen migration auto-converge is activated. +# The default value is 20. (Since 2.7) +# +# @cpu-throttle-increment: #optional throttle percentage increase each time # auto-converge detects that migration is not making # progress. The default value is 10. (Since 2.7) # -# @tls-creds: ID of the 'tls-creds' object that provides credentials for -# establishing a TLS connection over the migration data channel. -# On the outgoing side of the migration, the credentials must -# be for a 'client' endpoint, while for the incoming side the +# @tls-creds: #optional ID of the 'tls-creds' object that provides credentials +# for establishing a TLS connection over the migration data +# channel. On the outgoing side of the migration, the credentials +# must be for a 'client' endpoint, while for the incoming side the # credentials must be for a 'server' endpoint. Setting this # will enable TLS for all migrations. The default is unset, # resulting in unsecured migration at the QEMU level. (Since 2.7) # -# @tls-hostname: hostname of the target host for the migration. This is -# required when using x509 based TLS credentials and the +# @tls-hostname: #optional hostname of the target host for the migration. This +# is required when using x509 based TLS credentials and the # migration URI does not already include a hostname. For # example if using fd: or exec: based migration, the # hostname must be provided so that the server's x509 # certificate identity can be validated. (Since 2.7) # +# @max-bandwidth: to set maximum speed for migration. maximum speed in +# bytes per second. (Since 2.8) +# +# @downtime-limit: set maximum tolerated downtime for migration. maximum +# downtime in milliseconds (Since 2.8) +# +# @x-checkpoint-delay: the delay time between two COLO checkpoints. 
(Since 2.8) +# # Since: 2.4 ## -{ 'command': 'migrate-set-parameters', +{ 'struct': 'MigrationParameters', 'data': { '*compress-level': 'int', '*compress-threads': 'int', '*decompress-threads': 'int', '*cpu-throttle-initial': 'int', '*cpu-throttle-increment': 'int', '*tls-creds': 'str', - '*tls-hostname': 'str'} } + '*tls-hostname': 'str', + '*max-bandwidth': 'int', + '*downtime-limit': 'int', + '*x-checkpoint-delay': 'int'} } -# -# @MigrationParameters -# -# @compress-level: compression level -# -# @compress-threads: compression thread count -# -# @decompress-threads: decompression thread count -# -# @cpu-throttle-initial: Initial percentage of time guest cpus are throttled -# when migration auto-converge is activated. The -# default value is 20. (Since 2.7) -# -# @cpu-throttle-increment: throttle percentage increase each time -# auto-converge detects that migration is not making -# progress. The default value is 10. (Since 2.7) -# -# @tls-creds: ID of the 'tls-creds' object that provides credentials for -# establishing a TLS connection over the migration data channel. -# On the outgoing side of the migration, the credentials must -# be for a 'client' endpoint, while for the incoming side the -# credentials must be for a 'server' endpoint. Setting this -# will enable TLS for all migrations. The default is unset, -# resulting in unsecured migration at the QEMU level. (Since 2.7) -# -# @tls-hostname: hostname of the target host for the migration. This is -# required when using x509 based TLS credentials and the -# migration URI does not already include a hostname. For -# example if using fd: or exec: based migration, the -# hostname must be provided so that the server's x509 -# certificate identity can be validated. (Since 2.7) -# -# Since: 2.4 -## -{ 'struct': 'MigrationParameters', - 'data': { 'compress-level': 'int', - 'compress-threads': 'int', - 'decompress-threads': 'int', - 'cpu-throttle-initial': 'int', - 'cpu-throttle-increment': 'int', - 'tls-creds': 'str', - 'tls-hostname': 'str'} } ## -# @query-migrate-parameters +# @query-migrate-parameters: # # Returns information about the current migration parameters # @@ -744,7 +772,7 @@ 'returns': 'MigrationParameters' } ## -# @client_migrate_info +# @client_migrate_info: # # Set migration information for remote display. This makes the server # ask the client to automatically reconnect using the new parameters @@ -763,15 +791,88 @@ '*tls-port': 'int', '*cert-subject': 'str' } } ## -# @migrate-start-postcopy +# @migrate-start-postcopy: # # Followup to a migration command to switch the migration to postcopy mode. # The postcopy-ram capability must be set before the original migration # command. # # Since: 2.5 +## { 'command': 'migrate-start-postcopy' } +## +# @COLOMessage: +# +# The message transmission between Primary side and Secondary side. +# +# @checkpoint-ready: Secondary VM (SVM) is ready for checkpointing +# +# @checkpoint-request: Primary VM (PVM) tells SVM to prepare for checkpointing +# +# @checkpoint-reply: SVM gets PVM's checkpoint request +# +# @vmstate-send: VM's state will be sent by PVM. +# +# @vmstate-size: The total size of VMstate. +# +# @vmstate-received: VM's state has been received by SVM. +# +# @vmstate-loaded: VM's state has been loaded by SVM. 
+# +# Since: 2.8 +## +{ 'enum': 'COLOMessage', + 'data': [ 'checkpoint-ready', 'checkpoint-request', 'checkpoint-reply', + 'vmstate-send', 'vmstate-size', 'vmstate-received', + 'vmstate-loaded' ] } + +## +# @COLOMode: +# +# The colo mode +# +# @unknown: unknown mode +# +# @primary: master side +# +# @secondary: slave side +# +# Since: 2.8 +## +{ 'enum': 'COLOMode', + 'data': [ 'unknown', 'primary', 'secondary'] } + +## +# @FailoverStatus: +# +# An enumeration of COLO failover status +# +# @none: no failover has ever happened +# +# @require: got failover requirement but not handled +# +# @active: in the process of doing failover +# +# @completed: finish the process of failover +# +# Since: 2.8 +## +{ 'enum': 'FailoverStatus', + 'data': [ 'none', 'require', 'active', 'completed'] } + +## +# @x-colo-lost-heartbeat: +# +# Tell qemu that heartbeat is lost, request it to do takeover procedures. +# If this command is sent to the PVM, the Primary side will exit COLO mode. +# If sent to the Secondary, the Secondary side will run failover work, +# then takes over server operation to become the service VM. +# +# Since: 2.8 +## +{ 'command': 'x-colo-lost-heartbeat' } + ## # @MouseInfo: # @@ -855,7 +956,7 @@ # # @pc: the 64-bit instruction pointer # -# Since 2.6 +# Since: 2.6 ## { 'struct': 'CpuInfoX86', 'data': { 'pc': 'int' } } @@ -868,7 +969,7 @@ # # @npc: the NPC component of the instruction pointer # -# Since 2.6 +# Since: 2.6 ## { 'struct': 'CpuInfoSPARC', 'data': { 'pc': 'int', 'npc': 'int' } } @@ -879,7 +980,7 @@ # # @nip: the instruction pointer # -# Since 2.6 +# Since: 2.6 ## { 'struct': 'CpuInfoPPC', 'data': { 'nip': 'int' } } @@ -890,7 +991,7 @@ # # @PC: the instruction pointer # -# Since 2.6 +# Since: 2.6 ## { 'struct': 'CpuInfoMIPS', 'data': { 'PC': 'int' } } @@ -901,7 +1002,7 @@ # # @PC: the instruction pointer # -# Since 2.6 +# Since: 2.6 ## { 'struct': 'CpuInfoTricore', 'data': { 'PC': 'int' } } @@ -910,7 +1011,7 @@ # # No additional information is available about the virtual CPU # -# Since 2.6 +# Since: 2.6 # ## { 'struct': 'CpuInfoOther', 'data': { } } @@ -945,7 +1046,7 @@ # # Returns a list of information about each iothread. # -# Note this list excludes the QEMU main loop thread, which is not declared +# Note: this list excludes the QEMU main loop thread, which is not declared # using the -object iothread command-line option. It is always the main thread # of the process. # @@ -956,7 +1057,7 @@ { 'command': 'query-iothreads', 'returns': ['IOThreadInfo'] } ## -# @NetworkAddressFamily +# @NetworkAddressFamily: # # The network address family # @@ -966,15 +1067,17 @@ # # @unix: unix socket # +# @vsock: vsock family (since 2.8) +# # @unknown: otherwise # # Since: 2.1 ## { 'enum': 'NetworkAddressFamily', - 'data': [ 'ipv4', 'ipv6', 'unix', 'unknown' ] } + 'data': [ 'ipv4', 'ipv6', 'unix', 'vsock', 'unknown' ] } ## -# @VncBasicInfo +# @VncBasicInfo: # # The basic information for vnc network connection # @@ -997,7 +1100,7 @@ 'websocket': 'bool' } } ## -# @VncServerInfo +# @VncServerInfo: # # The network connection information for server # @@ -1068,7 +1171,7 @@ '*service': 'str', '*auth': 'str', '*clients': ['VncClientInfo']} } ## -# @VncPriAuth: +# @VncPrimaryAuth: # # vnc primary authentication method. 
# @@ -1147,7 +1250,7 @@ { 'command': 'query-vnc-servers', 'returns': ['VncInfo2'] } ## -# @SpiceBasicInfo +# @SpiceBasicInfo: # # The basic information for SPICE network connection # @@ -1165,7 +1268,7 @@ 'family': 'NetworkAddressFamily' } } ## -# @SpiceServerInfo +# @SpiceServerInfo: # # Information about a SPICE server # @@ -1178,7 +1281,7 @@ 'data': { '*auth': 'str' } } ## -# @SpiceChannel +# @SpiceChannel: # # Information about a SPICE client channel. # @@ -1203,7 +1306,7 @@ 'tls': 'bool'} } ## -# @SpiceQueryMouseMode +# @SpiceQueryMouseMode: # # An enumeration of Spice mouse states. # @@ -1222,7 +1325,7 @@ 'data': [ 'client', 'server', 'unknown' ] } ## -# @SpiceInfo +# @SpiceInfo: # # Information about the SPICE session. # @@ -1261,7 +1364,7 @@ 'mouse-mode': 'SpiceQueryMouseMode', '*channels': ['SpiceChannel']} } ## -# @query-spice +# @query-spice: # # Returns information about the current SPICE server # @@ -1311,7 +1414,7 @@ { 'struct': 'PciMemoryRange', 'data': {'base': 'int', 'limit': 'int'} } ## -# @PciMemoryRegion +# @PciMemoryRegion: # # Information about a PCI device I/O region. # @@ -1520,7 +1623,7 @@ { 'command': 'cpu', 'data': {'index': 'int'} } ## -# @cpu-add +# @cpu-add: # # Adds CPU with specified ID # @@ -1528,7 +1631,7 @@ # # Returns: Nothing on success # -# Since 1.5 +# Since: 1.5 ## { 'command': 'cpu-add', 'data': {'id': 'int'} } @@ -1659,17 +1762,17 @@ { 'command': 'balloon', 'data': {'value': 'int'} } ## -# @Abort +# @Abort: # # This action can be used to test transaction failure. # # Since: 1.6 -### +## { 'struct': 'Abort', 'data': { } } ## -# @ActionCompletionMode +# @ActionCompletionMode: # # An enumeration of Transactional completion modes. # @@ -1689,12 +1792,12 @@ 'data': [ 'individual', 'grouped' ] } ## -# @TransactionAction +# @TransactionAction: # # A discriminated record of operations that can be performed with # @transaction. # -# Since 1.1 +# Since: 1.1 # # drive-backup since 1.6 # abort since 1.6 @@ -1717,7 +1820,7 @@ } } ## -# @TransactionProperties +# @TransactionProperties: # # Optional arguments to modify the behavior of a Transaction. # @@ -1734,7 +1837,7 @@ } ## -# @transaction +# @transaction: # # Executes a number of transactionable QMP commands atomically. If any # operation fails, then the entire set of actions will be abandoned and the @@ -1754,7 +1857,7 @@ # information on only one failed operation returned in an error condition, and # subsequent actions will not have been attempted. # -# Since 1.1 +# Since: 1.1 ## { 'command': 'transaction', 'data': { 'actions': [ 'TransactionAction' ], @@ -1791,7 +1894,7 @@ 'returns': 'str' } ## -# @migrate_cancel +# @migrate_cancel: # # Cancel the current executing migration process. # @@ -1804,7 +1907,7 @@ { 'command': 'migrate_cancel' } ## -# @migrate_set_downtime +# @migrate_set_downtime: # # Set maximum tolerated downtime for migration. # @@ -1812,12 +1915,14 @@ # # Returns: nothing on success # +# Notes: This command is deprecated in favor of 'migrate-set-parameters' +# # Since: 0.14.0 ## { 'command': 'migrate_set_downtime', 'data': {'value': 'number'} } ## -# @migrate_set_speed +# @migrate_set_speed: # # Set maximum speed for migration. # @@ -1825,14 +1930,14 @@ # # Returns: nothing on success # -# Notes: A value lesser than zero will be automatically round up to zero. 
+# Notes: This command is deprecated in favor of 'migrate-set-parameters' # # Since: 0.14.0 ## { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } ## -# @migrate-set-cache-size +# @migrate-set-cache-size: # # Set XBZRLE cache size # @@ -1848,7 +1953,7 @@ { 'command': 'migrate-set-cache-size', 'data': {'value': 'int'} } ## -# @query-migrate-cache-size +# @query-migrate-cache-size: # # query XBZRLE cache size # @@ -2113,7 +2218,7 @@ 'returns': [ 'DevicePropertyInfo' ] } ## -# @migrate +# @migrate: # # Migrates the current running guest to another Virtual Machine. # @@ -2134,7 +2239,7 @@ 'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool', '*detach': 'bool' } } ## -# @migrate-incoming +# @migrate-incoming: # # Start an incoming migration, the qemu must have been started # with -incoming defer @@ -2151,6 +2256,7 @@ ## { 'command': 'migrate-incoming', 'data': {'uri': 'str' } } +## # @xen-save-devices-state: # # Save the state of all devices to file. The RAM and the block devices @@ -2167,7 +2273,7 @@ { 'command': 'xen-save-devices-state', 'data': {'filename': 'str'} } ## -# @xen-set-global-dirty-log +# @xen-set-global-dirty-log: # # Enable or disable the global dirty log mode. # @@ -2179,6 +2285,46 @@ ## { 'command': 'xen-set-global-dirty-log', 'data': { 'enable': 'bool' } } +## +# @device_add: +# +# @driver: the name of the new device's driver +# +# @bus: #optional the device's parent bus (device tree path) +# +# @id: the device's ID, must be unique +# +# Additional arguments depend on the type. +# +# Add a device. +# +# Notes: +# 1. For detailed information about this command, please refer to the +# 'docs/qdev-device-use.txt' file. +# +# 2. It's possible to list device properties by running QEMU with the +# "-device DEVICE,help" command-line argument, where DEVICE is the +# device's name +# +# Example: +# +# -> { "execute": "device_add", +# "arguments": { "driver": "e1000", "id": "net1", +# "bus": "pci.0", +# "mac": "52:54:00:12:34:56" } } +# <- { "return": {} } +# +# TODO This command effectively bypasses QAPI completely due to its +# "additional arguments" business. It shouldn't have been added to +# the schema in this form. It should be qapified properly, or +# replaced by a properly qapified command. +# +# Since: 0.13 +## +{ 'command': 'device_add', + 'data': {'driver': 'str', 'id': 'str'}, + 'gen': false } # so we can get the additional arguments + ## # @device_del: # @@ -2219,7 +2365,7 @@ 'data': [ 'elf', 'kdump-zlib', 'kdump-lzo', 'kdump-snappy' ] } ## -# @dump-guest-memory +# @dump-guest-memory: # # Dump guest's memory to vmcore. It is a synchronous operation that can take # very long depending on the amount of guest memory. @@ -2273,7 +2419,7 @@ '*format': 'DumpGuestMemoryFormat'} } ## -# @DumpStatus +# @DumpStatus: # # Describe the status of a long-running background guest memory dump. # @@ -2285,13 +2431,13 @@ # # @failed: the last dump has failed. # -# Since 2.6 +# Since: 2.6 ## { 'enum': 'DumpStatus', 'data': [ 'none', 'active', 'completed', 'failed' ] } ## -# @DumpQueryResult +# @DumpQueryResult: # # The result format for 'query-dump'. # @@ -2301,7 +2447,7 @@ # # @total: total bytes to be written in latest dump (uncompressed) # -# Since 2.6 +# Since: 2.6 ## { 'struct': 'DumpQueryResult', 'data': { 'status': 'DumpStatus', @@ -2309,7 +2455,7 @@ 'total': 'int' } } ## -# @query-dump +# @query-dump: # # Query latest dump status. 
# @@ -2344,7 +2490,7 @@ 'returns': 'DumpGuestMemoryCapability' } ## -# @dump-skeys +# @dump-skeys: # # Dump guest's storage keys # @@ -2431,17 +2577,17 @@ { 'command': 'object-del', 'data': {'id': 'str'} } ## -# @NetdevNoneOptions +# @NetdevNoneOptions: # # Use it alone to have zero network devices. # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevNoneOptions', 'data': { } } ## -# @NetLegacyNicOptions +# @NetLegacyNicOptions: # # Create a new Network Interface Card. # @@ -2455,7 +2601,7 @@ # # @vectors: #optional number of MSI-x vectors, 0 to disable MSI-X # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetLegacyNicOptions', 'data': { @@ -2466,18 +2612,18 @@ '*vectors': 'uint32' } } ## -# @String +# @String: # # A fat type wrapping 'str', to be embedded in lists. # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'String', 'data': { 'str': 'str' } } ## -# @NetdevUserOptions +# @NetdevUserOptions: # # Use the user mode network stack which requires no administrator privilege to # run. @@ -2534,7 +2680,7 @@ # # @guestfwd: #optional forward guest TCP connections # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevUserOptions', 'data': { @@ -2560,7 +2706,7 @@ '*guestfwd': ['String'] } } ## -# @NetdevTapOptions +# @NetdevTapOptions: # # Connect the host TAP network interface name to the VLAN. # @@ -2575,6 +2721,8 @@ # # @downscript: #optional script to shut down the interface # +# @br: #optional bridge name (since 2.8) +# # @helper: #optional command to execute to configure bridge # # @sndbuf: #optional send buffer limit. Understands [TGMKkb] suffixes. @@ -2595,7 +2743,7 @@ # @poll-us: #optional maximum number of microseconds that could # be spent on busy polling for tap (since 2.7) # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevTapOptions', 'data': { @@ -2604,6 +2752,7 @@ '*fds': 'str', '*script': 'str', '*downscript': 'str', + '*br': 'str', '*helper': 'str', '*sndbuf': 'size', '*vnet_hdr': 'bool', @@ -2615,7 +2764,7 @@ '*poll-us': 'uint32'} } ## -# @NetdevSocketOptions +# @NetdevSocketOptions: # # Connect the VLAN to a remote VLAN in another QEMU virtual machine using a TCP # socket connection. @@ -2632,7 +2781,7 @@ # # @udp: #optional UDP unicast address and port number # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevSocketOptions', 'data': { @@ -2644,7 +2793,7 @@ '*udp': 'str' } } ## -# @NetdevL2TPv3Options +# @NetdevL2TPv3Options: # # Connect the VLAN to Ethernet over L2TPv3 Static tunnel # @@ -2680,7 +2829,7 @@ # @offset: #optional additional offset - allows the insertion of # additional application-specific data before the packet payload # -# Since 2.1 +# Since: 2.1 ## { 'struct': 'NetdevL2TPv3Options', 'data': { @@ -2700,7 +2849,7 @@ '*offset': 'uint32' } } ## -# @NetdevVdeOptions +# @NetdevVdeOptions: # # Connect the VLAN to a vde switch running on the host. # @@ -2712,7 +2861,7 @@ # # @mode: #optional permissions for socket # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevVdeOptions', 'data': { @@ -2722,7 +2871,7 @@ '*mode': 'uint16' } } ## -# @NetdevDumpOptions +# @NetdevDumpOptions: # # Dump VLAN network traffic to a file. # @@ -2731,7 +2880,7 @@ # # @file: #optional dump file path (default is qemu-vlan0.pcap) # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevDumpOptions', 'data': { @@ -2739,7 +2888,7 @@ '*file': 'str' } } ## -# @NetdevBridgeOptions +# @NetdevBridgeOptions: # # Connect a host TAP network interface to a host bridge device. 
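The new optional 'br' member of NetdevTapOptions can be exercised through netdev_add, which accepts the same members at the QMP level. A rough sketch; the id, bridge name and helper path are placeholders rather than values taken from this patch.

-> { "execute": "netdev_add",
     "arguments": { "type": "tap", "id": "net0", "br": "br0",
                    "helper": "/usr/lib/qemu/qemu-bridge-helper" } }
<- { "return": {} }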
# @@ -2747,7 +2896,7 @@ # # @helper: #optional command to execute to configure bridge # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevBridgeOptions', 'data': { @@ -2755,20 +2904,20 @@ '*helper': 'str' } } ## -# @NetdevHubPortOptions +# @NetdevHubPortOptions: # # Connect two or more net clients through a software hub. # # @hubid: hub identifier number # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetdevHubPortOptions', 'data': { 'hubid': 'int32' } } ## -# @NetdevNetmapOptions +# @NetdevNetmapOptions: # # Connect a client to a netmap-enabled NIC or to a VALE switch port # @@ -2781,7 +2930,7 @@ # # @devname: #optional path of the netmap device (default: '/dev/netmap'). # -# Since 2.0 +# Since: 2.0 ## { 'struct': 'NetdevNetmapOptions', 'data': { @@ -2789,7 +2938,7 @@ '*devname': 'str' } } ## -# @NetdevVhostUserOptions +# @NetdevVhostUserOptions: # # Vhost-user network backend # @@ -2800,7 +2949,7 @@ # @queues: #optional number of queues to be created for multiqueue vhost-user # (default: 1) (Since 2.5) # -# Since 2.1 +# Since: 2.1 ## { 'struct': 'NetdevVhostUserOptions', 'data': { @@ -2809,18 +2958,18 @@ '*queues': 'int' } } ## -# @NetClientDriver +# @NetClientDriver: # # Available netdev drivers. # -# Since 2.7 +# Since: 2.7 ## { 'enum': 'NetClientDriver', 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', 'dump', 'bridge', 'hubport', 'netmap', 'vhost-user' ] } ## -# @Netdev +# @Netdev: # # Captures the configuration of a network device. # @@ -2828,7 +2977,7 @@ # # @type: Specify the driver used for interpreting remaining arguments. # -# Since 1.2 +# Since: 1.2 # # 'l2tpv3' - since 2.1 ## @@ -2850,7 +2999,7 @@ 'vhost-user': 'NetdevVhostUserOptions' } } ## -# @NetLegacy +# @NetLegacy: # # Captures the configuration of a network device; legacy. # @@ -2862,7 +3011,7 @@ # # @opts: device type specific properties (legacy) # -# Since 1.2 +# Since: 1.2 ## { 'struct': 'NetLegacy', 'data': { @@ -2872,11 +3021,11 @@ 'opts': 'NetLegacyOptions' } } ## -# @NetLegacyOptions +# @NetLegacyOptions: # # Like Netdev, but for use only by the legacy command line options # -# Since 1.2 +# Since: 1.2 ## { 'union': 'NetLegacyOptions', 'data': { @@ -2893,7 +3042,7 @@ 'vhost-user': 'NetdevVhostUserOptions' } } ## -# @NetFilterDirection +# @NetFilterDirection: # # Indicates whether a netfilter is attached to a netdev's transmit queue or # receive queue or both. @@ -2907,13 +3056,13 @@ # @tx: the filter is attached to the transmit queue of the netdev, # where it will receive packets sent by the netdev. # -# Since 2.5 +# Since: 2.5 ## { 'enum': 'NetFilterDirection', 'data': [ 'all', 'rx', 'tx' ] } ## -# @InetSocketAddress +# @InetSocketAddress: # # Captures a socket address or address range in the Internet namespace. # @@ -2929,7 +3078,7 @@ # @ipv6: whether to accept IPv6 addresses, default try both IPv4 and IPv6 # #optional # -# Since 1.3 +# Since: 1.3 ## { 'struct': 'InetSocketAddress', 'data': { @@ -2940,29 +3089,48 @@ '*ipv6': 'bool' } } ## -# @UnixSocketAddress +# @UnixSocketAddress: # # Captures a socket address in the local ("Unix socket") namespace. # # @path: filesystem path to use # -# Since 1.3 +# Since: 1.3 ## { 'struct': 'UnixSocketAddress', 'data': { 'path': 'str' } } ## -# @SocketAddress +# @VsockSocketAddress: +# +# Captures a socket address in the vsock namespace. +# +# @cid: unique host identifier +# @port: port +# +# Note: string types are used to allow for possible future hostname or +# service resolution support. 
+# +# Since: 2.8 +## +{ 'struct': 'VsockSocketAddress', + 'data': { + 'cid': 'str', + 'port': 'str' } } + +## +# @SocketAddress: # # Captures the address of a socket, which could also be a named file descriptor # -# Since 1.3 +# Since: 1.3 ## { 'union': 'SocketAddress', 'data': { 'inet': 'InetSocketAddress', 'unix': 'UnixSocketAddress', + 'vsock': 'VsockSocketAddress', 'fd': 'String' } } ## @@ -3006,7 +3174,7 @@ # # @alias: #optional an alias for the machine name # -# @default: #optional whether the machine is default +# @is-default: #optional whether the machine is default # # @cpu-max: maximum number of CPUs supported by the machine type # (since 1.5.0) @@ -3038,10 +3206,43 @@ # # @name: the name of the CPU definition # +# @migration-safe: #optional whether a CPU definition can be safely used for +# migration in combination with a QEMU compatibility machine +# when migrating between different QMU versions and between +# hosts with different sets of (hardware or software) +# capabilities. If not provided, information is not available +# and callers should not assume the CPU definition to be +# migration-safe. (since 2.8) +# +# @static: whether a CPU definition is static and will not change depending on +# QEMU version, machine type, machine options and accelerator options. +# A static model is always migration-safe. (since 2.8) +# +# @unavailable-features: #optional List of properties that prevent +# the CPU model from running in the current +# host. (since 2.8) +# +# @unavailable-features is a list of QOM property names that +# represent CPU model attributes that prevent the CPU from running. +# If the QOM property is read-only, that means there's no known +# way to make the CPU model run in the current host. Implementations +# that choose not to provide specific information return the +# property name "type". +# If the property is read-write, it means that it MAY be possible +# to run the CPU model in the current host if that property is +# changed. Management software can use it as hints to suggest or +# choose an alternative for the user, or just to generate meaningful +# error messages explaining why the CPU model can't be used. +# If @unavailable-features is an empty list, the CPU model is +# runnable using the current host and machine-type. +# If @unavailable-features is not present, runnability +# information for the CPU is not available. +# # Since: 1.2.0 ## { 'struct': 'CpuDefinitionInfo', - 'data': { 'name': 'str' } } + 'data': { 'name': 'str', '*migration-safe': 'bool', 'static': 'bool', + '*unavailable-features': [ 'str' ] } } ## # @query-cpu-definitions: @@ -3054,6 +3255,239 @@ ## { 'command': 'query-cpu-definitions', 'returns': ['CpuDefinitionInfo'] } +## +# @CpuModelInfo: +# +# Virtual CPU model. +# +# A CPU model consists of the name of a CPU definition, to which +# delta changes are applied (e.g. features added/removed). Most magic values +# that an architecture might require should be hidden behind the name. +# However, if required, architectures can expose relevant properties. +# +# @name: the name of the CPU definition the model is based on +# @props: #optional a dictionary of QOM properties to be applied +# +# Since: 2.8.0 +## +{ 'struct': 'CpuModelInfo', + 'data': { 'name': 'str', + '*props': 'any' } } + +## +# @CpuModelExpansionType: +# +# An enumeration of CPU model expansion types. +# +# @static: Expand to a static CPU model, a combination of a static base +# model name and property delta changes. 
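With the new CpuDefinitionInfo members, a query-cpu-definitions reply might look like the sketch below. The model names and flag values are illustrative only; they depend on target, accelerator and host.

-> { "execute": "query-cpu-definitions" }
<- { "return": [ { "name": "host", "static": false, "migration-safe": false },
                 { "name": "qemu64", "static": false, "migration-safe": true,
                   "unavailable-features": [] } ] }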
As the static base model will +# never change, the expanded CPU model will be the same, independant of +# independent of QEMU version, machine type, machine options, and +# accelerator options. Therefore, the resulting model can be used by +# tooling without having to specify a compatibility machine - e.g. when +# displaying the "host" model. static CPU models are migration-safe. +# +# @full: Expand all properties. The produced model is not guaranteed to be +# migration-safe, but allows tooling to get an insight and work with +# model details. +# +# Since: 2.8.0 +## +{ 'enum': 'CpuModelExpansionType', + 'data': [ 'static', 'full' ] } + + +## +# @CpuModelExpansionInfo: +# +# The result of a cpu model expansion. +# +# @model: the expanded CpuModelInfo. +# +# Since: 2.8.0 +## +{ 'struct': 'CpuModelExpansionInfo', + 'data': { 'model': 'CpuModelInfo' } } + + +## +# @query-cpu-model-expansion: +# +# Expands a given CPU model (or a combination of CPU model + additional options) +# to different granularities, allowing tooling to get an understanding what a +# specific CPU model looks like in QEMU under a certain configuration. +# +# This interface can be used to query the "host" CPU model. +# +# The data returned by this command may be affected by: +# +# * QEMU version: CPU models may look different depending on the QEMU version. +# (Except for CPU models reported as "static" in query-cpu-definitions.) +# * machine-type: CPU model may look different depending on the machine-type. +# (Except for CPU models reported as "static" in query-cpu-definitions.) +# * machine options (including accelerator): in some architectures, CPU models +# may look different depending on machine and accelerator options. (Except for +# CPU models reported as "static" in query-cpu-definitions.) +# * "-cpu" arguments and global properties: arguments to the -cpu option and +# global properties may affect expansion of CPU models. Using +# query-cpu-model-expansion while using these is not advised. +# +# Some architectures may not support all expansion types. s390x supports +# "full" and "static". +# +# Returns: a CpuModelExpansionInfo. Returns an error if expanding CPU models is +# not supported, if the model cannot be expanded, if the model contains +# an unknown CPU definition name, unknown properties or properties +# with a wrong type. Also returns an error if an expansion type is +# not supported. +# +# Since: 2.8.0 +## +{ 'command': 'query-cpu-model-expansion', + 'data': { 'type': 'CpuModelExpansionType', + 'model': 'CpuModelInfo' }, + 'returns': 'CpuModelExpansionInfo' } + +## +# @CpuModelCompareResult: +# +# An enumeration of CPU model comparation results. The result is usually +# calculated using e.g. CPU features or CPU generations. +# +# @incompatible: If model A is incompatible to model B, model A is not +# guaranteed to run where model B runs and the other way around. +# +# @identical: If model A is identical to model B, model A is guaranteed to run +# where model B runs and the other way around. +# +# @superset: If model A is a superset of model B, model B is guaranteed to run +# where model A runs. There are no guarantees about the other way. +# +# @subset: If model A is a subset of model B, model A is guaranteed to run +# where model B runs. There are no guarantees about the other way. +# +# Since: 2.8.0 +## +{ 'enum': 'CpuModelCompareResult', + 'data': [ 'incompatible', 'identical', 'superset', 'subset' ] } + +## +# @CpuModelCompareInfo: +# +# The result of a CPU model comparison. 
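A minimal sketch of the query-cpu-model-expansion command documented above. The request expands the "host" model statically; the reply (an s390x-flavoured static base model plus a single property delta) is purely illustrative.

-> { "execute": "query-cpu-model-expansion",
     "arguments": { "type": "static", "model": { "name": "host" } } }
<- { "return": { "model": { "name": "z13-base", "props": { "vx": false } } } }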
+# +# @result: The result of the compare operation. +# @responsible-properties: List of properties that led to the comparison result +# not being identical. +# +# @responsible-properties is a list of QOM property names that led to +# both CPUs not being detected as identical. For identical models, this +# list is empty. +# If a QOM property is read-only, that means there's no known way to make the +# CPU models identical. If the special property name "type" is included, the +# models are by definition not identical and cannot be made identical. +# +# Since: 2.8.0 +## +{ 'struct': 'CpuModelCompareInfo', + 'data': {'result': 'CpuModelCompareResult', + 'responsible-properties': ['str'] + } +} + +## +# @query-cpu-model-comparison: +# +# Compares two CPU models, returning how they compare in a specific +# configuration. The results indicates how both models compare regarding +# runnability. This result can be used by tooling to make decisions if a +# certain CPU model will run in a certain configuration or if a compatible +# CPU model has to be created by baselining. +# +# Usually, a CPU model is compared against the maximum possible CPU model +# of a certain configuration (e.g. the "host" model for KVM). If that CPU +# model is identical or a subset, it will run in that configuration. +# +# The result returned by this command may be affected by: +# +# * QEMU version: CPU models may look different depending on the QEMU version. +# (Except for CPU models reported as "static" in query-cpu-definitions.) +# * machine-type: CPU model may look different depending on the machine-type. +# (Except for CPU models reported as "static" in query-cpu-definitions.) +# * machine options (including accelerator): in some architectures, CPU models +# may look different depending on machine and accelerator options. (Except for +# CPU models reported as "static" in query-cpu-definitions.) +# * "-cpu" arguments and global properties: arguments to the -cpu option and +# global properties may affect expansion of CPU models. Using +# query-cpu-model-expansion while using these is not advised. +# +# Some architectures may not support comparing CPU models. s390x supports +# comparing CPU models. +# +# Returns: a CpuModelBaselineInfo. Returns an error if comparing CPU models is +# not supported, if a model cannot be used, if a model contains +# an unknown cpu definition name, unknown properties or properties +# with wrong types. +# +# Since: 2.8.0 +## +{ 'command': 'query-cpu-model-comparison', + 'data': { 'modela': 'CpuModelInfo', 'modelb': 'CpuModelInfo' }, + 'returns': 'CpuModelCompareInfo' } + +## +# @CpuModelBaselineInfo: +# +# The result of a CPU model baseline. +# +# @model: the baselined CpuModelInfo. +# +# Since: 2.8.0 +## +{ 'struct': 'CpuModelBaselineInfo', + 'data': { 'model': 'CpuModelInfo' } } + +## +# @query-cpu-model-baseline: +# +# Baseline two CPU models, creating a compatible third model. The created +# model will always be a static, migration-safe CPU model (see "static" +# CPU model expansion for details). +# +# This interface can be used by tooling to create a compatible CPU model out +# two CPU models. The created CPU model will be identical to or a subset of +# both CPU models when comparing them. Therefore, the created CPU model is +# guaranteed to run where the given CPU models run. +# +# The result returned by this command may be affected by: +# +# * QEMU version: CPU models may look different depending on the QEMU version. 
+# (Except for CPU models reported as "static" in query-cpu-definitions.) +# * machine-type: CPU model may look different depending on the machine-type. +# (Except for CPU models reported as "static" in query-cpu-definitions.) +# * machine options (including accelerator): in some architectures, CPU models +# may look different depending on machine and accelerator options. (Except for +# CPU models reported as "static" in query-cpu-definitions.) +# * "-cpu" arguments and global properties: arguments to the -cpu option and +# global properties may affect expansion of CPU models. Using +# query-cpu-model-expansion while using these is not advised. +# +# Some architectures may not support baselining CPU models. s390x supports +# baselining CPU models. +# +# Returns: a CpuModelBaselineInfo. Returns an error if baselining CPU models is +# not supported, if a model cannot be used, if a model contains +# an unknown cpu definition name, unknown properties or properties +# with wrong types. +# +# Since: 2.8.0 +## +{ 'command': 'query-cpu-model-baseline', + 'data': { 'modela': 'CpuModelInfo', + 'modelb': 'CpuModelInfo' }, + 'returns': 'CpuModelBaselineInfo' } + +## # @AddfdInfo: # # Information about a file descriptor that was added to an fd set. @@ -3178,15 +3612,19 @@ ## # @QKeyCode: # +# @unmapped: since 2.0 +# @pause: since 2.0 +# @ro: since 2.4 +# @kp_comma: since 2.4 +# @kp_equals: since 2.6 +# @power: since 2.6 +# # An enumeration of key name. # # This is used by the send-key command. # # Since: 1.3.0 # -# 'unmapped' and 'pause' since 2.0 -# 'ro' and 'kp_comma' since 2.4 -# 'kp_equals' and 'power' since 2.6 ## { 'enum': 'QKeyCode', 'data': [ 'unmapped', @@ -3208,7 +3646,7 @@ 'kp_comma', 'kp_equals', 'power' ] } ## -# @KeyValue +# @KeyValue: # # Represents a keyboard key. # @@ -3293,7 +3731,6 @@ # # @device: The name of the special file for the device, # i.e. /dev/ttyS0 on Unix or COM1: on Windows -# @type: What kind of device this is. # # Since: 1.4 ## @@ -3558,7 +3995,7 @@ # # A union referencing different TPM backend types' configuration options # -# @passthrough: The configuration options for the TPM passthrough type +# @type: 'passthrough' The configuration options for the TPM passthrough type # # Since: 1.5 ## @@ -3566,7 +4003,7 @@ 'data': { 'passthrough' : 'TPMPassthroughOptions' } } ## -# @TpmInfo: +# @TPMInfo: # # Information about the TPM # @@ -3595,7 +4032,7 @@ { 'command': 'query-tpm', 'returns': ['TPMInfo'] } ## -# @AcpiTableOptions +# @AcpiTableOptions: # # Specify an ACPI table on the command line to load. # @@ -3638,7 +4075,7 @@ # ACPI table header. At least one file is required. This field excludes # @file. # -# Since 1.5 +# Since: 1.5 ## { 'struct': 'AcpiTableOptions', 'data': { @@ -3666,7 +4103,7 @@ # @size: accepts a number followed by an optional suffix (K)ilo, # (M)ega, (G)iga, (T)era # -# Since 1.5 +# Since: 1.5 ## { 'enum': 'CommandLineParameterType', 'data': ['string', 'boolean', 'number', 'size'] } @@ -3684,7 +4121,7 @@ # # @default: #optional default value string (since 2.1) # -# Since 1.5 +# Since: 1.5 ## { 'struct': 'CommandLineParameterInfo', 'data': { 'name': 'str', @@ -3701,7 +4138,7 @@ # # @parameters: an array of @CommandLineParameterInfo # -# Since 1.5 +# Since: 1.5 ## { 'struct': 'CommandLineOptionInfo', 'data': { 'option': 'str', 'parameters': ['CommandLineParameterInfo'] } } @@ -3716,13 +4153,13 @@ # Returns: list of @CommandLineOptionInfo for all options (or for the given # @option). Returns an error if the given @option doesn't exist. 
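For query-command-line-options, an exchange looks roughly like this; the chosen option and its parameters are just one plausible example.

-> { "execute": "query-command-line-options", "arguments": { "option": "option-rom" } }
<- { "return": [ { "option": "option-rom",
                   "parameters": [ { "name": "romfile", "type": "string" },
                                   { "name": "bootindex", "type": "number" } ] } ] }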
# -# Since 1.5 +# Since: 1.5 ## {'command': 'query-command-line-options', 'data': { '*option': 'str' }, 'returns': ['CommandLineOptionInfo'] } ## -# @X86CPURegister32 +# @X86CPURegister32: # # A X86 32-bit register # @@ -3732,7 +4169,7 @@ 'data': [ 'EAX', 'EBX', 'ECX', 'EDX', 'ESP', 'EBP', 'ESI', 'EDI' ] } ## -# @X86CPUFeatureWordInfo +# @X86CPUFeatureWordInfo: # # Information about a X86 CPU feature word # @@ -3754,11 +4191,11 @@ 'features': 'int' } } ## -# @DummyForceArrays +# @DummyForceArrays: # # Not used by QMP; hack to let us use X86CPUFeatureWordInfoList internally # -# Since 2.5 +# Since: 2.5 ## { 'struct': 'DummyForceArrays', 'data': { 'unused': ['X86CPUFeatureWordInfo'] } } @@ -3808,9 +4245,8 @@ # # @multicast-table: a list of multicast macaddr string # -# Since 1.6 +# Since: 1.6 ## - { 'struct': 'RxFilterInfo', 'data': { 'name': 'str', @@ -3844,7 +4280,7 @@ 'returns': ['RxFilterInfo'] } ## -# @InputButton +# @InputButton: # # Button of a pointer input device (mouse, tablet). # @@ -3854,7 +4290,7 @@ 'data' : [ 'left', 'middle', 'right', 'wheel-up', 'wheel-down' ] } ## -# @InputAxis +# @InputAxis: # # Position axis of a pointer input device (mouse, tablet). # @@ -3864,7 +4300,7 @@ 'data' : [ 'x', 'y' ] } ## -# @InputKeyEvent +# @InputKeyEvent: # # Keyboard input event. # @@ -3878,7 +4314,7 @@ 'down' : 'bool' } } ## -# @InputBtnEvent +# @InputBtnEvent: # # Pointer button input event. # @@ -3892,7 +4328,7 @@ 'down' : 'bool' } } ## -# @InputMoveEvent +# @InputMoveEvent: # # Pointer motion input event. # @@ -3907,14 +4343,15 @@ 'value' : 'int' } } ## -# @InputEvent +# @InputEvent: # # Input event union. # -# @key: Input event of Keyboard -# @btn: Input event of pointer buttons -# @rel: Input event of relative pointer motion -# @abs: Input event of absolute pointer motion +# @type: the input type, one of: +# - 'key': Input event of Keyboard +# - 'btn': Input event of pointer buttons +# - 'rel': Input event of relative pointer motion +# - 'abs': Input event of absolute pointer motion # # Since: 2.0 ## @@ -3925,7 +4362,7 @@ 'abs' : 'InputMoveEvent' } } ## -# @input-send-event +# @input-send-event: # # Send input event(s) to guest. # @@ -3955,18 +4392,18 @@ 'events' : [ 'InputEvent' ] } } ## -# @NumaOptions +# @NumaOptions: # # A discriminated record of NUMA options. (for OptsVisitor) # -# Since 2.1 +# Since: 2.1 ## { 'union': 'NumaOptions', 'data': { 'node': 'NumaNodeOptions' }} ## -# @NumaNodeOptions +# @NumaNodeOptions: # # Create a guest NUMA node. 
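To illustrate the InputEvent/KeyValue wire format used by input-send-event, here is a sketch that presses and releases one key; the key code is arbitrary and the optional @device is omitted.

-> { "execute": "input-send-event",
     "arguments": { "events": [ { "type": "key",
                                  "data": { "down": true,
                                            "key": { "type": "qcode", "data": "kp_enter" } } },
                                { "type": "key",
                                  "data": { "down": false,
                                            "key": { "type": "qcode", "data": "kp_enter" } } } ] } }
<- { "return": {} }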
(for OptsVisitor) # @@ -3992,7 +4429,7 @@ '*memdev': 'str' }} ## -# @HostMemPolicy +# @HostMemPolicy: # # Host memory policy types # @@ -4006,7 +4443,7 @@ # @interleave: memory allocations are interleaved across the set # of host nodes specified # -# Since 2.1 +# Since: 2.1 ## { 'enum': 'HostMemPolicy', 'data': [ 'default', 'preferred', 'bind', 'interleave' ] } @@ -4030,7 +4467,6 @@ # # Since: 2.1 ## - { 'struct': 'Memdev', 'data': { 'size': 'size', @@ -4096,7 +4532,7 @@ { 'union': 'MemoryDeviceInfo', 'data': {'dimm': 'PCDIMMDeviceInfo'} } ## -# @query-memory-devices +# @query-memory-devices: # # Lists available memory devices and their state # @@ -4104,14 +4540,16 @@ ## { 'command': 'query-memory-devices', 'returns': ['MemoryDeviceInfo'] } -## @ACPISlotType +## +# @ACPISlotType: # # @DIMM: memory slot # @CPU: logical CPU slot (since 2.7) -# +## { 'enum': 'ACPISlotType', 'data': [ 'DIMM', 'CPU' ] } -## @ACPIOSTInfo +## +# @ACPIOSTInfo: # # OSPM Status Indication for a device # For description of possible values of @source and @status fields @@ -4137,7 +4575,7 @@ 'status': 'int' } } ## -# @query-acpi-ospm-status +# @query-acpi-ospm-status: # # Lists ACPI OSPM status of ACPI device objects, # which might be reported via _OST method @@ -4147,7 +4585,7 @@ { 'command': 'query-acpi-ospm-status', 'returns': ['ACPIOSTInfo'] } ## -# @WatchdogExpirationAction +# @WatchdogExpirationAction: # # An enumeration of the actions taken when the watchdog device's timer is # expired @@ -4175,7 +4613,7 @@ 'inject-nmi' ] } ## -# @IoOperationType +# @IoOperationType: # # An enumeration of the I/O operation types # @@ -4189,19 +4627,19 @@ 'data': [ 'read', 'write' ] } ## -# @GuestPanicAction +# @GuestPanicAction: # # An enumeration of the actions taken when guest OS panic is detected # # @pause: system pauses # -# Since: 2.1 +# Since: 2.1 (poweroff since 2.8) ## { 'enum': 'GuestPanicAction', - 'data': [ 'pause' ] } + 'data': [ 'pause', 'poweroff' ] } ## -# @rtc-reset-reinjection +# @rtc-reset-reinjection: # # This command will reset the RTC interrupt reinjection backlog. # Can be used if another mechanism to synchronize guest time @@ -4216,7 +4654,7 @@ { 'include': 'qapi/rocker.json' } ## -# ReplayMode: +# @ReplayMode: # # Mode of the replay subsystem. # @@ -4284,7 +4722,7 @@ { 'command': 'query-gic-capabilities', 'returns': ['GICCapability'] } ## -# CpuInstanceProperties +# @CpuInstanceProperties: # # List of properties to be used for hotplugging a CPU instance, # it should be passed by management with device_add command when @@ -4312,7 +4750,7 @@ } ## -# @HotpluggableCPU +# @HotpluggableCPU: # # @type: CPU object type for usage with device_add command # @props: list of properties to be used for hotplugging CPU @@ -4331,7 +4769,7 @@ } ## -# @query-hotpluggable-cpus +# @query-hotpluggable-cpus: # # Returns: a list of HotpluggableCPU objects. 
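A sketch of a query-hotpluggable-cpus exchange for the structures documented above; the CPU type, counts and QOM path are illustrative values for an x86 machine with one occupied and one empty socket.

-> { "execute": "query-hotpluggable-cpus" }
<- { "return": [ { "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
                   "props": { "socket-id": 1, "core-id": 0, "thread-id": 0 } },
                 { "type": "qemu64-x86_64-cpu", "vcpus-count": 1,
                   "props": { "socket-id": 0, "core-id": 0, "thread-id": 0 },
                   "qom-path": "/machine/unattached/device[0]" } ] }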
# diff --git a/qapi/Makefile.objs b/qapi/Makefile.objs index 7ea4aebb00..33906ff321 100644 --- a/qapi/Makefile.objs +++ b/qapi/Makefile.objs @@ -1,5 +1,5 @@ -util-obj-y = qapi-visit-core.o qapi-dealloc-visitor.o qmp-input-visitor.o -util-obj-y += qmp-output-visitor.o qmp-registry.o qmp-dispatch.o +util-obj-y = qapi-visit-core.o qapi-dealloc-visitor.o qobject-input-visitor.o +util-obj-y += qobject-output-visitor.o qmp-registry.o qmp-dispatch.o util-obj-y += string-input-visitor.o string-output-visitor.o util-obj-y += opts-visitor.o qapi-clone-visitor.o util-obj-y += qmp-event.o diff --git a/qapi/block-core.json b/qapi/block-core.json index 5e2d7d78d2..6b42216960 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -6,7 +6,7 @@ { 'include': 'common.json' } ## -# @SnapshotInfo +# @SnapshotInfo: # # @id: unique snapshot id # @@ -25,7 +25,6 @@ # Since: 1.3 # ## - { 'struct': 'SnapshotInfo', 'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int', 'date-sec': 'int', 'date-nsec': 'int', @@ -81,7 +80,6 @@ # # Since: 1.7 ## - { 'union': 'ImageInfoSpecific', 'data': { 'qcow2': 'ImageInfoSpecificQCow2', @@ -129,7 +127,6 @@ # Since: 1.3 # ## - { 'struct': 'ImageInfo', 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', '*actual-size': 'int', 'virtual-size': 'int', @@ -181,7 +178,6 @@ # Since: 1.4 # ## - { 'struct': 'ImageCheck', 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int', @@ -217,7 +213,7 @@ '*filename': 'str' } } ## -# @BlockdevCacheInfo +# @BlockdevCacheInfo: # # Cache mode information for a block device # @@ -247,11 +243,12 @@ # 0.14.0 this can be: 'blkdebug', 'bochs', 'cloop', 'cow', 'dmg', # 'file', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device', # 'http', 'https', 'luks', 'nbd', 'parallels', 'qcow', -# 'qcow2', 'raw', 'tftp', 'vdi', 'vmdk', 'vpc', 'vvfat' +# 'qcow2', 'raw', 'vdi', 'vmdk', 'vpc', 'vvfat' # 2.2: 'archipelago' added, 'cow' dropped # 2.3: 'host_floppy' deprecated # 2.5: 'host_floppy' dropped # 2.6: 'luks' added +# 2.8: 'replication' added, 'tftp' dropped # # @backing_file: #optional the name of the backing file (for copy-on-write) # @@ -381,7 +378,7 @@ # @offset: if present, the image file stores the data for this range in # raw format at the given offset. # -# Since 1.7 +# Since: 1.7 ## { 'struct': 'BlockDeviceMapEntry', 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool', @@ -517,7 +514,6 @@ # # Since: 2.5 ## - { 'struct': 'BlockDeviceTimedStats', 'data': { 'interval_length': 'int', 'min_rd_latency_ns': 'int', 'max_rd_latency_ns': 'int', 'avg_rd_latency_ns': 'int', @@ -794,7 +790,7 @@ '*node-name': 'str', 'password': 'str'} } ## -# @block_resize +# @block_resize: # # Resize a block image while a guest is running. # @@ -816,7 +812,7 @@ 'size': 'int' }} ## -# @NewImageMode +# @NewImageMode: # # An enumeration that tells QEMU how to set the backing file path in # a new image file. @@ -833,7 +829,7 @@ 'data': [ 'existing', 'absolute-paths' ] } ## -# @BlockdevSnapshotSync +# @BlockdevSnapshotSync: # # Either @device or @node-name must be set but not both. # @@ -856,7 +852,7 @@ '*format': 'str', '*mode': 'NewImageMode' } } ## -# @BlockdevSnapshot +# @BlockdevSnapshot: # # @node: device or node name that will have a snapshot created. # @@ -865,18 +861,18 @@ # It must not have a current backing file (this can be # achieved by passing "backing": "" to blockdev-add). 
# -# Since 2.5 +# Since: 2.5 ## { 'struct': 'BlockdevSnapshot', 'data': { 'node': 'str', 'overlay': 'str' } } ## -# @DriveBackup +# @DriveBackup: # # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the name of the device which should be copied. +# @device: the device name or node-name of a root node which should be copied. # # @target: the target of the new image. If the file exists, or if it # is a device, the existing file/device will be used as the new @@ -898,6 +894,9 @@ # Must be present if sync is "incremental", must NOT be present # otherwise. (Since 2.4) # +# @compress: #optional true to compress data, if the target format supports it. +# (default: false) (since 2.8) +# # @on-source-error: #optional the action to take on an error on the source, # default 'report'. 'stop' and 'enospc' can only be used # if the block device supports io-status (see BlockInfo). @@ -906,26 +905,26 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # -# Note that @on-source-error and @on-target-error only affect background I/O. -# If an error occurs during a guest write request, the device's rerror/werror -# actions will be used. +# Note: @on-source-error and @on-target-error only affect background +# I/O. If an error occurs during a guest write request, the device's +# rerror/werror actions will be used. # # Since: 1.6 ## { 'struct': 'DriveBackup', 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', - '*speed': 'int', '*bitmap': 'str', + '*speed': 'int', '*bitmap': 'str', '*compress': 'bool', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } ## -# @BlockdevBackup +# @BlockdevBackup: # # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the name of the device which should be copied. +# @device: the device name or node-name of a root node which should be copied. # # @target: the device name or node-name of the backup target node. # @@ -936,6 +935,9 @@ # @speed: #optional the maximum speed, in bytes per second. The default is 0, # for unlimited. # +# @compress: #optional true to compress data, if the target format supports it. +# (default: false) (since 2.8) +# # @on-source-error: #optional the action to take on an error on the source, # default 'report'. 'stop' and 'enospc' can only be used # if the block device supports io-status (see BlockInfo). @@ -944,9 +946,9 @@ # default 'report' (no limitations, since this applies to # a different block device than @device). # -# Note that @on-source-error and @on-target-error only affect background I/O. -# If an error occurs during a guest write request, the device's rerror/werror -# actions will be used. +# Note: @on-source-error and @on-target-error only affect background +# I/O. If an error occurs during a guest write request, the device's +# rerror/werror actions will be used. # # Since: 2.3 ## @@ -954,11 +956,12 @@ 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', 'sync': 'MirrorSyncMode', '*speed': 'int', + '*compress': 'bool', '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } ## -# @blockdev-snapshot-sync +# @blockdev-snapshot-sync: # # Generates a synchronous snapshot of a block device. 
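The new @compress member of DriveBackup is used like this. A sketch: the device name and target path are placeholders, and compression only has an effect if the target format supports it.

-> { "execute": "drive-backup",
     "arguments": { "device": "drive0", "sync": "full", "format": "qcow2",
                    "target": "/tmp/backup.qcow2", "compress": true } }
<- { "return": {} }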
# @@ -967,26 +970,26 @@ # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound # -# Since 0.14.0 +# Since: 0.14.0 ## { 'command': 'blockdev-snapshot-sync', 'data': 'BlockdevSnapshotSync' } ## -# @blockdev-snapshot +# @blockdev-snapshot: # # Generates a snapshot of a block device. # # For the arguments, see the documentation of BlockdevSnapshot. # -# Since 2.5 +# Since: 2.5 ## { 'command': 'blockdev-snapshot', 'data': 'BlockdevSnapshot' } ## -# @change-backing-file +# @change-backing-file: # # Change the backing file in the image file metadata. This does not # cause QEMU to reopen the image file to reparse the backing filename @@ -998,7 +1001,8 @@ # @image-node-name: The name of the block driver state node of the # image to modify. # -# @device: The name of the device that owns image-node-name. +# @device: The device name or node-name of the root node that owns +# image-node-name. # # @backing-file: The string to write as the backing file. This # string is not validated, so care should be taken @@ -1012,7 +1016,7 @@ 'backing-file': 'str' } } ## -# @block-commit +# @block-commit: # # Live commit of data from overlay image nodes into backing nodes - i.e., # writes data between 'top' and 'base' into 'base'. @@ -1020,7 +1024,7 @@ # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the name of the device +# @device: the device name or node-name of a root node # # @base: #optional The file name of the backing image to write data into. # If not specified, this is the deepest backing image @@ -1075,7 +1079,7 @@ '*backing-file': 'str', '*speed': 'int' } } ## -# @drive-backup +# @drive-backup: # # Start a point-in-time copy of a block device to a new destination. The # status of ongoing drive-backup operations can be checked with @@ -1086,14 +1090,15 @@ # For the arguments, see the documentation of DriveBackup. # # Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound +# If @device is not a valid block device, GenericError # -# Since 1.6 +# Since: 1.6 ## -{ 'command': 'drive-backup', 'data': 'DriveBackup' } +{ 'command': 'drive-backup', 'boxed': true, + 'data': 'DriveBackup' } ## -# @blockdev-backup +# @blockdev-backup: # # Start a point-in-time copy of a block device to a new destination. The # status of ongoing blockdev-backup operations can be checked with @@ -1103,46 +1108,51 @@ # # For the arguments, see the documentation of BlockdevBackup. # -# Since 2.3 +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since: 2.3 ## -{ 'command': 'blockdev-backup', 'data': 'BlockdevBackup' } +{ 'command': 'blockdev-backup', 'boxed': true, + 'data': 'BlockdevBackup' } ## -# @query-named-block-nodes +# @query-named-block-nodes: # # Get the named block driver list # # Returns: the list of BlockDeviceInfo # -# Since 2.0 +# Since: 2.0 ## { 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] } ## -# @drive-mirror +# @drive-mirror: # # Start mirroring a block device's writes to a new destination. # # See DriveMirror for parameter descriptions # # Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound +# If @device is not a valid block device, GenericError # -# Since 1.3 +# Since: 1.3 ## { 'command': 'drive-mirror', 'boxed': true, 'data': 'DriveMirror' } ## -# DriveMirror +# @DriveMirror: # # A set of parameters describing drive mirror setup. 
# # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the name of the device whose writes should be mirrored. +# @device: the device name or node-name of a root node whose writes should be +# mirrored. # # @target: the target of the new image. If the file exists, or if it # is a device, the existing file/device will be used as the new @@ -1188,7 +1198,7 @@ # written. Both will result in identical contents. # Default is true. (Since 2.4) # -# Since 1.3 +# Since: 1.3 ## { 'struct': 'DriveMirror', 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', @@ -1200,19 +1210,19 @@ '*unmap': 'bool' } } ## -# @BlockDirtyBitmap +# @BlockDirtyBitmap: # # @node: name of device/node which the bitmap is tracking # # @name: name of the dirty bitmap # -# Since 2.4 +# Since: 2.4 ## { 'struct': 'BlockDirtyBitmap', 'data': { 'node': 'str', 'name': 'str' } } ## -# @BlockDirtyBitmapAdd +# @BlockDirtyBitmapAdd: # # @node: name of device/node which the bitmap is tracking # @@ -1221,13 +1231,13 @@ # @granularity: #optional the bitmap granularity, default is 64k for # block-dirty-bitmap-add # -# Since 2.4 +# Since: 2.4 ## { 'struct': 'BlockDirtyBitmapAdd', 'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32' } } ## -# @block-dirty-bitmap-add +# @block-dirty-bitmap-add: # # Create a dirty bitmap with a name on the node # @@ -1235,13 +1245,13 @@ # If @node is not a valid block device or node, DeviceNotFound # If @name is already taken, GenericError with an explanation # -# Since 2.4 +# Since: 2.4 ## { 'command': 'block-dirty-bitmap-add', 'data': 'BlockDirtyBitmapAdd' } ## -# @block-dirty-bitmap-remove +# @block-dirty-bitmap-remove: # # Remove a dirty bitmap on the node # @@ -1250,13 +1260,13 @@ # If @name is not found, GenericError with an explanation # if @name is frozen by an operation, GenericError # -# Since 2.4 +# Since: 2.4 ## { 'command': 'block-dirty-bitmap-remove', 'data': 'BlockDirtyBitmap' } ## -# @block-dirty-bitmap-clear +# @block-dirty-bitmap-clear: # # Clear (reset) a dirty bitmap on the device # @@ -1264,20 +1274,21 @@ # If @node is not a valid block device, DeviceNotFound # If @name is not found, GenericError with an explanation # -# Since 2.4 +# Since: 2.4 ## { 'command': 'block-dirty-bitmap-clear', 'data': 'BlockDirtyBitmap' } ## -# @blockdev-mirror +# @blockdev-mirror: # # Start mirroring a block device's writes to a new destination. # # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the name of the device whose writes should be mirrored. +# @device: The device name or node-name of a root node whose writes should be +# mirrored. # # @target: the id or node-name of the block device to mirror to. This mustn't be # attached to guest. @@ -1310,7 +1321,7 @@ # # Returns: nothing on success. # -# Since 2.6 +# Since: 2.6 ## { 'command': 'blockdev-mirror', 'data': { '*job-id': 'str', 'device': 'str', 'target': 'str', @@ -1357,11 +1368,13 @@ 'data': 'BlockIOThrottle' } ## -# BlockIOThrottle +# @BlockIOThrottle: # # A set of parameters describing block throttling. 
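A sketch of blockdev-mirror with the relaxed @device semantics; "drive0" and "target0" are placeholder node names, and the target node is assumed to have been created beforehand (e.g. with blockdev-add).

-> { "execute": "blockdev-mirror",
     "arguments": { "job-id": "job0", "device": "drive0",
                    "target": "target0", "sync": "full" } }
<- { "return": {} }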
# -# @device: The name of the device +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device (since: 2.8) # # @bps: total throughput limit in bytes per second # @@ -1430,8 +1443,8 @@ # Since: 1.1 ## { 'struct': 'BlockIOThrottle', - 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', - 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', + 'data': { '*device': 'str', '*id': 'str', 'bps': 'int', 'bps_rd': 'int', + 'bps_wr': 'int', 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', '*bps_max': 'int', '*bps_rd_max': 'int', '*bps_wr_max': 'int', '*iops_max': 'int', '*iops_rd_max': 'int', '*iops_wr_max': 'int', @@ -1451,6 +1464,13 @@ # with query-block-jobs. The operation can be stopped before it has completed # using the block-job-cancel command. # +# The node that receives the data is called the top image, can be located in +# any part of the chain (but always above the base image; see below) and can be +# specified using its device or node name. Earlier qemu versions only allowed +# 'device' to name the top level node; presence of the 'base-node' parameter +# during introspection can be used as a witness of the enhanced semantics +# of 'device'. +# # If a base file is specified then sectors are not copied from that base file and # its backing chain. When streaming completes the image file will have the base # file as its backing file. This can be used to stream a subset of the backing @@ -1462,12 +1482,16 @@ # @job-id: #optional identifier for the newly-created block job. If # omitted, the device name will be used. (Since 2.7) # -# @device: the device name +# @device: the device or node name of the top image +# +# @base: #optional the common backing file name. +# It cannot be set if @base-node is also set. # -# @base: #optional the common backing file name +# @base-node: #optional the node name of the backing file. +# It cannot be set if @base is also set. (Since 2.8) # -# @backing-file: #optional The backing file string to write into the active -# layer. This filename is not validated. +# @backing-file: #optional The backing file string to write into the top +# image. This filename is not validated. # # If a pathname string is such that it cannot be # resolved by QEMU, that means that subsequent QMP or @@ -1487,14 +1511,11 @@ # 'stop' and 'enospc' can only be used if the block device # supports io-status (see BlockInfo). Since 1.3. # -# Returns: Nothing on success -# If @device does not exist, DeviceNotFound -# # Since: 1.1 ## { 'command': 'block-stream', 'data': { '*job-id': 'str', 'device': 'str', '*base': 'str', - '*backing-file': 'str', '*speed': 'int', + '*base-node': 'str', '*backing-file': 'str', '*speed': 'int', '*on-error': 'BlockdevOnError' } } ## @@ -1629,7 +1650,7 @@ { 'command': 'block-job-complete', 'data': { 'device': 'str' } } ## -# @BlockdevDiscardOptions +# @BlockdevDiscardOptions: # # Determines how to handle discard requests. # @@ -1642,7 +1663,7 @@ 'data': [ 'ignore', 'unmap' ] } ## -# @BlockdevDetectZeroesOptions +# @BlockdevDetectZeroesOptions: # # Describes the operation mode for the automatic conversion of plain # zero writes by the OS to driver specific optimized zero write commands. 
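With the new optional @id member, block_set_io_throttle can address the guest device directly. A sketch with a placeholder id and an arbitrary 10 MB/s total limit; the six base rates are mandatory, so the unused ones are passed as 0.

-> { "execute": "block_set_io_throttle",
     "arguments": { "id": "virtio0", "bps": 10485760, "bps_rd": 0, "bps_wr": 0,
                    "iops": 0, "iops_rd": 0, "iops_wr": 0 } }
<- { "return": {} }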
@@ -1658,7 +1679,7 @@ 'data': [ 'off', 'on', 'unmap' ] } ## -# @BlockdevAioOptions +# @BlockdevAioOptions: # # Selects the AIO backend to handle I/O requests # @@ -1671,7 +1692,7 @@ 'data': [ 'threads', 'native' ] } ## -# @BlockdevCacheOptions +# @BlockdevCacheOptions: # # Includes cache-related options for block devices # @@ -1687,37 +1708,44 @@ '*no-flush': 'bool' } } ## -# @BlockdevDriver +# @BlockdevDriver: # # Drivers that are supported in block device operations. # -# @host_device, @host_cdrom: Since 2.1 +# @host_device: Since 2.1 +# @host_cdrom: Since 2.1 # @gluster: Since 2.7 +# @nbd: Since 2.8 +# @nfs: Since 2.8 +# @replication: Since 2.8 +# @ssh: Since 2.8 # # Since: 2.0 ## { 'enum': 'BlockdevDriver', 'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop', 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', - 'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co', - 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', - 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] } + 'host_device', 'http', 'https', 'luks', 'nbd', 'nfs', 'null-aio', + 'null-co', 'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', + 'replication', 'ssh', 'vdi', 'vhdx', 'vmdk', 'vpc', + 'vvfat' ] } ## -# @BlockdevOptionsFile +# @BlockdevOptionsFile: # -# Driver specific block device options for the file backend and similar -# protocols. +# Driver specific block device options for the file backend. # # @filename: path to the image file +# @aio: #optional AIO backend (default: threads) (since: 2.8) # # Since: 1.7 ## { 'struct': 'BlockdevOptionsFile', - 'data': { 'filename': 'str' } } + 'data': { 'filename': 'str', + '*aio': 'BlockdevAioOptions' } } ## -# @BlockdevOptionsNull +# @BlockdevOptionsNull: # # Driver specific block device options for the null backend. # @@ -1732,7 +1760,7 @@ 'data': { '*size': 'int', '*latency-ns': 'uint64' } } ## -# @BlockdevOptionsVVFAT +# @BlockdevOptionsVVFAT: # # Driver specific block device options for the vvfat protocol. # @@ -1753,7 +1781,7 @@ '*label': 'str', '*rw': 'bool' } } ## -# @BlockdevOptionsGenericFormat +# @BlockdevOptionsGenericFormat: # # Driver specific block device options for image format that have no option # besides their data source. @@ -1766,7 +1794,7 @@ 'data': { 'file': 'BlockdevRef' } } ## -# @BlockdevOptionsLUKS +# @BlockdevOptionsLUKS: # # Driver specific block device options for LUKS. # @@ -1782,7 +1810,7 @@ ## -# @BlockdevOptionsGenericCOWFormat +# @BlockdevOptionsGenericCOWFormat: # # Driver specific block device options for image format that have no option # besides their data source and an optional backing file. @@ -1799,7 +1827,7 @@ 'data': { '*backing': 'BlockdevRef' } } ## -# @Qcow2OverlapCheckMode +# @Qcow2OverlapCheckMode: # # General overlap check modes. # @@ -1819,7 +1847,7 @@ 'data': [ 'none', 'constant', 'cached', 'all' ] } ## -# @Qcow2OverlapCheckFlags +# @Qcow2OverlapCheckFlags: # # Structure of flags for each metadata structure. Setting a field to 'true' # makes qemu guard that structure against unintended overwriting. The default @@ -1842,7 +1870,7 @@ '*inactive-l2': 'bool' } } ## -# @Qcow2OverlapChecks +# @Qcow2OverlapChecks: # # Specifies which metadata structures should be guarded against unintended # overwriting. @@ -1859,7 +1887,7 @@ 'mode': 'Qcow2OverlapCheckMode' } } ## -# @BlockdevOptionsQcow2 +# @BlockdevOptionsQcow2: # # Driver specific block device options for qcow2. 
# @@ -1909,7 +1937,7 @@ ## -# @BlockdevOptionsArchipelago +# @BlockdevOptionsArchipelago: # # Driver specific block device options for Archipelago. # @@ -1937,9 +1965,28 @@ '*vport': 'int', '*segment': 'str' } } +## +# @BlockdevOptionsSsh: +# +# @server: host address +# +# @path: path to the image on the host +# +# @user: #optional user as which to connect, defaults to current +# local user name +# +# TODO: Expose the host_key_check option in QMP +# +# Since: 2.8 +## +{ 'struct': 'BlockdevOptionsSsh', + 'data': { 'server': 'InetSocketAddress', + 'path': 'str', + '*user': 'str' } } + ## -# @BlkdebugEvent +# @BlkdebugEvent: # # Trigger events supported by blkdebug. # @@ -1962,7 +2009,7 @@ 'pwritev_zero', 'pwritev_done', 'empty_image_prepare' ] } ## -# @BlkdebugInjectErrorOptions +# @BlkdebugInjectErrorOptions: # # Describes a single error injection for blkdebug. # @@ -1994,7 +2041,7 @@ '*immediately': 'bool' } } ## -# @BlkdebugSetStateOptions +# @BlkdebugSetStateOptions: # # Describes a single state-change event for blkdebug. # @@ -2014,7 +2061,7 @@ 'new_state': 'int' } } ## -# @BlockdevOptionsBlkdebug +# @BlockdevOptionsBlkdebug: # # Driver specific block device options for blkdebug. # @@ -2039,7 +2086,7 @@ '*set-state': ['BlkdebugSetStateOptions'] } } ## -# @BlockdevOptionsBlkverify +# @BlockdevOptionsBlkverify: # # Driver specific block device options for blkverify. # @@ -2054,7 +2101,7 @@ 'raw': 'BlockdevRef' } } ## -# @QuorumReadPattern +# @QuorumReadPattern: # # An enumeration of quorum read patterns. # @@ -2067,7 +2114,7 @@ { 'enum': 'QuorumReadPattern', 'data': [ 'quorum', 'fifo' ] } ## -# @BlockdevOptionsQuorum +# @BlockdevOptionsQuorum: # # Driver specific block device options for Quorum # @@ -2094,7 +2141,7 @@ '*read-pattern': 'QuorumReadPattern' } } ## -# @GlusterTransport +# @GlusterTransport: # # An enumeration of Gluster transport types # @@ -2109,7 +2156,7 @@ ## -# @GlusterServer +# @GlusterServer: # # Captures the address of a socket # @@ -2117,9 +2164,17 @@ # # @type: Transport type used for gluster connection # -# @unix: socket file +# This is similar to SocketAddress, only distinction: +# +# 1. GlusterServer is a flat union, SocketAddress is a simple union. +# A flat union is nicer than simple because it avoids nesting +# (i.e. more {}) on the wire. +# +# 2. GlusterServer lacks case 'fd', since gluster doesn't let you +# pass in a file descriptor. # -# @tcp: host address and port number +# GlusterServer is actually not Gluster-specific, its a +# compatibility evolved into an alternate for SocketAddress. # # Since: 2.7 ## @@ -2130,7 +2185,7 @@ 'tcp': 'InetSocketAddress' } } ## -# @BlockdevOptionsGluster +# @BlockdevOptionsGluster: # # Driver specific block device options for Gluster # @@ -2140,7 +2195,10 @@ # # @server: gluster servers description # -# @debug-level: #optional libgfapi log level (default '4' which is Error) +# @debug: #optional libgfapi log level (default '4' which is Error) +# (Since 2.8) +# +# @logfile: #optional libgfapi log file (default /dev/stderr) (Since 2.8) # # Since: 2.7 ## @@ -2148,25 +2206,163 @@ 'data': { 'volume': 'str', 'path': 'str', 'server': ['GlusterServer'], - '*debug-level': 'int' } } + '*debug': 'int', + '*logfile': 'str' } } + +## +# @ReplicationMode: +# +# An enumeration of replication modes. +# +# @primary: Primary mode, the vm's state will be sent to secondary QEMU. +# +# @secondary: Secondary mode, receive the vm's state from primary QEMU. 
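The renamed @debug and new @logfile members of BlockdevOptionsGluster would be passed to blockdev-add roughly as below; the volume, path, server address and log file are placeholders.

-> { "execute": "blockdev-add",
     "arguments": { "driver": "gluster", "node-name": "gl0",
                    "volume": "testvol", "path": "/images/a.qcow2",
                    "server": [ { "type": "tcp", "host": "10.0.0.1", "port": "24007" } ],
                    "debug": 4, "logfile": "/var/log/qemu-gfapi.log" } }
<- { "return": {} }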
+# +# Since: 2.8 +## +{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] } + +## +# @BlockdevOptionsReplication: +# +# Driver specific block device options for replication +# +# @mode: the replication mode +# +# @top-id: #optional In secondary mode, node name or device ID of the root +# node who owns the replication node chain. Must not be given in +# primary mode. +# +# Since: 2.8 +## +{ 'struct': 'BlockdevOptionsReplication', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { 'mode': 'ReplicationMode', + '*top-id': 'str' } } + +## +# @NFSTransport: +# +# An enumeration of NFS transport types +# +# @inet: TCP transport +# +# Since: 2.8 +## +{ 'enum': 'NFSTransport', + 'data': [ 'inet' ] } + +## +# @NFSServer: +# +# Captures the address of the socket +# +# @type: transport type used for NFS (only TCP supported) +# +# @host: host address for NFS server +# +# Since: 2.8 +## +{ 'struct': 'NFSServer', + 'data': { 'type': 'NFSTransport', + 'host': 'str' } } + +## +# @BlockdevOptionsNfs: +# +# Driver specific block device option for NFS +# +# @server: host address +# +# @path: path of the image on the host +# +# @user: #optional UID value to use when talking to the +# server (defaults to 65534 on Windows and getuid() +# on unix) +# +# @group: #optional GID value to use when talking to the +# server (defaults to 65534 on Windows and getgid() +# in unix) +# +# @tcp-syn-count: #optional number of SYNs during the session +# establishment (defaults to libnfs default) +# +# @readahead-size: #optional set the readahead size in bytes (defaults +# to libnfs default) +# +# @page-cache-size: #optional set the pagecache size in bytes (defaults +# to libnfs default) +# +# @debug: #optional set the NFS debug level (max 2) (defaults +# to libnfs default) +# +# Since: 2.8 +## +{ 'struct': 'BlockdevOptionsNfs', + 'data': { 'server': 'NFSServer', + 'path': 'str', + '*user': 'int', + '*group': 'int', + '*tcp-syn-count': 'int', + '*readahead-size': 'int', + '*page-cache-size': 'int', + '*debug': 'int' } } + +## +# @BlockdevOptionsCurl: +# +# Driver specific block device options for the curl backend. +# +# @filename: path to the image file +# +# Since: 1.7 +## +{ 'struct': 'BlockdevOptionsCurl', + 'data': { 'filename': 'str' } } + +## +# @BlockdevOptionsNbd: +# +# Driver specific block device options for NBD. +# +# @server: NBD server address +# +# @export: #optional export name +# +# @tls-creds: #optional TLS credentials ID +# +# Since: 2.8 +## +{ 'struct': 'BlockdevOptionsNbd', + 'data': { 'server': 'SocketAddress', + '*export': 'str', + '*tls-creds': 'str' } } ## -# @BlockdevOptions +# @BlockdevOptionsRaw: +# +# Driver specific block device options for the raw driver. +# +# @offset: #optional position where the block device starts +# @size: #optional the assumed size of the device +# +# Since: 2.8 +## +{ 'struct': 'BlockdevOptionsRaw', + 'base': 'BlockdevOptionsGenericFormat', + 'data': { '*offset': 'int', '*size': 'int' } } + +## +# @BlockdevOptions: # # Options for creating a block device. Many options are available for all # block devices, independent of the block driver: # # @driver: block driver name -# @id: #optional id by which the new block device can be referred to. -# This option is only allowed on the top level of blockdev-add. -# A BlockBackend will be created by blockdev-add if and only if -# this option is given. -# @node-name: #optional the name of a block driver state node (Since 2.0). 
-# This option is required on the top level of blockdev-add if -# the @id option is not given there. +# @node-name: #optional the node name of the new node (Since 2.0). +# This option is required on the top level of blockdev-add. # @discard: #optional discard-related options (default: ignore) # @cache: #optional cache-related options -# @aio: #optional AIO backend (default: threads) # @read-only: #optional whether the block device should be read-only # (default: false) # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) @@ -2178,12 +2374,9 @@ ## { 'union': 'BlockdevOptions', 'base': { 'driver': 'BlockdevDriver', -# TODO 'id' is a BB-level option, remove it - '*id': 'str', '*node-name': 'str', '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', - '*aio': 'BlockdevAioOptions', '*read-only': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions' }, 'discriminator': 'driver', @@ -2195,17 +2388,17 @@ 'cloop': 'BlockdevOptionsGenericFormat', 'dmg': 'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', - 'ftp': 'BlockdevOptionsFile', - 'ftps': 'BlockdevOptionsFile', + 'ftp': 'BlockdevOptionsCurl', + 'ftps': 'BlockdevOptionsCurl', 'gluster': 'BlockdevOptionsGluster', 'host_cdrom': 'BlockdevOptionsFile', 'host_device':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsFile', - 'https': 'BlockdevOptionsFile', + 'http': 'BlockdevOptionsCurl', + 'https': 'BlockdevOptionsCurl', # TODO iscsi: Wait for structured options 'luks': 'BlockdevOptionsLUKS', -# TODO nbd: Should take InetSocketAddress for 'host'? -# TODO nfs: Wait for structured options + 'nbd': 'BlockdevOptionsNbd', + 'nfs': 'BlockdevOptionsNfs', 'null-aio': 'BlockdevOptionsNull', 'null-co': 'BlockdevOptionsNull', 'parallels': 'BlockdevOptionsGenericFormat', @@ -2213,11 +2406,11 @@ 'qcow': 'BlockdevOptionsGenericCOWFormat', 'qed': 'BlockdevOptionsGenericCOWFormat', 'quorum': 'BlockdevOptionsQuorum', - 'raw': 'BlockdevOptionsGenericFormat', + 'raw': 'BlockdevOptionsRaw', # TODO rbd: Wait for structured options + 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options -# TODO ssh: Should take InetSocketAddress for 'host'? - 'tftp': 'BlockdevOptionsFile', + 'ssh': 'BlockdevOptionsSsh', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', 'vmdk': 'BlockdevOptionsGenericCOWFormat', @@ -2226,7 +2419,7 @@ } } ## -# @BlockdevRef +# @BlockdevRef: # # Reference to a block device. # @@ -2252,39 +2445,28 @@ # block drivers among other things. Stay away from it unless you want # to help with its development. # -# @options: block device options for the new device +# For the arguments, see the documentation of BlockdevOptions. # # Since: 1.7 ## -{ 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } } +{ 'command': 'blockdev-add', 'data': 'BlockdevOptions', 'boxed': true } ## # @x-blockdev-del: # # Deletes a block device that has been added using blockdev-add. -# The selected device can be either a block backend or a graph node. -# -# In the former case the backend will be destroyed, along with its -# inserted medium if there's any. The command will fail if the backend -# or its medium are in use. -# -# In the latter case the node will be destroyed. The command will fail -# if the node is attached to a block backend or is otherwise being -# used. -# -# One of @id or @node-name must be specified, but not both. +# The command will fail if the node is attached to a device or is +# otherwise being used. 
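With blockdev-add now boxed, the options are passed directly as arguments, and x-blockdev-del takes only @node-name. A sketch using a placeholder qcow2 image:

-> { "execute": "blockdev-add",
     "arguments": { "driver": "qcow2", "node-name": "disk0",
                    "file": { "driver": "file", "filename": "/tmp/test.qcow2" } } }
<- { "return": {} }

-> { "execute": "x-blockdev-del", "arguments": { "node-name": "disk0" } }
<- { "return": {} }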
# # This command is still a work in progress and is considered # experimental. Stay away from it unless you want to help with its # development. # -# @id: #optional Name of the block backend device to delete. -# -# @node-name: #optional Name of the graph node to delete. +# @node-name: Name of the graph node to delete. # # Since: 2.5 ## -{ 'command': 'x-blockdev-del', 'data': { '*id': 'str', '*node-name': 'str' } } +{ 'command': 'x-blockdev-del', 'data': { 'node-name': 'str' } } ## # @blockdev-open-tray: @@ -2304,7 +2486,9 @@ # to it # - if the guest device does not have an actual tray # -# @device: block device name +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device (since: 2.8) # # @force: #optional if false (the default), an eject request will be sent to # the guest if it has locked the tray (and the tray will not be opened @@ -2314,7 +2498,8 @@ # Since: 2.5 ## { 'command': 'blockdev-open-tray', - 'data': { 'device': 'str', + 'data': { '*device': 'str', + '*id': 'str', '*force': 'bool' } } ## @@ -2326,12 +2511,15 @@ # # If the tray was already closed before, this will be a no-op. # -# @device: block device name +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device (since: 2.8) # # Since: 2.5 ## { 'command': 'blockdev-close-tray', - 'data': { 'device': 'str' } } + 'data': { '*device': 'str', + '*id': 'str' } } ## # @x-blockdev-remove-medium: @@ -2345,12 +2533,15 @@ # This command is still a work in progress and is considered experimental. # Stay away from it unless you want to help with its development. # -# @device: block device name +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device (since: 2.8) # # Since: 2.5 ## { 'command': 'x-blockdev-remove-medium', - 'data': { 'device': 'str' } } + 'data': { '*device': 'str', + '*id': 'str' } } ## # @x-blockdev-insert-medium: @@ -2362,14 +2553,17 @@ # This command is still a work in progress and is considered experimental. # Stay away from it unless you want to help with its development. # -# @device: block device name +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device (since: 2.8) # # @node-name: name of a node in the block driver state graph # # Since: 2.5 ## { 'command': 'x-blockdev-insert-medium', - 'data': { 'device': 'str', + 'data': { '*device': 'str', + '*id': 'str', 'node-name': 'str'} } @@ -2399,7 +2593,10 @@ # combines blockdev-open-tray, x-blockdev-remove-medium, # x-blockdev-insert-medium and blockdev-close-tray). 
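Example (illustrative sketch; "ide0-cd0" is a hypothetical guest device id): opening and closing a tray via the new @id argument instead of the deprecated @device name:

-> { "execute": "blockdev-open-tray",
     "arguments": { "id": "ide0-cd0", "force": true } }
<- { "return": {} }

-> { "execute": "blockdev-close-tray", "arguments": { "id": "ide0-cd0" } }
<- { "return": {} }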
# -# @device: block device name +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device +# (since: 2.8) # # @filename: filename of the new image to be loaded # @@ -2412,14 +2609,15 @@ # Since: 2.5 ## { 'command': 'blockdev-change-medium', - 'data': { 'device': 'str', + 'data': { '*device': 'str', + '*id': 'str', 'filename': 'str', '*format': 'str', '*read-only-mode': 'BlockdevChangeReadOnlyMode' } } ## -# @BlockErrorAction +# @BlockErrorAction: # # An enumeration of action that has been taken when a DISK I/O occurs # @@ -2436,7 +2634,7 @@ ## -# @BLOCK_IMAGE_CORRUPTED +# @BLOCK_IMAGE_CORRUPTED: # # Emitted when a corruption has been detected in a disk image # @@ -2471,11 +2669,17 @@ 'fatal' : 'bool' } } ## -# @BLOCK_IO_ERROR +# @BLOCK_IO_ERROR: # # Emitted when a disk I/O error occurs # -# @device: device name +# @device: device name. This is always present for compatibility +# reasons, but it can be empty ("") if the image does not +# have a device name associated. +# +# @node-name: node name. Note that errors may be reported for the root node +# that is directly attached to a guest device rather than for the +# node where the error occurred. (Since: 2.8) # # @operation: I/O operation # @@ -2496,12 +2700,12 @@ # Since: 0.13.0 ## { 'event': 'BLOCK_IO_ERROR', - 'data': { 'device': 'str', 'operation': 'IoOperationType', + 'data': { 'device': 'str', 'node-name': 'str', 'operation': 'IoOperationType', 'action': 'BlockErrorAction', '*nospace': 'bool', 'reason': 'str' } } ## -# @BLOCK_JOB_COMPLETED +# @BLOCK_JOB_COMPLETED: # # Emitted when a block job has completed # @@ -2533,7 +2737,7 @@ '*error': 'str' } } ## -# @BLOCK_JOB_CANCELLED +# @BLOCK_JOB_CANCELLED: # # Emitted when a block job has been cancelled # @@ -2559,7 +2763,7 @@ 'speed' : 'int' } } ## -# @BLOCK_JOB_ERROR +# @BLOCK_JOB_ERROR: # # Emitted when a block job encounters an error # @@ -2578,7 +2782,7 @@ 'action' : 'BlockErrorAction' } } ## -# @BLOCK_JOB_READY +# @BLOCK_JOB_READY: # # Emitted when a block job is ready to complete # @@ -2606,7 +2810,8 @@ 'offset': 'int', 'speed' : 'int' } } -# @PreallocMode +## +# @PreallocMode: # # Preallocation mode of QEMU image file # @@ -2618,13 +2823,13 @@ # space is really available. @full preallocation also sets up # metadata correctly. # -# Since 2.2 +# Since: 2.2 ## { 'enum': 'PreallocMode', 'data': [ 'off', 'metadata', 'falloc', 'full' ] } ## -# @BLOCK_WRITE_THRESHOLD +# @BLOCK_WRITE_THRESHOLD: # # Emitted when writes on block device reaches or exceeds the # configured write threshold. For thin-provisioned devices, this @@ -2647,7 +2852,7 @@ 'write-threshold': 'uint64' } } ## -# @block-set-write-threshold +# @block-set-write-threshold: # # Change the write threshold for a block drive. An event will be delivered # if a write to this block drive crosses the configured threshold. @@ -2665,7 +2870,7 @@ 'data': { 'node-name': 'str', 'write-threshold': 'uint64' } } ## -# @x-blockdev-change +# @x-blockdev-change: # # Dynamically reconfigure the block driver state graph. It can be used # to add, remove, insert or replace a graph node. Currently only the diff --git a/qapi/block.json b/qapi/block.json index 937337dce5..8e9f59019a 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -40,7 +40,7 @@ 'data': ['auto', 'none', 'lba', 'large', 'rechs']} ## -# @FloppyDriveType +# @FloppyDriveType: # # Type of Floppy drive to be emulated by the Floppy Disk Controller. 
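Example for the blockdev-change-medium form above (illustrative sketch; the id and filename are invented, and "read-only" is assumed to be one of the BlockdevChangeReadOnlyMode values defined elsewhere in this schema):

-> { "execute": "blockdev-change-medium",
     "arguments": { "id": "ide0-cd0", "filename": "/srv/images/new.iso",
                    "format": "raw", "read-only-mode": "read-only" } }
<- { "return": {} }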
# @@ -56,9 +56,10 @@ 'data': ['144', '288', '120', 'none', 'auto']} ## -# @BlockdevSnapshotInternal +# @BlockdevSnapshotInternal: # -# @device: the name of the device to generate the snapshot from +# @device: the device name or node-name of a root node to generate the snapshot +# from # # @name: the name of the internal snapshot to be created # @@ -72,7 +73,7 @@ 'data': { 'device': 'str', 'name': 'str' } } ## -# @blockdev-snapshot-internal-sync +# @blockdev-snapshot-internal-sync: # # Synchronously take an internal snapshot of a block device, when the format # of the image used supports it. @@ -80,39 +81,40 @@ # For the arguments, see the documentation of BlockdevSnapshotInternal. # # Returns: nothing on success -# If @device is not a valid block device, DeviceNotFound +# If @device is not a valid block device, GenericError # If any snapshot matching @name exists, or @name is empty, # GenericError # If the format of the image used does not support it, # BlockFormatFeatureNotSupported # -# Since 1.7 +# Since: 1.7 ## { 'command': 'blockdev-snapshot-internal-sync', 'data': 'BlockdevSnapshotInternal' } ## -# @blockdev-snapshot-delete-internal-sync +# @blockdev-snapshot-delete-internal-sync: # # Synchronously delete an internal snapshot of a block device, when the format # of the image used support it. The snapshot is identified by name or id or # both. One of the name or id is required. Return SnapshotInfo for the # successfully deleted snapshot. # -# @device: the name of the device to delete the snapshot from +# @device: the device name or node-name of a root node to delete the snapshot +# from # # @id: optional the snapshot's ID to be deleted # # @name: optional the snapshot's name to be deleted # # Returns: SnapshotInfo on success -# If @device is not a valid block device, DeviceNotFound +# If @device is not a valid block device, GenericError # If snapshot not found, GenericError # If the format of the image used does not support it, # BlockFormatFeatureNotSupported # If @id and @name are both not specified, GenericError # -# Since 1.7 +# Since: 1.7 ## { 'command': 'blockdev-snapshot-delete-internal-sync', 'data': { 'device': 'str', '*id': 'str', '*name': 'str'}, @@ -123,7 +125,9 @@ # # Ejects a device from a removable drive. # -# @device: The name of the device +# @device: #optional Block device name (deprecated, use @id instead) +# +# @id: #optional The name or QOM path of the guest device (since: 2.8) # # @force: @optional If true, eject regardless of whether the drive is locked. # If not specified, the default value is false. @@ -135,7 +139,10 @@ # # Since: 0.14.0 ## -{ 'command': 'eject', 'data': {'device': 'str', '*force': 'bool'} } +{ 'command': 'eject', + 'data': { '*device': 'str', + '*id': 'str', + '*force': 'bool' } } ## # @nbd-server-start: @@ -159,9 +166,9 @@ ## # @nbd-server-add: # -# Export a device to QEMU's embedded NBD server. +# Export a block node to QEMU's embedded NBD server. # -# @device: Block device to be exported +# @device: The device name or node name of the node to be exported # # @writable: Whether clients should be able to write to the device via the # NBD connection (default false). #optional @@ -183,22 +190,26 @@ { 'command': 'nbd-server-stop' } ## -# @DEVICE_TRAY_MOVED +# @DEVICE_TRAY_MOVED: # # Emitted whenever the tray of a removable device is moved by the guest or by # HMP/QMP commands # -# @device: device name +# @device: Block device name. 
This is always present for compatibility +# reasons, but it can be empty ("") if the image does not +# have a device name associated. +# +# @id: The name or QOM path of the guest device (since 2.8) # # @tray-open: true if the tray has been opened or false if it has been closed # # Since: 1.1 ## { 'event': 'DEVICE_TRAY_MOVED', - 'data': { 'device': 'str', 'tray-open': 'bool' } } + 'data': { 'device': 'str', 'id': 'str', 'tray-open': 'bool' } } ## -# @QuorumOpType +# @QuorumOpType: # # An enumeration of the quorum operation types # diff --git a/qapi/common.json b/qapi/common.json index 9353a7b377..624a8619c8 100644 --- a/qapi/common.json +++ b/qapi/common.json @@ -3,7 +3,7 @@ # QAPI common definitions ## -# @QapiErrorClass +# @QapiErrorClass: # # QEMU error classes # @@ -30,15 +30,15 @@ 'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] } ## -# @VersionTriple +# @VersionTriple: # # A three-part version number. # -# @qemu.major: The major version number. +# @major: The major version number. # -# @qemu.minor: The minor version number. +# @minor: The minor version number. # -# @qemu.micro: The micro version number. +# @micro: The micro version number. # # Since: 2.4 ## @@ -101,7 +101,7 @@ { 'command': 'query-commands', 'returns': ['CommandInfo'] } ## -# @OnOffAuto +# @OnOffAuto: # # An enumeration of three options: on, off, and auto # @@ -117,7 +117,7 @@ 'data': [ 'auto', 'on', 'off' ] } ## -# @OnOffSplit +# @OnOffSplit: # # An enumeration of three values: on, off, and split # diff --git a/qapi/crypto.json b/qapi/crypto.json index 34d2583154..15d296e3c1 100644 --- a/qapi/crypto.json +++ b/qapi/crypto.json @@ -3,7 +3,7 @@ # QAPI crypto definitions ## -# QCryptoTLSCredsEndpoint: +# @QCryptoTLSCredsEndpoint: # # The type of network endpoint that will be using the credentials. # Most types of credential require different setup / structures @@ -22,7 +22,7 @@ ## -# QCryptoSecretFormat: +# @QCryptoSecretFormat: # # The data format that the secret is provided in # @@ -36,7 +36,7 @@ ## -# QCryptoHashAlgorithm: +# @QCryptoHashAlgorithm: # # The supported algorithms for computing content digests # @@ -55,7 +55,7 @@ ## -# QCryptoCipherAlgorithm: +# @QCryptoCipherAlgorithm: # # The supported algorithms for content encryption ciphers # @@ -82,22 +82,23 @@ ## -# QCryptoCipherMode: +# @QCryptoCipherMode: # # The supported modes for content encryption ciphers # # @ecb: Electronic Code Book # @cbc: Cipher Block Chaining # @xts: XEX with tweaked code book and ciphertext stealing +# @ctr: Counter (Since 2.8) # Since: 2.6 ## { 'enum': 'QCryptoCipherMode', 'prefix': 'QCRYPTO_CIPHER_MODE', - 'data': ['ecb', 'cbc', 'xts']} + 'data': ['ecb', 'cbc', 'xts', 'ctr']} ## -# QCryptoIVGenAlgorithm: +# @QCryptoIVGenAlgorithm: # # The supported algorithms for generating initialization # vectors for full disk encryption. 
The 'plain' generator @@ -115,7 +116,7 @@ 'data': ['plain', 'plain64', 'essiv']} ## -# QCryptoBlockFormat: +# @QCryptoBlockFormat: # # The supported full disk encryption formats # @@ -130,7 +131,7 @@ 'data': ['qcow', 'luks']} ## -# QCryptoBlockOptionsBase: +# @QCryptoBlockOptionsBase: # # The common options that apply to all full disk # encryption formats @@ -143,7 +144,7 @@ 'data': { 'format': 'QCryptoBlockFormat' }} ## -# QCryptoBlockOptionsQCow: +# @QCryptoBlockOptionsQCow: # # The options that apply to QCow/QCow2 AES-CBC encryption format # @@ -157,7 +158,7 @@ 'data': { '*key-secret': 'str' }} ## -# QCryptoBlockOptionsLUKS: +# @QCryptoBlockOptionsLUKS: # # The options that apply to LUKS encryption format # @@ -171,7 +172,7 @@ ## -# QCryptoBlockCreateOptionsLUKS: +# @QCryptoBlockCreateOptionsLUKS: # # The options that apply to LUKS encryption format initialization # @@ -185,6 +186,9 @@ # Currently defaults to 'sha256' # @hash-alg: #optional the master key hash algorithm # Currently defaults to 'sha256' +# @iter-time: #optional number of milliseconds to spend in +# PBKDF passphrase processing. Currently defaults +# to 2000. (since 2.8) # Since: 2.6 ## { 'struct': 'QCryptoBlockCreateOptionsLUKS', @@ -193,11 +197,12 @@ '*cipher-mode': 'QCryptoCipherMode', '*ivgen-alg': 'QCryptoIVGenAlgorithm', '*ivgen-hash-alg': 'QCryptoHashAlgorithm', - '*hash-alg': 'QCryptoHashAlgorithm'}} + '*hash-alg': 'QCryptoHashAlgorithm', + '*iter-time': 'int'}} ## -# QCryptoBlockOpenOptions: +# @QCryptoBlockOpenOptions: # # The options that are available for all encryption formats # when opening an existing volume @@ -212,7 +217,7 @@ ## -# QCryptoBlockCreateOptions: +# @QCryptoBlockCreateOptions: # # The options that are available for all encryption formats # when initializing a new volume @@ -227,7 +232,7 @@ ## -# QCryptoBlockInfoBase: +# @QCryptoBlockInfoBase: # # The common information that applies to all full disk # encryption formats @@ -241,7 +246,7 @@ ## -# QCryptoBlockInfoLUKSSlot: +# @QCryptoBlockInfoLUKSSlot: # # Information about the LUKS block encryption key # slot options @@ -261,7 +266,7 @@ ## -# QCryptoBlockInfoLUKS: +# @QCryptoBlockInfoLUKS: # # Information about the LUKS block encryption options # @@ -289,7 +294,7 @@ 'slots': [ 'QCryptoBlockInfoLUKSSlot' ] }} ## -# QCryptoBlockInfoQCow: +# @QCryptoBlockInfoQCow: # # Information about the QCow block encryption options # @@ -300,7 +305,7 @@ ## -# QCryptoBlockInfo: +# @QCryptoBlockInfo: # # Information about the block encryption options # diff --git a/qapi/event.json b/qapi/event.json index 8642052ebc..37bf34ed6d 100644 --- a/qapi/event.json +++ b/qapi/event.json @@ -1,5 +1,5 @@ ## -# @SHUTDOWN +# @SHUTDOWN: # # Emitted when the virtual machine has shut down, indicating that qemu is # about to exit. @@ -12,7 +12,7 @@ { 'event': 'SHUTDOWN' } ## -# @POWERDOWN +# @POWERDOWN: # # Emitted when the virtual machine is powered down through the power control # system, such as via ACPI. 
@@ -22,7 +22,7 @@ { 'event': 'POWERDOWN' } ## -# @RESET +# @RESET: # # Emitted when the virtual machine is reset # @@ -31,7 +31,7 @@ { 'event': 'RESET' } ## -# @STOP +# @STOP: # # Emitted when the virtual machine is stopped # @@ -40,7 +40,7 @@ { 'event': 'STOP' } ## -# @RESUME +# @RESUME: # # Emitted when the virtual machine resumes execution # @@ -49,7 +49,7 @@ { 'event': 'RESUME' } ## -# @SUSPEND +# @SUSPEND: # # Emitted when guest enters a hardware suspension state, for example, S3 state, # which is sometimes called standby state @@ -59,7 +59,7 @@ { 'event': 'SUSPEND' } ## -# @SUSPEND_DISK +# @SUSPEND_DISK: # # Emitted when guest enters a hardware suspension state with data saved on # disk, for example, S4 state, which is sometimes called hibernate state @@ -71,7 +71,7 @@ { 'event': 'SUSPEND_DISK' } ## -# @WAKEUP +# @WAKEUP: # # Emitted when the guest has woken up from suspend state and is running # @@ -80,7 +80,7 @@ { 'event': 'WAKEUP' } ## -# @RTC_CHANGE +# @RTC_CHANGE: # # Emitted when the guest changes the RTC time. # @@ -93,7 +93,7 @@ 'data': { 'offset': 'int' } } ## -# @WATCHDOG +# @WATCHDOG: # # Emitted when the watchdog device's timer is expired # @@ -108,7 +108,7 @@ 'data': { 'action': 'WatchdogExpirationAction' } } ## -# @DEVICE_DELETED +# @DEVICE_DELETED: # # Emitted whenever the device removal completion is acknowledged by the guest. # At this point, it's safe to reuse the specified device ID. Device removal can @@ -124,7 +124,7 @@ 'data': { '*device': 'str', 'path': 'str' } } ## -# @NIC_RX_FILTER_CHANGED +# @NIC_RX_FILTER_CHANGED: # # Emitted once until the 'query-rx-filter' command is executed, the first event # will always be emitted @@ -139,7 +139,7 @@ 'data': { '*name': 'str', 'path': 'str' } } ## -# @VNC_CONNECTED +# @VNC_CONNECTED: # # Emitted when a VNC client establishes a connection # @@ -157,7 +157,7 @@ 'client': 'VncBasicInfo' } } ## -# @VNC_INITIALIZED +# @VNC_INITIALIZED: # # Emitted after authentication takes place (if any) and the VNC session is # made active @@ -173,7 +173,7 @@ 'client': 'VncClientInfo' } } ## -# @VNC_DISCONNECTED +# @VNC_DISCONNECTED: # # Emitted when the connection is closed # @@ -188,7 +188,7 @@ 'client': 'VncClientInfo' } } ## -# @SPICE_CONNECTED +# @SPICE_CONNECTED: # # Emitted when a SPICE client establishes a connection # @@ -203,7 +203,7 @@ 'client': 'SpiceBasicInfo' } } ## -# @SPICE_INITIALIZED +# @SPICE_INITIALIZED: # # Emitted after initial handshake and authentication takes place (if any) # and the SPICE channel is up and running @@ -219,7 +219,7 @@ 'client': 'SpiceChannel' } } ## -# @SPICE_DISCONNECTED +# @SPICE_DISCONNECTED: # # Emitted when the SPICE connection is closed # @@ -234,7 +234,7 @@ 'client': 'SpiceBasicInfo' } } ## -# @SPICE_MIGRATE_COMPLETED +# @SPICE_MIGRATE_COMPLETED: # # Emitted when SPICE migration has completed # @@ -243,7 +243,7 @@ { 'event': 'SPICE_MIGRATE_COMPLETED' } ## -# @MIGRATION +# @MIGRATION: # # Emitted when a migration event happens # @@ -255,7 +255,7 @@ 'data': {'status': 'MigrationStatus'}} ## -# @MIGRATION_PASS +# @MIGRATION_PASS: # # Emitted from the source side of a migration at the start of each pass # (when it syncs the dirty bitmap) @@ -268,7 +268,7 @@ 'data': { 'pass': 'int' } } ## -# @ACPI_DEVICE_OST +# @ACPI_DEVICE_OST: # # Emitted when guest executes ACPI _OST method. # @@ -280,7 +280,7 @@ 'data': { 'info': 'ACPIOSTInfo' } } ## -# @BALLOON_CHANGE +# @BALLOON_CHANGE: # # Emitted when the guest changes the actual BALLOON level. 
This value is # equivalent to the @actual field return by the 'query-balloon' command @@ -293,7 +293,7 @@ 'data': { 'actual': 'int' } } ## -# @GUEST_PANICKED +# @GUEST_PANICKED: # # Emitted when guest OS panic is detected # @@ -305,7 +305,7 @@ 'data': { 'action': 'GuestPanicAction' } } ## -# @QUORUM_FAILURE +# @QUORUM_FAILURE: # # Emitted by the Quorum block driver if it fails to establish a quorum # @@ -321,7 +321,7 @@ 'data': { 'reference': 'str', 'sector-num': 'int', 'sectors-count': 'int' } } ## -# @QUORUM_REPORT_BAD +# @QUORUM_REPORT_BAD: # # Emitted to report a corruption of a Quorum file # @@ -345,7 +345,7 @@ 'sector-num': 'int', 'sectors-count': 'int' } } ## -# @VSERPORT_CHANGE +# @VSERPORT_CHANGE: # # Emitted when the guest opens or closes a virtio-serial port. # @@ -359,7 +359,7 @@ 'data': { 'id': 'str', 'open': 'bool' } } ## -# @MEM_UNPLUG_ERROR +# @MEM_UNPLUG_ERROR: # # Emitted when memory hot unplug error occurs. # @@ -373,7 +373,7 @@ 'data': { 'device': 'str', 'msg': 'str' } } ## -# @DUMP_COMPLETED +# @DUMP_COMPLETED: # # Emitted when background dump has completed # diff --git a/qapi/introspect.json b/qapi/introspect.json index 3fd81fb540..fd4dc84196 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -11,7 +11,7 @@ # See the COPYING file in the top-level directory. ## -# @query-qmp-schema +# @query-qmp-schema: # # Command query-qmp-schema exposes the QMP wire ABI as an array of # SchemaInfo. This lets QMP clients figure out what commands and @@ -49,7 +49,7 @@ 'gen': false } # just to simplify qmp_query_json() ## -# @SchemaMetaType +# @SchemaMetaType: # # This is a @SchemaInfo's meta type, i.e. the kind of entity it # describes. @@ -75,7 +75,7 @@ 'command', 'event' ] } ## -# @SchemaInfo +# @SchemaInfo: # # @name: the entity's name, inherited from @base. # Commands and events have the name defined in the QAPI schema. @@ -105,7 +105,7 @@ 'event': 'SchemaInfoEvent' } } ## -# @SchemaInfoBuiltin +# @SchemaInfoBuiltin: # # Additional SchemaInfo members for meta-type 'builtin'. # @@ -117,7 +117,7 @@ 'data': { 'json-type': 'JSONType' } } ## -# @JSONType +# @JSONType: # # The four primitive and two structured types according to RFC 7159 # section 1, plus 'int' (split off 'number'), plus the obvious top @@ -130,7 +130,7 @@ 'object', 'array', 'value' ] } ## -# @SchemaInfoEnum +# @SchemaInfoEnum: # # Additional SchemaInfo members for meta-type 'enum'. # @@ -144,7 +144,7 @@ 'data': { 'values': ['str'] } } ## -# @SchemaInfoArray +# @SchemaInfoArray: # # Additional SchemaInfo members for meta-type 'array'. # @@ -158,7 +158,7 @@ 'data': { 'element-type': 'str' } } ## -# @SchemaInfoObject +# @SchemaInfoObject: # # Additional SchemaInfo members for meta-type 'object'. # @@ -183,7 +183,7 @@ '*variants': [ 'SchemaInfoObjectVariant' ] } } ## -# @SchemaInfoObjectMember +# @SchemaInfoObjectMember: # # An object member. # @@ -206,7 +206,7 @@ # @default's type must be null or match @type ## -# @SchemaInfoObjectVariant +# @SchemaInfoObjectVariant: # # The variant members for a value of the type tag. # @@ -221,7 +221,7 @@ 'data': { 'case': 'str', 'type': 'str' } } ## -# @SchemaInfoAlternate +# @SchemaInfoAlternate: # # Additional SchemaInfo members for meta-type 'alternate'. # @@ -237,7 +237,7 @@ 'data': { 'members': [ 'SchemaInfoAlternateMember' ] } } ## -# @SchemaInfoAlternateMember +# @SchemaInfoAlternateMember: # # An alternate member. 
# @@ -249,7 +249,7 @@ 'data': { 'type': 'str' } } ## -# @SchemaInfoCommand +# @SchemaInfoCommand: # # Additional SchemaInfo members for meta-type 'command'. # @@ -266,7 +266,7 @@ 'data': { 'arg-type': 'str', 'ret-type': 'str' } } ## -# @SchemaInfoEvent +# @SchemaInfoEvent: # # Additional SchemaInfo members for meta-type 'event'. # diff --git a/qapi/qapi-clone-visitor.c b/qapi/qapi-clone-visitor.c index 0bb8216372..34086cbfc0 100644 --- a/qapi/qapi-clone-visitor.c +++ b/qapi/qapi-clone-visitor.c @@ -110,7 +110,7 @@ static void qapi_clone_type_str(Visitor *v, const char *name, char **obj, assert(qcv->depth); /* * Pointer was already cloned by g_memdup; create fresh copy. - * Note that as long as qmp-output-visitor accepts NULL instead of + * Note that as long as qobject-output-visitor accepts NULL instead of * "", then we must do likewise. However, we want to obey the * input visitor semantics of never producing NULL when the empty * string is intended. diff --git a/qapi/qapi-visit-core.c b/qapi/qapi-visit-core.c index 55f5876dc0..63bd97b341 100644 --- a/qapi/qapi-visit-core.c +++ b/qapi/qapi-visit-core.c @@ -19,10 +19,12 @@ #include "qapi/qmp/qerror.h" #include "qapi/visitor.h" #include "qapi/visitor-impl.h" +#include "trace.h" void visit_complete(Visitor *v, void *opaque) { assert(v->type != VISITOR_OUTPUT || v->complete); + trace_visit_complete(v, opaque); if (v->complete) { v->complete(v, opaque); } @@ -30,6 +32,7 @@ void visit_complete(Visitor *v, void *opaque) void visit_free(Visitor *v) { + trace_visit_free(v); if (v) { v->free(v); } @@ -40,6 +43,7 @@ void visit_start_struct(Visitor *v, const char *name, void **obj, { Error *err = NULL; + trace_visit_start_struct(v, name, obj, size); if (obj) { assert(size); assert(!(v->type & VISITOR_OUTPUT) || *obj); @@ -53,6 +57,7 @@ void visit_start_struct(Visitor *v, const char *name, void **obj, void visit_check_struct(Visitor *v, Error **errp) { + trace_visit_check_struct(v); if (v->check_struct) { v->check_struct(v, errp); } @@ -60,6 +65,7 @@ void visit_check_struct(Visitor *v, Error **errp) void visit_end_struct(Visitor *v, void **obj) { + trace_visit_end_struct(v, obj); v->end_struct(v, obj); } @@ -69,6 +75,7 @@ void visit_start_list(Visitor *v, const char *name, GenericList **list, Error *err = NULL; assert(!list || size >= sizeof(GenericList)); + trace_visit_start_list(v, name, list, size); v->start_list(v, name, list, size, &err); if (list && (v->type & VISITOR_INPUT)) { assert(!(err && *list)); @@ -79,11 +86,13 @@ void visit_start_list(Visitor *v, const char *name, GenericList **list, GenericList *visit_next_list(Visitor *v, GenericList *tail, size_t size) { assert(tail && size >= sizeof(GenericList)); + trace_visit_next_list(v, tail, size); return v->next_list(v, tail, size); } void visit_end_list(Visitor *v, void **obj) { + trace_visit_end_list(v, obj); v->end_list(v, obj); } @@ -95,6 +104,7 @@ void visit_start_alternate(Visitor *v, const char *name, assert(obj && size >= sizeof(GenericAlternate)); assert(!(v->type & VISITOR_OUTPUT) || *obj); + trace_visit_start_alternate(v, name, obj, size, promote_int); if (v->start_alternate) { v->start_alternate(v, name, obj, size, promote_int, &err); } @@ -106,6 +116,7 @@ void visit_start_alternate(Visitor *v, const char *name, void visit_end_alternate(Visitor *v, void **obj) { + trace_visit_end_alternate(v, obj); if (v->end_alternate) { v->end_alternate(v, obj); } @@ -113,6 +124,7 @@ void visit_end_alternate(Visitor *v, void **obj) bool visit_optional(Visitor *v, const char *name, bool *present) 
{ + trace_visit_optional(v, name, present); if (v->optional) { v->optional(v, name, present); } @@ -127,6 +139,7 @@ bool visit_is_input(Visitor *v) void visit_type_int(Visitor *v, const char *name, int64_t *obj, Error **errp) { assert(obj); + trace_visit_type_int(v, name, obj); v->type_int64(v, name, obj, errp); } @@ -150,7 +163,10 @@ static void visit_type_uintN(Visitor *v, uint64_t *obj, const char *name, void visit_type_uint8(Visitor *v, const char *name, uint8_t *obj, Error **errp) { - uint64_t value = *obj; + uint64_t value; + + trace_visit_type_uint8(v, name, obj); + value = *obj; visit_type_uintN(v, &value, name, UINT8_MAX, "uint8_t", errp); *obj = value; } @@ -158,7 +174,10 @@ void visit_type_uint8(Visitor *v, const char *name, uint8_t *obj, void visit_type_uint16(Visitor *v, const char *name, uint16_t *obj, Error **errp) { - uint64_t value = *obj; + uint64_t value; + + trace_visit_type_uint16(v, name, obj); + value = *obj; visit_type_uintN(v, &value, name, UINT16_MAX, "uint16_t", errp); *obj = value; } @@ -166,7 +185,10 @@ void visit_type_uint16(Visitor *v, const char *name, uint16_t *obj, void visit_type_uint32(Visitor *v, const char *name, uint32_t *obj, Error **errp) { - uint64_t value = *obj; + uint64_t value; + + trace_visit_type_uint32(v, name, obj); + value = *obj; visit_type_uintN(v, &value, name, UINT32_MAX, "uint32_t", errp); *obj = value; } @@ -175,6 +197,7 @@ void visit_type_uint64(Visitor *v, const char *name, uint64_t *obj, Error **errp) { assert(obj); + trace_visit_type_uint64(v, name, obj); v->type_uint64(v, name, obj, errp); } @@ -198,7 +221,10 @@ static void visit_type_intN(Visitor *v, int64_t *obj, const char *name, void visit_type_int8(Visitor *v, const char *name, int8_t *obj, Error **errp) { - int64_t value = *obj; + int64_t value; + + trace_visit_type_int8(v, name, obj); + value = *obj; visit_type_intN(v, &value, name, INT8_MIN, INT8_MAX, "int8_t", errp); *obj = value; } @@ -206,7 +232,10 @@ void visit_type_int8(Visitor *v, const char *name, int8_t *obj, Error **errp) void visit_type_int16(Visitor *v, const char *name, int16_t *obj, Error **errp) { - int64_t value = *obj; + int64_t value; + + trace_visit_type_int16(v, name, obj); + value = *obj; visit_type_intN(v, &value, name, INT16_MIN, INT16_MAX, "int16_t", errp); *obj = value; } @@ -214,7 +243,10 @@ void visit_type_int16(Visitor *v, const char *name, int16_t *obj, void visit_type_int32(Visitor *v, const char *name, int32_t *obj, Error **errp) { - int64_t value = *obj; + int64_t value; + + trace_visit_type_int32(v, name, obj); + value = *obj; visit_type_intN(v, &value, name, INT32_MIN, INT32_MAX, "int32_t", errp); *obj = value; } @@ -223,6 +255,7 @@ void visit_type_int64(Visitor *v, const char *name, int64_t *obj, Error **errp) { assert(obj); + trace_visit_type_int64(v, name, obj); v->type_int64(v, name, obj, errp); } @@ -230,6 +263,7 @@ void visit_type_size(Visitor *v, const char *name, uint64_t *obj, Error **errp) { assert(obj); + trace_visit_type_size(v, name, obj); if (v->type_size) { v->type_size(v, name, obj, errp); } else { @@ -240,6 +274,7 @@ void visit_type_size(Visitor *v, const char *name, uint64_t *obj, void visit_type_bool(Visitor *v, const char *name, bool *obj, Error **errp) { assert(obj); + trace_visit_type_bool(v, name, obj); v->type_bool(v, name, obj, errp); } @@ -252,6 +287,7 @@ void visit_type_str(Visitor *v, const char *name, char **obj, Error **errp) * can enable: assert(!(v->type & VISITOR_OUTPUT) || *obj); */ + trace_visit_type_str(v, name, obj); v->type_str(v, name, obj, &err); if 
(v->type & VISITOR_INPUT) { assert(!err != !*obj); @@ -263,6 +299,7 @@ void visit_type_number(Visitor *v, const char *name, double *obj, Error **errp) { assert(obj); + trace_visit_type_number(v, name, obj); v->type_number(v, name, obj, errp); } @@ -272,6 +309,7 @@ void visit_type_any(Visitor *v, const char *name, QObject **obj, Error **errp) assert(obj); assert(v->type != VISITOR_OUTPUT || *obj); + trace_visit_type_any(v, name, obj); v->type_any(v, name, obj, &err); if (v->type == VISITOR_INPUT) { assert(!err != !*obj); @@ -281,6 +319,7 @@ void visit_type_any(Visitor *v, const char *name, QObject **obj, Error **errp) void visit_type_null(Visitor *v, const char *name, Error **errp) { + trace_visit_type_null(v, name); v->type_null(v, name, errp); } diff --git a/qapi/qmp-event.c b/qapi/qmp-event.c index 8bba165bfb..802ede48e3 100644 --- a/qapi/qmp-event.c +++ b/qapi/qmp-event.c @@ -35,21 +35,12 @@ static void timestamp_put(QDict *qdict) int err; QObject *obj; qemu_timeval tv; - int64_t sec, usec; err = qemu_gettimeofday(&tv); - if (err < 0) { - /* Put -1 to indicate failure of getting host time */ - sec = -1; - usec = -1; - } else { - sec = tv.tv_sec; - usec = tv.tv_usec; - } - - obj = qobject_from_jsonf("{ 'seconds': %" PRId64 ", " - "'microseconds': %" PRId64 " }", - sec, usec); + /* Put -1 to indicate failure of getting host time */ + obj = qobject_from_jsonf("{ 'seconds': %lld, 'microseconds': %lld }", + err < 0 ? -1LL : (long long)tv.tv_sec, + err < 0 ? -1LL : (long long)tv.tv_usec); qdict_put_obj(qdict, "timestamp", obj); } diff --git a/qapi/qmp-output-visitor.c b/qapi/qmp-output-visitor.c deleted file mode 100644 index 9e3b67ce13..0000000000 --- a/qapi/qmp-output-visitor.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Core Definitions for QAPI/QMP Command Registry - * - * Copyright (C) 2012-2016 Red Hat, Inc. - * Copyright IBM, Corp. 2011 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "qapi/qmp-output-visitor.h" -#include "qapi/visitor-impl.h" -#include "qemu/queue.h" -#include "qemu-common.h" -#include "qapi/qmp/types.h" - -typedef struct QStackEntry -{ - QObject *value; - void *qapi; /* sanity check that caller uses same pointer */ - QSLIST_ENTRY(QStackEntry) node; -} QStackEntry; - -struct QmpOutputVisitor -{ - Visitor visitor; - QSLIST_HEAD(, QStackEntry) stack; /* Stack of unfinished containers */ - QObject *root; /* Root of the output visit */ - QObject **result; /* User's storage location for result */ -}; - -#define qmp_output_add(qov, name, value) \ - qmp_output_add_obj(qov, name, QOBJECT(value)) -#define qmp_output_push(qov, value, qapi) \ - qmp_output_push_obj(qov, QOBJECT(value), qapi) - -static QmpOutputVisitor *to_qov(Visitor *v) -{ - return container_of(v, QmpOutputVisitor, visitor); -} - -/* Push @value onto the stack of current QObjects being built */ -static void qmp_output_push_obj(QmpOutputVisitor *qov, QObject *value, - void *qapi) -{ - QStackEntry *e = g_malloc0(sizeof(*e)); - - assert(qov->root); - assert(value); - e->value = value; - e->qapi = qapi; - QSLIST_INSERT_HEAD(&qov->stack, e, node); -} - -/* Pop a value off the stack of QObjects being built, and return it. 
*/ -static QObject *qmp_output_pop(QmpOutputVisitor *qov, void *qapi) -{ - QStackEntry *e = QSLIST_FIRST(&qov->stack); - QObject *value; - - assert(e); - assert(e->qapi == qapi); - QSLIST_REMOVE_HEAD(&qov->stack, node); - value = e->value; - assert(value); - g_free(e); - return value; -} - -/* Add @value to the current QObject being built. - * If the stack is visiting a dictionary or list, @value is now owned - * by that container. Otherwise, @value is now the root. */ -static void qmp_output_add_obj(QmpOutputVisitor *qov, const char *name, - QObject *value) -{ - QStackEntry *e = QSLIST_FIRST(&qov->stack); - QObject *cur = e ? e->value : NULL; - - if (!cur) { - /* Don't allow reuse of visitor on more than one root */ - assert(!qov->root); - qov->root = value; - } else { - switch (qobject_type(cur)) { - case QTYPE_QDICT: - assert(name); - qdict_put_obj(qobject_to_qdict(cur), name, value); - break; - case QTYPE_QLIST: - assert(!name); - qlist_append_obj(qobject_to_qlist(cur), value); - break; - default: - g_assert_not_reached(); - } - } -} - -static void qmp_output_start_struct(Visitor *v, const char *name, void **obj, - size_t unused, Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - QDict *dict = qdict_new(); - - qmp_output_add(qov, name, dict); - qmp_output_push(qov, dict, obj); -} - -static void qmp_output_end_struct(Visitor *v, void **obj) -{ - QmpOutputVisitor *qov = to_qov(v); - QObject *value = qmp_output_pop(qov, obj); - assert(qobject_type(value) == QTYPE_QDICT); -} - -static void qmp_output_start_list(Visitor *v, const char *name, - GenericList **listp, size_t size, - Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - QList *list = qlist_new(); - - qmp_output_add(qov, name, list); - qmp_output_push(qov, list, listp); -} - -static GenericList *qmp_output_next_list(Visitor *v, GenericList *tail, - size_t size) -{ - return tail->next; -} - -static void qmp_output_end_list(Visitor *v, void **obj) -{ - QmpOutputVisitor *qov = to_qov(v); - QObject *value = qmp_output_pop(qov, obj); - assert(qobject_type(value) == QTYPE_QLIST); -} - -static void qmp_output_type_int64(Visitor *v, const char *name, int64_t *obj, - Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - qmp_output_add(qov, name, qint_from_int(*obj)); -} - -static void qmp_output_type_uint64(Visitor *v, const char *name, uint64_t *obj, - Error **errp) -{ - /* FIXME: QMP outputs values larger than INT64_MAX as negative */ - QmpOutputVisitor *qov = to_qov(v); - qmp_output_add(qov, name, qint_from_int(*obj)); -} - -static void qmp_output_type_bool(Visitor *v, const char *name, bool *obj, - Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - qmp_output_add(qov, name, qbool_from_bool(*obj)); -} - -static void qmp_output_type_str(Visitor *v, const char *name, char **obj, - Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - if (*obj) { - qmp_output_add(qov, name, qstring_from_str(*obj)); - } else { - qmp_output_add(qov, name, qstring_from_str("")); - } -} - -static void qmp_output_type_number(Visitor *v, const char *name, double *obj, - Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - qmp_output_add(qov, name, qfloat_from_double(*obj)); -} - -static void qmp_output_type_any(Visitor *v, const char *name, QObject **obj, - Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - qobject_incref(*obj); - qmp_output_add_obj(qov, name, *obj); -} - -static void qmp_output_type_null(Visitor *v, const char *name, Error **errp) -{ - QmpOutputVisitor *qov = to_qov(v); - qmp_output_add_obj(qov, name, qnull()); -} - -/* 
Finish building, and return the root object. - * The root object is never null. The caller becomes the object's - * owner, and should use qobject_decref() when done with it. */ -static void qmp_output_complete(Visitor *v, void *opaque) -{ - QmpOutputVisitor *qov = to_qov(v); - - /* A visit must have occurred, with each start paired with end. */ - assert(qov->root && QSLIST_EMPTY(&qov->stack)); - assert(opaque == qov->result); - - qobject_incref(qov->root); - *qov->result = qov->root; - qov->result = NULL; -} - -static void qmp_output_free(Visitor *v) -{ - QmpOutputVisitor *qov = to_qov(v); - QStackEntry *e; - - while (!QSLIST_EMPTY(&qov->stack)) { - e = QSLIST_FIRST(&qov->stack); - QSLIST_REMOVE_HEAD(&qov->stack, node); - g_free(e); - } - - qobject_decref(qov->root); - g_free(qov); -} - -Visitor *qmp_output_visitor_new(QObject **result) -{ - QmpOutputVisitor *v; - - v = g_malloc0(sizeof(*v)); - - v->visitor.type = VISITOR_OUTPUT; - v->visitor.start_struct = qmp_output_start_struct; - v->visitor.end_struct = qmp_output_end_struct; - v->visitor.start_list = qmp_output_start_list; - v->visitor.next_list = qmp_output_next_list; - v->visitor.end_list = qmp_output_end_list; - v->visitor.type_int64 = qmp_output_type_int64; - v->visitor.type_uint64 = qmp_output_type_uint64; - v->visitor.type_bool = qmp_output_type_bool; - v->visitor.type_str = qmp_output_type_str; - v->visitor.type_number = qmp_output_type_number; - v->visitor.type_any = qmp_output_type_any; - v->visitor.type_null = qmp_output_type_null; - v->visitor.complete = qmp_output_complete; - v->visitor.free = qmp_output_free; - - *result = NULL; - v->result = result; - - return &v->visitor; -} diff --git a/qapi/qmp-registry.c b/qapi/qmp-registry.c index 68b24c98b0..e8053686f3 100644 --- a/qapi/qmp-registry.c +++ b/qapi/qmp-registry.c @@ -30,6 +30,14 @@ void qmp_register_command(const char *name, QmpCommandFunc *fn, QTAILQ_INSERT_TAIL(&qmp_commands, cmd, node); } +void qmp_unregister_command(const char *name) +{ + QmpCommand *cmd = qmp_find_command(name); + + QTAILQ_REMOVE(&qmp_commands, cmd, node); + g_free(cmd); +} + QmpCommand *qmp_find_command(const char *name) { QmpCommand *cmd; diff --git a/qapi/qmp-input-visitor.c b/qapi/qobject-input-visitor.c similarity index 52% rename from qapi/qmp-input-visitor.c rename to qapi/qobject-input-visitor.c index 64dd392e6f..0063327b3b 100644 --- a/qapi/qmp-input-visitor.c +++ b/qapi/qobject-input-visitor.c @@ -14,7 +14,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qmp-input-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "qapi/visitor-impl.h" #include "qemu/queue.h" #include "qemu-common.h" @@ -34,7 +34,7 @@ typedef struct StackObject QSLIST_ENTRY(StackObject) node; } StackObject; -struct QmpInputVisitor +struct QObjectInputVisitor { Visitor visitor; @@ -49,14 +49,14 @@ struct QmpInputVisitor bool strict; }; -static QmpInputVisitor *to_qiv(Visitor *v) +static QObjectInputVisitor *to_qiv(Visitor *v) { - return container_of(v, QmpInputVisitor, visitor); + return container_of(v, QObjectInputVisitor, visitor); } -static QObject *qmp_input_get_object(QmpInputVisitor *qiv, - const char *name, - bool consume) +static QObject *qobject_input_get_object(QObjectInputVisitor *qiv, + const char *name, + bool consume, Error **errp) { StackObject *tos; QObject *qobj; @@ -64,6 +64,7 @@ static QObject *qmp_input_get_object(QmpInputVisitor *qiv, if (QSLIST_EMPTY(&qiv->stack)) { /* Starting at root, name is ignored. 
*/ + assert(qiv->root); return qiv->root; } @@ -79,10 +80,14 @@ static QObject *qmp_input_get_object(QmpInputVisitor *qiv, bool removed = g_hash_table_remove(tos->h, name); assert(removed); } + if (!ret) { + error_setg(errp, QERR_MISSING_PARAMETER, name); + } } else { assert(qobject_type(qobj) == QTYPE_QLIST); assert(!name); ret = qlist_entry_obj(tos->entry); + assert(ret); if (consume) { tos->entry = qlist_next(tos->entry); } @@ -97,8 +102,9 @@ static void qdict_add_key(const char *key, QObject *obj, void *opaque) g_hash_table_insert(h, (gpointer) key, NULL); } -static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, - void *qapi, Error **errp) +static const QListEntry *qobject_input_push(QObjectInputVisitor *qiv, + QObject *obj, void *qapi, + Error **errp) { GHashTable *h; StackObject *tos = g_new0(StackObject, 1); @@ -120,9 +126,9 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, } -static void qmp_input_check_struct(Visitor *v, Error **errp) +static void qobject_input_check_struct(Visitor *v, Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); + QObjectInputVisitor *qiv = to_qiv(v); StackObject *tos = QSLIST_FIRST(&qiv->stack); assert(tos && !tos->entry); @@ -140,7 +146,7 @@ static void qmp_input_check_struct(Visitor *v, Error **errp) } } -static void qmp_input_stack_object_free(StackObject *tos) +static void qobject_input_stack_object_free(StackObject *tos) { if (tos->h) { g_hash_table_unref(tos->h); @@ -149,33 +155,36 @@ static void qmp_input_stack_object_free(StackObject *tos) g_free(tos); } -static void qmp_input_pop(Visitor *v, void **obj) +static void qobject_input_pop(Visitor *v, void **obj) { - QmpInputVisitor *qiv = to_qiv(v); + QObjectInputVisitor *qiv = to_qiv(v); StackObject *tos = QSLIST_FIRST(&qiv->stack); assert(tos && tos->qapi == obj); QSLIST_REMOVE_HEAD(&qiv->stack, node); - qmp_input_stack_object_free(tos); + qobject_input_stack_object_free(tos); } -static void qmp_input_start_struct(Visitor *v, const char *name, void **obj, - size_t size, Error **errp) +static void qobject_input_start_struct(Visitor *v, const char *name, void **obj, + size_t size, Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); Error *err = NULL; if (obj) { *obj = NULL; } - if (!qobj || qobject_type(qobj) != QTYPE_QDICT) { + if (!qobj) { + return; + } + if (qobject_type(qobj) != QTYPE_QDICT) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? 
name : "null", "QDict"); return; } - qmp_input_push(qiv, qobj, obj, &err); + qobject_input_push(qiv, qobj, obj, &err); if (err) { error_propagate(errp, err); return; @@ -187,14 +196,18 @@ static void qmp_input_start_struct(Visitor *v, const char *name, void **obj, } -static void qmp_input_start_list(Visitor *v, const char *name, - GenericList **list, size_t size, Error **errp) +static void qobject_input_start_list(Visitor *v, const char *name, + GenericList **list, size_t size, + Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); const QListEntry *entry; - if (!qobj || qobject_type(qobj) != QTYPE_QLIST) { + if (!qobj) { + return; + } + if (qobject_type(qobj) != QTYPE_QLIST) { if (list) { *list = NULL; } @@ -203,7 +216,7 @@ static void qmp_input_start_list(Visitor *v, const char *name, return; } - entry = qmp_input_push(qiv, qobj, list, errp); + entry = qobject_input_push(qiv, qobj, list, errp); if (list) { if (entry) { *list = g_malloc0(size); @@ -213,10 +226,10 @@ static void qmp_input_start_list(Visitor *v, const char *name, } } -static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail, - size_t size) +static GenericList *qobject_input_next_list(Visitor *v, GenericList *tail, + size_t size) { - QmpInputVisitor *qiv = to_qiv(v); + QObjectInputVisitor *qiv = to_qiv(v); StackObject *so = QSLIST_FIRST(&qiv->stack); if (!so->entry) { @@ -227,16 +240,15 @@ static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail, } -static void qmp_input_start_alternate(Visitor *v, const char *name, - GenericAlternate **obj, size_t size, - bool promote_int, Error **errp) +static void qobject_input_start_alternate(Visitor *v, const char *name, + GenericAlternate **obj, size_t size, + bool promote_int, Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, false); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, false, errp); if (!qobj) { *obj = NULL; - error_setg(errp, QERR_MISSING_PARAMETER, name ? name : "null"); return; } *obj = g_malloc0(size); @@ -246,12 +258,17 @@ static void qmp_input_start_alternate(Visitor *v, const char *name, } } -static void qmp_input_type_int64(Visitor *v, const char *name, int64_t *obj, - Error **errp) +static void qobject_input_type_int64(Visitor *v, const char *name, int64_t *obj, + Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QInt *qint = qobject_to_qint(qmp_input_get_object(qiv, name, true)); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); + QInt *qint; + if (!qobj) { + return; + } + qint = qobject_to_qint(qobj); if (!qint) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? 
name : "null", "integer"); @@ -261,13 +278,18 @@ static void qmp_input_type_int64(Visitor *v, const char *name, int64_t *obj, *obj = qint_get_int(qint); } -static void qmp_input_type_uint64(Visitor *v, const char *name, uint64_t *obj, - Error **errp) +static void qobject_input_type_uint64(Visitor *v, const char *name, + uint64_t *obj, Error **errp) { /* FIXME: qobject_to_qint mishandles values over INT64_MAX */ - QmpInputVisitor *qiv = to_qiv(v); - QInt *qint = qobject_to_qint(qmp_input_get_object(qiv, name, true)); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); + QInt *qint; + if (!qobj) { + return; + } + qint = qobject_to_qint(qobj); if (!qint) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "integer"); @@ -277,12 +299,17 @@ static void qmp_input_type_uint64(Visitor *v, const char *name, uint64_t *obj, *obj = qint_get_int(qint); } -static void qmp_input_type_bool(Visitor *v, const char *name, bool *obj, - Error **errp) +static void qobject_input_type_bool(Visitor *v, const char *name, bool *obj, + Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QBool *qbool = qobject_to_qbool(qmp_input_get_object(qiv, name, true)); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); + QBool *qbool; + if (!qobj) { + return; + } + qbool = qobject_to_qbool(qobj); if (!qbool) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null", "boolean"); @@ -292,14 +319,19 @@ static void qmp_input_type_bool(Visitor *v, const char *name, bool *obj, *obj = qbool_get_bool(qbool); } -static void qmp_input_type_str(Visitor *v, const char *name, char **obj, - Error **errp) +static void qobject_input_type_str(Visitor *v, const char *name, char **obj, + Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QString *qstr = qobject_to_qstring(qmp_input_get_object(qiv, name, true)); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); + QString *qstr; + *obj = NULL; + if (!qobj) { + return; + } + qstr = qobject_to_qstring(qobj); if (!qstr) { - *obj = NULL; error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? 
name : "null", "string"); return; @@ -308,14 +340,17 @@ static void qmp_input_type_str(Visitor *v, const char *name, char **obj, *obj = g_strdup(qstring_get_str(qstr)); } -static void qmp_input_type_number(Visitor *v, const char *name, double *obj, - Error **errp) +static void qobject_input_type_number(Visitor *v, const char *name, double *obj, + Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); QInt *qint; QFloat *qfloat; + if (!qobj) { + return; + } qint = qobject_to_qint(qobj); if (qint) { *obj = qint_get_int(qobject_to_qint(qobj)); @@ -332,20 +367,29 @@ static void qmp_input_type_number(Visitor *v, const char *name, double *obj, "number"); } -static void qmp_input_type_any(Visitor *v, const char *name, QObject **obj, - Error **errp) +static void qobject_input_type_any(Visitor *v, const char *name, QObject **obj, + Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); + + *obj = NULL; + if (!qobj) { + return; + } qobject_incref(qobj); *obj = qobj; } -static void qmp_input_type_null(Visitor *v, const char *name, Error **errp) +static void qobject_input_type_null(Visitor *v, const char *name, Error **errp) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, true); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, true, errp); + + if (!qobj) { + return; + } if (qobject_type(qobj) != QTYPE_QNULL) { error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? 
name : "null", @@ -353,10 +397,10 @@ static void qmp_input_type_null(Visitor *v, const char *name, Error **errp) } } -static void qmp_input_optional(Visitor *v, const char *name, bool *present) +static void qobject_input_optional(Visitor *v, const char *name, bool *present) { - QmpInputVisitor *qiv = to_qiv(v); - QObject *qobj = qmp_input_get_object(qiv, name, false); + QObjectInputVisitor *qiv = to_qiv(v); + QObject *qobj = qobject_input_get_object(qiv, name, false, NULL); if (!qobj) { *present = false; @@ -366,43 +410,44 @@ static void qmp_input_optional(Visitor *v, const char *name, bool *present) *present = true; } -static void qmp_input_free(Visitor *v) +static void qobject_input_free(Visitor *v) { - QmpInputVisitor *qiv = to_qiv(v); + QObjectInputVisitor *qiv = to_qiv(v); while (!QSLIST_EMPTY(&qiv->stack)) { StackObject *tos = QSLIST_FIRST(&qiv->stack); QSLIST_REMOVE_HEAD(&qiv->stack, node); - qmp_input_stack_object_free(tos); + qobject_input_stack_object_free(tos); } qobject_decref(qiv->root); g_free(qiv); } -Visitor *qmp_input_visitor_new(QObject *obj, bool strict) +Visitor *qobject_input_visitor_new(QObject *obj, bool strict) { - QmpInputVisitor *v; + QObjectInputVisitor *v; + assert(obj); v = g_malloc0(sizeof(*v)); v->visitor.type = VISITOR_INPUT; - v->visitor.start_struct = qmp_input_start_struct; - v->visitor.check_struct = qmp_input_check_struct; - v->visitor.end_struct = qmp_input_pop; - v->visitor.start_list = qmp_input_start_list; - v->visitor.next_list = qmp_input_next_list; - v->visitor.end_list = qmp_input_pop; - v->visitor.start_alternate = qmp_input_start_alternate; - v->visitor.type_int64 = qmp_input_type_int64; - v->visitor.type_uint64 = qmp_input_type_uint64; - v->visitor.type_bool = qmp_input_type_bool; - v->visitor.type_str = qmp_input_type_str; - v->visitor.type_number = qmp_input_type_number; - v->visitor.type_any = qmp_input_type_any; - v->visitor.type_null = qmp_input_type_null; - v->visitor.optional = qmp_input_optional; - v->visitor.free = qmp_input_free; + v->visitor.start_struct = qobject_input_start_struct; + v->visitor.check_struct = qobject_input_check_struct; + v->visitor.end_struct = qobject_input_pop; + v->visitor.start_list = qobject_input_start_list; + v->visitor.next_list = qobject_input_next_list; + v->visitor.end_list = qobject_input_pop; + v->visitor.start_alternate = qobject_input_start_alternate; + v->visitor.type_int64 = qobject_input_type_int64; + v->visitor.type_uint64 = qobject_input_type_uint64; + v->visitor.type_bool = qobject_input_type_bool; + v->visitor.type_str = qobject_input_type_str; + v->visitor.type_number = qobject_input_type_number; + v->visitor.type_any = qobject_input_type_any; + v->visitor.type_null = qobject_input_type_null; + v->visitor.optional = qobject_input_optional; + v->visitor.free = qobject_input_free; v->strict = strict; v->root = obj; diff --git a/qapi/qobject-output-visitor.c b/qapi/qobject-output-visitor.c new file mode 100644 index 0000000000..871127079d --- /dev/null +++ b/qapi/qobject-output-visitor.c @@ -0,0 +1,254 @@ +/* + * Core Definitions for QAPI/QMP Command Registry + * + * Copyright (C) 2012-2016 Red Hat, Inc. + * Copyright IBM, Corp. 2011 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU LGPL, version 2.1 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/qobject-output-visitor.h" +#include "qapi/visitor-impl.h" +#include "qemu/queue.h" +#include "qemu-common.h" +#include "qapi/qmp/types.h" + +typedef struct QStackEntry { + QObject *value; + void *qapi; /* sanity check that caller uses same pointer */ + QSLIST_ENTRY(QStackEntry) node; +} QStackEntry; + +struct QObjectOutputVisitor { + Visitor visitor; + QSLIST_HEAD(, QStackEntry) stack; /* Stack of unfinished containers */ + QObject *root; /* Root of the output visit */ + QObject **result; /* User's storage location for result */ +}; + +#define qobject_output_add(qov, name, value) \ + qobject_output_add_obj(qov, name, QOBJECT(value)) +#define qobject_output_push(qov, value, qapi) \ + qobject_output_push_obj(qov, QOBJECT(value), qapi) + +static QObjectOutputVisitor *to_qov(Visitor *v) +{ + return container_of(v, QObjectOutputVisitor, visitor); +} + +/* Push @value onto the stack of current QObjects being built */ +static void qobject_output_push_obj(QObjectOutputVisitor *qov, QObject *value, + void *qapi) +{ + QStackEntry *e = g_malloc0(sizeof(*e)); + + assert(qov->root); + assert(value); + e->value = value; + e->qapi = qapi; + QSLIST_INSERT_HEAD(&qov->stack, e, node); +} + +/* Pop a value off the stack of QObjects being built, and return it. */ +static QObject *qobject_output_pop(QObjectOutputVisitor *qov, void *qapi) +{ + QStackEntry *e = QSLIST_FIRST(&qov->stack); + QObject *value; + + assert(e); + assert(e->qapi == qapi); + QSLIST_REMOVE_HEAD(&qov->stack, node); + value = e->value; + assert(value); + g_free(e); + return value; +} + +/* Add @value to the current QObject being built. + * If the stack is visiting a dictionary or list, @value is now owned + * by that container. Otherwise, @value is now the root. */ +static void qobject_output_add_obj(QObjectOutputVisitor *qov, const char *name, + QObject *value) +{ + QStackEntry *e = QSLIST_FIRST(&qov->stack); + QObject *cur = e ? 
e->value : NULL; + + if (!cur) { + /* Don't allow reuse of visitor on more than one root */ + assert(!qov->root); + qov->root = value; + } else { + switch (qobject_type(cur)) { + case QTYPE_QDICT: + assert(name); + qdict_put_obj(qobject_to_qdict(cur), name, value); + break; + case QTYPE_QLIST: + assert(!name); + qlist_append_obj(qobject_to_qlist(cur), value); + break; + default: + g_assert_not_reached(); + } + } +} + +static void qobject_output_start_struct(Visitor *v, const char *name, + void **obj, size_t unused, Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + QDict *dict = qdict_new(); + + qobject_output_add(qov, name, dict); + qobject_output_push(qov, dict, obj); +} + +static void qobject_output_end_struct(Visitor *v, void **obj) +{ + QObjectOutputVisitor *qov = to_qov(v); + QObject *value = qobject_output_pop(qov, obj); + assert(qobject_type(value) == QTYPE_QDICT); +} + +static void qobject_output_start_list(Visitor *v, const char *name, + GenericList **listp, size_t size, + Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + QList *list = qlist_new(); + + qobject_output_add(qov, name, list); + qobject_output_push(qov, list, listp); +} + +static GenericList *qobject_output_next_list(Visitor *v, GenericList *tail, + size_t size) +{ + return tail->next; +} + +static void qobject_output_end_list(Visitor *v, void **obj) +{ + QObjectOutputVisitor *qov = to_qov(v); + QObject *value = qobject_output_pop(qov, obj); + assert(qobject_type(value) == QTYPE_QLIST); +} + +static void qobject_output_type_int64(Visitor *v, const char *name, + int64_t *obj, Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + qobject_output_add(qov, name, qint_from_int(*obj)); +} + +static void qobject_output_type_uint64(Visitor *v, const char *name, + uint64_t *obj, Error **errp) +{ + /* FIXME values larger than INT64_MAX become negative */ + QObjectOutputVisitor *qov = to_qov(v); + qobject_output_add(qov, name, qint_from_int(*obj)); +} + +static void qobject_output_type_bool(Visitor *v, const char *name, bool *obj, + Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + qobject_output_add(qov, name, qbool_from_bool(*obj)); +} + +static void qobject_output_type_str(Visitor *v, const char *name, char **obj, + Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + if (*obj) { + qobject_output_add(qov, name, qstring_from_str(*obj)); + } else { + qobject_output_add(qov, name, qstring_from_str("")); + } +} + +static void qobject_output_type_number(Visitor *v, const char *name, + double *obj, Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + qobject_output_add(qov, name, qfloat_from_double(*obj)); +} + +static void qobject_output_type_any(Visitor *v, const char *name, + QObject **obj, Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + qobject_incref(*obj); + qobject_output_add_obj(qov, name, *obj); +} + +static void qobject_output_type_null(Visitor *v, const char *name, Error **errp) +{ + QObjectOutputVisitor *qov = to_qov(v); + qobject_output_add_obj(qov, name, qnull()); +} + +/* Finish building, and return the root object. + * The root object is never null. The caller becomes the object's + * owner, and should use qobject_decref() when done with it. */ +static void qobject_output_complete(Visitor *v, void *opaque) +{ + QObjectOutputVisitor *qov = to_qov(v); + + /* A visit must have occurred, with each start paired with end. 
*/ + assert(qov->root && QSLIST_EMPTY(&qov->stack)); + assert(opaque == qov->result); + + qobject_incref(qov->root); + *qov->result = qov->root; + qov->result = NULL; +} + +static void qobject_output_free(Visitor *v) +{ + QObjectOutputVisitor *qov = to_qov(v); + QStackEntry *e; + + while (!QSLIST_EMPTY(&qov->stack)) { + e = QSLIST_FIRST(&qov->stack); + QSLIST_REMOVE_HEAD(&qov->stack, node); + g_free(e); + } + + qobject_decref(qov->root); + g_free(qov); +} + +Visitor *qobject_output_visitor_new(QObject **result) +{ + QObjectOutputVisitor *v; + + v = g_malloc0(sizeof(*v)); + + v->visitor.type = VISITOR_OUTPUT; + v->visitor.start_struct = qobject_output_start_struct; + v->visitor.end_struct = qobject_output_end_struct; + v->visitor.start_list = qobject_output_start_list; + v->visitor.next_list = qobject_output_next_list; + v->visitor.end_list = qobject_output_end_list; + v->visitor.type_int64 = qobject_output_type_int64; + v->visitor.type_uint64 = qobject_output_type_uint64; + v->visitor.type_bool = qobject_output_type_bool; + v->visitor.type_str = qobject_output_type_str; + v->visitor.type_number = qobject_output_type_number; + v->visitor.type_any = qobject_output_type_any; + v->visitor.type_null = qobject_output_type_null; + v->visitor.complete = qobject_output_complete; + v->visitor.free = qobject_output_free; + + *result = NULL; + v->result = result; + + return &v->visitor; +} diff --git a/qapi/rocker.json b/qapi/rocker.json index 2fe7fdfa66..ace27760f1 100644 --- a/qapi/rocker.json +++ b/qapi/rocker.json @@ -1,5 +1,5 @@ ## -# @Rocker: +# @RockerSwitch: # # Rocker switch information. # diff --git a/qapi/trace-events b/qapi/trace-events new file mode 100644 index 0000000000..2c5d3bc7d7 --- /dev/null +++ b/qapi/trace-events @@ -0,0 +1,33 @@ +# qapi-visit-core.c +visit_free(void *v) "v=%p" +visit_complete(void *v, void *opaque) "v=%p opaque=%p" + +visit_start_struct(void *v, const char *name, void *obj, size_t size) "v=%p name=%s obj=%p size=%zu" +visit_check_struct(void *v) "v=%p" +visit_end_struct(void *v, void *obj) "v=%p obj=%p" + +visit_start_list(void *v, const char *name, void *obj, size_t size) "v=%p name=%s obj=%p size=%zu" +visit_next_list(void *v, void *tail, size_t size) "v=%p tail=%p size=%zu" +visit_end_list(void *v, void *obj) "v=%p obj=%p" + +visit_start_alternate(void *v, const char *name, void *obj, size_t size, bool promote_int) "v=%p name=%s obj=%p size=%zu promote_int=%d" +visit_end_alternate(void *v, void *obj) "v=%p obj=%p" + +visit_optional(void *v, const char *name, bool *present) "v=%p name=%s present=%p" + +visit_type_enum(void *v, const char *name, int *obj) "v=%p name=%s obj=%p" +visit_type_int(void *v, const char *name, int64_t *obj) "v=%p name=%s obj=%p" +visit_type_uint8(void *v, const char *name, uint8_t *obj) "v=%p name=%s obj=%p" +visit_type_uint16(void *v, const char *name, uint16_t *obj) "v=%p name=%s obj=%p" +visit_type_uint32(void *v, const char *name, uint32_t *obj) "v=%p name=%s obj=%p" +visit_type_uint64(void *v, const char *name, uint64_t *obj) "v=%p name=%s obj=%p" +visit_type_int8(void *v, const char *name, int8_t *obj) "v=%p name=%s obj=%p" +visit_type_int16(void *v, const char *name, int16_t *obj) "v=%p name=%s obj=%p" +visit_type_int32(void *v, const char *name, int32_t *obj) "v=%p name=%s obj=%p" +visit_type_int64(void *v, const char *name, int64_t *obj) "v=%p name=%s obj=%p" +visit_type_size(void *v, const char *name, uint64_t *obj) "v=%p name=%s obj=%p" +visit_type_bool(void *v, const char *name, bool *obj) "v=%p name=%s obj=%p" 
+visit_type_str(void *v, const char *name, char **obj) "v=%p name=%s obj=%p" +visit_type_number(void *v, const char *name, double *obj) "v=%p name=%s obj=%p" +visit_type_any(void *v, const char *name, void *obj) "v=%p name=%s obj=%p" +visit_type_null(void *v, const char *name) "v=%p name=%s" diff --git a/qapi/trace.json b/qapi/trace.json index e87214677c..4fd39b7792 100644 --- a/qapi/trace.json +++ b/qapi/trace.json @@ -17,7 +17,7 @@ # # @enabled: The event is dynamically enabled. # -# Since 2.2 +# Since: 2.2 ## { 'enum': 'TraceEventState', 'data': ['unavailable', 'disabled', 'enabled'] } @@ -34,7 +34,7 @@ # An event is per-vCPU if it has the "vcpu" property in the "trace-events" # files. # -# Since 2.2 +# Since: 2.2 ## { 'struct': 'TraceEventInfo', 'data': {'name': 'str', 'state': 'TraceEventState', 'vcpu': 'bool'} } @@ -58,7 +58,7 @@ # exact match, @vcpu is given and the event does not have the "vcpu" property, # an error is returned. # -# Since 2.2 +# Since: 2.2 ## { 'command': 'trace-event-get-state', 'data': {'name': 'str', '*vcpu': 'int'}, @@ -83,7 +83,7 @@ # match, @vcpu is given and the event does not have the "vcpu" property, an # error is returned. # -# Since 2.2 +# Since: 2.2 ## { 'command': 'trace-event-set-state', 'data': {'name': 'str', 'enable': 'bool', '*ignore-unavailable': 'bool', diff --git a/qdev-monitor.c b/qdev-monitor.c index e19617fa8b..c73410c02e 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -28,6 +28,7 @@ #include "qemu/config-file.h" #include "qemu/error-report.h" #include "qemu/help_option.h" +#include "sysemu/block-backend.h" /* * Aliases were a bad idea from the start. Let's keep them @@ -538,10 +539,28 @@ static BusState *qbus_find(const char *path, Error **errp) return bus; } +void qdev_set_id(DeviceState *dev, const char *id) +{ + if (id) { + dev->id = id; + } + + if (dev->id) { + object_property_add_child(qdev_get_peripheral(), dev->id, + OBJECT(dev), NULL); + } else { + static int anon_count; + gchar *name = g_strdup_printf("device[%d]", anon_count++); + object_property_add_child(qdev_get_peripheral_anon(), name, + OBJECT(dev), NULL); + g_free(name); + } +} + DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) { DeviceClass *dc; - const char *driver, *path, *id; + const char *driver, *path; DeviceState *dev; BusState *bus = NULL; Error *err = NULL; @@ -590,21 +609,7 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) qdev_set_parent_bus(dev, bus); } - id = qemu_opts_id(opts); - if (id) { - dev->id = id; - } - - if (dev->id) { - object_property_add_child(qdev_get_peripheral(), dev->id, - OBJECT(dev), NULL); - } else { - static int anon_count; - gchar *name = g_strdup_printf("device[%d]", anon_count++); - object_property_add_child(qdev_get_peripheral_anon(), name, - OBJECT(dev), NULL); - g_free(name); - } + qdev_set_id(dev, qemu_opts_id(opts)); /* set properties */ if (qemu_opt_foreach(opts, set_property, dev, &err)) { @@ -801,7 +806,7 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) object_unref(OBJECT(dev)); } -void qmp_device_del(const char *id, Error **errp) +static DeviceState *find_device_state(const char *id, Error **errp) { Object *obj; @@ -819,15 +824,40 @@ void qmp_device_del(const char *id, Error **errp) if (!obj) { error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found", id); - return; + return NULL; } if (!object_dynamic_cast(obj, TYPE_DEVICE)) { error_setg(errp, "%s is not a hotpluggable device", id); - return; + return NULL; } - qdev_unplug(DEVICE(obj), errp); + return DEVICE(obj); +} 
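A usage sketch for the QObject output visitor introduced above in qapi/qobject-output-visitor.c (illustrative only, not part of the patch): it drives the visitor through the generic visit_* entry points and uses error_abort for brevity; the header paths follow the includes at the top of that file, plus qapi/visitor.h and qapi/error.h for the visit_type_str() and error_abort declarations.

    /* Illustrative sketch: turn a single string into a QObject using the
     * new output visitor.  The returned root is owned by the caller. */
    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "qapi/visitor.h"
    #include "qapi/qobject-output-visitor.h"

    static QObject *example_build_qstring(void)
    {
        QObject *obj = NULL;
        Visitor *v = qobject_output_visitor_new(&obj);
        char *str = (char *)"hello";

        /* A single scalar visit becomes the root of the output. */
        visit_type_str(v, NULL, &str, &error_abort);
        visit_complete(v, &obj);    /* must pass the same storage that was
                                     * given to qobject_output_visitor_new() */
        visit_free(v);

        return obj;                 /* release with qobject_decref() */
    }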
+ +void qmp_device_del(const char *id, Error **errp) +{ + DeviceState *dev = find_device_state(id, errp); + if (dev != NULL) { + qdev_unplug(dev, errp); + } +} + +BlockBackend *blk_by_qdev_id(const char *id, Error **errp) +{ + DeviceState *dev; + BlockBackend *blk; + + dev = find_device_state(id, errp); + if (dev == NULL) { + return NULL; + } + + blk = blk_by_dev(dev); + if (!blk) { + error_setg(errp, "Device does not have a block device backend"); + } + return blk; } void qdev_machine_init(void) diff --git a/qemu-char.c b/qemu-char.c index c0cce9a72b..f73b6f80b8 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -41,6 +41,7 @@ #include "io/channel-file.h" #include "io/channel-tls.h" #include "sysemu/replay.h" +#include "qemu/help_option.h" #include @@ -94,6 +95,8 @@ #define READ_RETRIES 10 #define TCP_MAX_FDS 16 +typedef struct MuxDriver MuxDriver; + /***********************************************************/ /* Socket address helpers */ @@ -195,6 +198,8 @@ CharDriverState *qemu_chr_alloc(ChardevCommon *backend, Error **errp) void qemu_chr_be_event(CharDriverState *s, int event) { + CharBackend *be = s->be; + /* Keep track if the char device is open */ switch (event) { case CHR_EVENT_OPENED: @@ -205,9 +210,11 @@ void qemu_chr_be_event(CharDriverState *s, int event) break; } - if (!s->chr_event) + if (!be || !be->chr_event) { return; - s->chr_event(s->handler_opaque, event); + } + + be->chr_event(be->opaque, event); } void qemu_chr_be_generic_open(CharDriverState *s) @@ -271,10 +278,15 @@ static int qemu_chr_fe_write_buffer(CharDriverState *s, const uint8_t *buf, int return res; } -int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len) +int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len) { + CharDriverState *s = be->chr; int ret; + if (!s) { + return 0; + } + if (s->replay && replay_mode == REPLAY_MODE_PLAY) { int offset; replay_char_write_event_load(&ret, &offset); @@ -299,7 +311,7 @@ int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len) return ret; } -int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) +static int qemu_chr_write_all(CharDriverState *s, const uint8_t *buf, int len) { int offset; int res; @@ -323,15 +335,27 @@ int qemu_chr_fe_write_all(CharDriverState *s, const uint8_t *buf, int len) return offset; } -int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len) +int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len) { + CharDriverState *s = be->chr; + + if (!s) { + return 0; + } + + return qemu_chr_write_all(s, buf, len); +} + +int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len) +{ + CharDriverState *s = be->chr; int offset = 0, counter = 10; int res; - if (!s->chr_sync_read) { + if (!s || !s->chr_sync_read) { return 0; } - + if (s->replay && replay_mode == REPLAY_MODE_PLAY) { return replay_char_read_all_load(buf); } @@ -368,10 +392,12 @@ int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, int len) return offset; } -int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg) +int qemu_chr_fe_ioctl(CharBackend *be, int cmd, void *arg) { + CharDriverState *s = be->chr; int res; - if (!s->chr_ioctl || s->replay) { + + if (!s || !s->chr_ioctl || s->replay) { res = -ENOTSUP; } else { res = s->chr_ioctl(s, cmd, arg); @@ -382,15 +408,21 @@ int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg) int qemu_chr_be_can_write(CharDriverState *s) { - if (!s->chr_can_read) + CharBackend *be = s->be; + + if (!be || !be->chr_can_read) { return 0; - return 
s->chr_can_read(s->handler_opaque); + } + + return be->chr_can_read(be->opaque); } void qemu_chr_be_write_impl(CharDriverState *s, uint8_t *buf, int len) { - if (s->chr_read) { - s->chr_read(s->handler_opaque, buf, len); + CharBackend *be = s->be; + + if (be && be->chr_read) { + be->chr_read(be->opaque, buf, len); } } @@ -406,11 +438,12 @@ void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len) } } -int qemu_chr_fe_get_msgfd(CharDriverState *s) +int qemu_chr_fe_get_msgfd(CharBackend *be) { + CharDriverState *s = be->chr; int fd; - int res = (qemu_chr_fe_get_msgfds(s, &fd, 1) == 1) ? fd : -1; - if (s->replay) { + int res = (qemu_chr_fe_get_msgfds(be, &fd, 1) == 1) ? fd : -1; + if (s && s->replay) { fprintf(stderr, "Replay: get msgfd is not supported for serial devices yet\n"); exit(1); @@ -418,13 +451,25 @@ int qemu_chr_fe_get_msgfd(CharDriverState *s) return res; } -int qemu_chr_fe_get_msgfds(CharDriverState *s, int *fds, int len) +int qemu_chr_fe_get_msgfds(CharBackend *be, int *fds, int len) { + CharDriverState *s = be->chr; + + if (!s) { + return -1; + } + return s->get_msgfds ? s->get_msgfds(s, fds, len) : -1; } -int qemu_chr_fe_set_msgfds(CharDriverState *s, int *fds, int num) +int qemu_chr_fe_set_msgfds(CharBackend *be, int *fds, int num) { + CharDriverState *s = be->chr; + + if (!s) { + return -1; + } + return s->set_msgfds ? s->set_msgfds(s, fds, num) : -1; } @@ -433,56 +478,34 @@ int qemu_chr_add_client(CharDriverState *s, int fd) return s->chr_add_client ? s->chr_add_client(s, fd) : -1; } -void qemu_chr_accept_input(CharDriverState *s) +void qemu_chr_fe_accept_input(CharBackend *be) { + CharDriverState *s = be->chr; + + if (!s) { + return; + } + if (s->chr_accept_input) s->chr_accept_input(s); qemu_notify_event(); } -void qemu_chr_fe_printf(CharDriverState *s, const char *fmt, ...) +void qemu_chr_fe_printf(CharBackend *be, const char *fmt, ...) { char buf[READ_BUF_LEN]; va_list ap; va_start(ap, fmt); vsnprintf(buf, sizeof(buf), fmt, ap); - qemu_chr_fe_write(s, (uint8_t *)buf, strlen(buf)); + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(be, (uint8_t *)buf, strlen(buf)); va_end(ap); } static void remove_fd_in_watch(CharDriverState *chr); - -void qemu_chr_add_handlers(CharDriverState *s, - IOCanReadHandler *fd_can_read, - IOReadHandler *fd_read, - IOEventHandler *fd_event, - void *opaque) -{ - int fe_open; - - if (!opaque && !fd_can_read && !fd_read && !fd_event) { - fe_open = 0; - remove_fd_in_watch(s); - } else { - fe_open = 1; - } - s->chr_can_read = fd_can_read; - s->chr_read = fd_read; - s->chr_event = fd_event; - s->handler_opaque = opaque; - if (fe_open && s->chr_update_read_handler) - s->chr_update_read_handler(s); - - if (!s->explicit_fe_open) { - qemu_chr_fe_set_open(s, fe_open); - } - - /* We're connecting to an already opened device, so let's make sure we - also get the open event */ - if (fe_open && s->be_open) { - qemu_chr_be_generic_open(s); - } -} +static void mux_chr_set_handlers(CharDriverState *chr, GMainContext *context); +static void mux_set_focus(MuxDriver *d, int focus); static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { @@ -492,6 +515,7 @@ static int null_chr_write(CharDriverState *chr, const uint8_t *buf, int len) static CharDriverState *qemu_chr_open_null(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { CharDriverState *chr; @@ -502,7 +526,7 @@ static CharDriverState *qemu_chr_open_null(const char *id, return NULL; } chr->chr_write = null_chr_write; - chr->explicit_be_open = true; + *be_opened = false; return chr; } @@ -510,12 +534,9 @@ static CharDriverState *qemu_chr_open_null(const char *id, #define MAX_MUX 4 #define MUX_BUFFER_SIZE 32 /* Must be a power of 2. */ #define MUX_BUFFER_MASK (MUX_BUFFER_SIZE - 1) -typedef struct { - IOCanReadHandler *chr_can_read[MAX_MUX]; - IOReadHandler *chr_read[MAX_MUX]; - IOEventHandler *chr_event[MAX_MUX]; - void *ext_opaque[MAX_MUX]; - CharDriverState *drv; +struct MuxDriver { + CharBackend *backends[MAX_MUX]; + CharBackend chr; int focus; int mux_cnt; int term_got_escape; @@ -531,8 +552,7 @@ typedef struct { /* Protected by the CharDriverState chr_write_lock. */ int linestart; int64_t timestamps_start; -} MuxDriver; - +}; /* Called with chr_write_lock held. */ static int mux_chr_write(CharDriverState *chr, const uint8_t *buf, int len) @@ -540,7 +560,7 @@ static int mux_chr_write(CharDriverState *chr, const uint8_t *buf, int len) MuxDriver *d = chr->opaque; int ret; if (!d->timestamps) { - ret = qemu_chr_fe_write(d->drv, buf, len); + ret = qemu_chr_fe_write(&d->chr, buf, len); } else { int i; @@ -562,10 +582,13 @@ static int mux_chr_write(CharDriverState *chr, const uint8_t *buf, int len) (secs / 60) % 60, secs % 60, (int)(ti % 1000)); - qemu_chr_fe_write(d->drv, (uint8_t *)buf1, strlen(buf1)); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(&d->chr, + (uint8_t *)buf1, strlen(buf1)); d->linestart = 0; } - ret += qemu_chr_fe_write(d->drv, buf+i, 1); + ret += qemu_chr_fe_write(&d->chr, buf + i, 1); if (buf[i] == '\n') { d->linestart = 1; } @@ -600,21 +623,26 @@ static void mux_print_help(CharDriverState *chr) "\n\rEscape-Char set to Ascii: 0x%02x\n\r\n\r", term_escape_char); } - qemu_chr_fe_write(chr, (uint8_t *)cbuf, strlen(cbuf)); + /* XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_write_all(chr, (uint8_t *)cbuf, strlen(cbuf)); for (i = 0; mux_help[i] != NULL; i++) { for (j=0; mux_help[i][j] != '\0'; j++) { if (mux_help[i][j] == '%') - qemu_chr_fe_write(chr, (uint8_t *)ebuf, strlen(ebuf)); + qemu_chr_write_all(chr, (uint8_t *)ebuf, strlen(ebuf)); else - qemu_chr_fe_write(chr, (uint8_t *)&mux_help[i][j], 1); + qemu_chr_write_all(chr, (uint8_t *)&mux_help[i][j], 1); } } } static void mux_chr_send_event(MuxDriver *d, int mux_nr, int event) { - if (d->chr_event[mux_nr]) - d->chr_event[mux_nr](d->ext_opaque[mux_nr], event); + CharBackend *be = d->backends[mux_nr]; + + if (be && be->chr_event) { + be->chr_event(be->opaque, event); + } } static int mux_proc_byte(CharDriverState *chr, MuxDriver *d, int ch) @@ -631,7 +659,7 @@ static int mux_proc_byte(CharDriverState *chr, MuxDriver *d, int ch) case 'x': { const char *term = "QEMU: Terminated\n\r"; - qemu_chr_fe_write(chr, (uint8_t *)term, strlen(term)); + qemu_chr_write_all(chr, (uint8_t *)term, strlen(term)); exit(0); break; } @@ -642,12 +670,9 @@ static int mux_proc_byte(CharDriverState *chr, MuxDriver *d, int ch) qemu_chr_be_event(chr, CHR_EVENT_BREAK); break; case 'c': + assert(d->mux_cnt > 0); /* handler registered with first fe */ /* Switch to the next registered device */ - mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); - d->focus++; - if (d->focus >= d->mux_cnt) - d->focus = 0; - mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN); + mux_set_focus(d, (d->focus + 1) % d->mux_cnt); break; case 't': d->timestamps = !d->timestamps; @@ -668,12 +693,12 @@ static void mux_chr_accept_input(CharDriverState *chr) { MuxDriver *d = chr->opaque; int m = d->focus; + CharBackend *be = d->backends[m]; - while (d->prod[m] != d->cons[m] && - d->chr_can_read[m] && - d->chr_can_read[m](d->ext_opaque[m])) { - d->chr_read[m](d->ext_opaque[m], - &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1); + while (be && d->prod[m] != d->cons[m] && + be->chr_can_read && be->chr_can_read(be->opaque)) { + be->chr_read(be->opaque, + &d->buffer[m][d->cons[m]++ & MUX_BUFFER_MASK], 1); } } @@ -682,11 +707,16 @@ static int mux_chr_can_read(void *opaque) CharDriverState *chr = opaque; MuxDriver *d = chr->opaque; int m = d->focus; + CharBackend *be = d->backends[m]; - if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE) + if ((d->prod[m] - d->cons[m]) < MUX_BUFFER_SIZE) { return 1; - if (d->chr_can_read[m]) - return d->chr_can_read[m](d->ext_opaque[m]); + } + + if (be && be->chr_can_read) { + return be->chr_can_read(be->opaque); + } + return 0; } @@ -695,59 +725,39 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, int size) CharDriverState *chr = opaque; MuxDriver *d = chr->opaque; int m = d->focus; + CharBackend *be = d->backends[m]; int i; - mux_chr_accept_input (opaque); + mux_chr_accept_input(opaque); - for(i = 0; i < size; i++) + for (i = 0; i < size; i++) if (mux_proc_byte(chr, d, buf[i])) { if (d->prod[m] == d->cons[m] && - d->chr_can_read[m] && - d->chr_can_read[m](d->ext_opaque[m])) - d->chr_read[m](d->ext_opaque[m], &buf[i], 1); + be && be->chr_can_read && + be->chr_can_read(be->opaque)) + be->chr_read(be->opaque, &buf[i], 1); else d->buffer[m][d->prod[m]++ & MUX_BUFFER_MASK] = buf[i]; } } +static bool muxes_realized; + static void mux_chr_event(void *opaque, int event) { CharDriverState *chr = opaque; MuxDriver *d = chr->opaque; int i; + if (!muxes_realized) { + return; + } + /* Send the event to all registered listeners */ for (i = 0; i < d->mux_cnt; i++) 
mux_chr_send_event(d, i, event); } -static void mux_chr_update_read_handler(CharDriverState *chr) -{ - MuxDriver *d = chr->opaque; - - if (d->mux_cnt >= MAX_MUX) { - fprintf(stderr, "Cannot add I/O handlers, MUX array is full\n"); - return; - } - d->ext_opaque[d->mux_cnt] = chr->handler_opaque; - d->chr_can_read[d->mux_cnt] = chr->chr_can_read; - d->chr_read[d->mux_cnt] = chr->chr_read; - d->chr_event[d->mux_cnt] = chr->chr_event; - /* Fix up the real driver with mux routines */ - if (d->mux_cnt == 0) { - qemu_chr_add_handlers(d->drv, mux_chr_can_read, mux_chr_read, - mux_chr_event, chr); - } - if (d->focus != -1) { - mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); - } - d->focus = d->mux_cnt; - d->mux_cnt++; - mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN); -} - -static bool muxes_realized; - /** * Called after processing of default and command-line-specified * chardevs to deliver CHR_EVENT_OPENED events to any FEs attached @@ -789,19 +799,57 @@ static Notifier muxes_realize_notify = { static GSource *mux_chr_add_watch(CharDriverState *s, GIOCondition cond) { MuxDriver *d = s->opaque; - return d->drv->chr_add_watch(d->drv, cond); + CharDriverState *chr = qemu_chr_fe_get_driver(&d->chr); + + return chr->chr_add_watch(chr, cond); } -static void mux_chr_close(struct CharDriverState *chr) +static void mux_chr_free(struct CharDriverState *chr) { MuxDriver *d = chr->opaque; + int i; + for (i = 0; i < d->mux_cnt; i++) { + CharBackend *be = d->backends[i]; + if (be) { + be->chr = NULL; + } + } + qemu_chr_fe_deinit(&d->chr); g_free(d); } +static void mux_chr_set_handlers(CharDriverState *chr, GMainContext *context) +{ + MuxDriver *d = chr->opaque; + + /* Fix up the real driver with mux routines */ + qemu_chr_fe_set_handlers(&d->chr, + mux_chr_can_read, + mux_chr_read, + mux_chr_event, + chr, + context, true); +} + +static void mux_set_focus(MuxDriver *d, int focus) +{ + assert(focus >= 0); + assert(focus < d->mux_cnt); + + if (d->focus != -1) { + mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_OUT); + } + + d->focus = focus; + mux_chr_send_event(d, d->focus, CHR_EVENT_MUX_IN); +} + static CharDriverState *qemu_chr_open_mux(const char *id, ChardevBackend *backend, - ChardevReturn *ret, Error **errp) + ChardevReturn *ret, + bool *be_opened, + Error **errp) { ChardevMux *mux = backend->u.mux.data; CharDriverState *chr, *drv; @@ -821,11 +869,9 @@ static CharDriverState *qemu_chr_open_mux(const char *id, d = g_new0(MuxDriver, 1); chr->opaque = d; - d->drv = drv; d->focus = -1; - chr->chr_close = mux_chr_close; + chr->chr_free = mux_chr_free; chr->chr_write = mux_chr_write; - chr->chr_update_read_handler = mux_chr_update_read_handler; chr->chr_accept_input = mux_chr_accept_input; /* Frontend guest-open / -close notification is not support with muxes */ chr->chr_set_fe_open = NULL; @@ -835,12 +881,133 @@ static CharDriverState *qemu_chr_open_mux(const char *id, /* only default to opened state if we've realized the initial * set of muxes */ - chr->explicit_be_open = muxes_realized ? 
0 : 1; + *be_opened = muxes_realized; chr->is_mux = 1; + if (!qemu_chr_fe_init(&d->chr, drv, errp)) { + qemu_chr_free(chr); + return NULL; + } return chr; } +CharDriverState *qemu_chr_fe_get_driver(CharBackend *be) +{ + return be->chr; +} + +bool qemu_chr_fe_init(CharBackend *b, CharDriverState *s, Error **errp) +{ + int tag = 0; + + if (s->is_mux) { + MuxDriver *d = s->opaque; + + if (d->mux_cnt >= MAX_MUX) { + goto unavailable; + } + + d->backends[d->mux_cnt] = b; + tag = d->mux_cnt++; + } else if (s->be) { + goto unavailable; + } else { + s->be = b; + } + + b->fe_open = false; + b->tag = tag; + b->chr = s; + return true; + +unavailable: + error_setg(errp, QERR_DEVICE_IN_USE, s->label); + return false; +} + +static bool qemu_chr_is_busy(CharDriverState *s) +{ + if (s->is_mux) { + MuxDriver *d = s->opaque; + return d->mux_cnt >= 0; + } else { + return s->be != NULL; + } +} + +void qemu_chr_fe_deinit(CharBackend *b) +{ + assert(b); + + if (b->chr) { + qemu_chr_fe_set_handlers(b, NULL, NULL, NULL, NULL, NULL, true); + b->chr->be = NULL; + if (b->chr->is_mux) { + MuxDriver *d = b->chr->opaque; + d->backends[b->tag] = NULL; + } + b->chr = NULL; + } +} + +void qemu_chr_fe_set_handlers(CharBackend *b, + IOCanReadHandler *fd_can_read, + IOReadHandler *fd_read, + IOEventHandler *fd_event, + void *opaque, + GMainContext *context, + bool set_open) +{ + CharDriverState *s; + int fe_open; + + s = b->chr; + if (!s) { + return; + } + + if (!opaque && !fd_can_read && !fd_read && !fd_event) { + fe_open = 0; + remove_fd_in_watch(s); + } else { + fe_open = 1; + } + b->chr_can_read = fd_can_read; + b->chr_read = fd_read; + b->chr_event = fd_event; + b->opaque = opaque; + if (s->chr_update_read_handler) { + s->chr_update_read_handler(s, context); + } + + if (set_open) { + qemu_chr_fe_set_open(b, fe_open); + } + + if (fe_open) { + qemu_chr_fe_take_focus(b); + /* We're connecting to an already opened device, so let's make sure we + also get the open event */ + if (s->be_open) { + qemu_chr_be_generic_open(s); + } + } + + if (s->is_mux) { + mux_chr_set_handlers(s, context); + } +} + +void qemu_chr_fe_take_focus(CharBackend *b) +{ + if (!b->chr) { + return; + } + + if (b->chr->is_mux) { + mux_set_focus(b->chr->opaque, b->tag); + } +} typedef struct IOWatchPoll { @@ -852,6 +1019,7 @@ typedef struct IOWatchPoll IOCanReadHandler *fd_can_read; GSourceFunc fd_read; void *opaque; + GMainContext *context; } IOWatchPoll; static IOWatchPoll *io_watch_poll_from_source(GSource *source) @@ -859,7 +1027,8 @@ static IOWatchPoll *io_watch_poll_from_source(GSource *source) return container_of(source, IOWatchPoll, parent); } -static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) +static gboolean io_watch_poll_prepare(GSource *source, + gint *timeout_) { IOWatchPoll *iwp = io_watch_poll_from_source(source); bool now_active = iwp->fd_can_read(iwp->opaque) > 0; @@ -872,7 +1041,7 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) iwp->src = qio_channel_create_watch( iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); - g_source_attach(iwp->src, NULL); + g_source_attach(iwp->src, iwp->context); } else { g_source_destroy(iwp->src); g_source_unref(iwp->src); @@ -916,22 +1085,31 @@ static GSourceFuncs io_watch_poll_funcs = { }; /* Can only be used for read */ -static guint io_add_watch_poll(QIOChannel *ioc, +static guint io_add_watch_poll(CharDriverState *chr, + QIOChannel *ioc, IOCanReadHandler *fd_can_read, QIOChannelFunc 
fd_read, - gpointer user_data) + gpointer user_data, + GMainContext *context) { IOWatchPoll *iwp; int tag; + char *name; - iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll)); + iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, + sizeof(IOWatchPoll)); iwp->fd_can_read = fd_can_read; iwp->opaque = user_data; iwp->ioc = ioc; iwp->fd_read = (GSourceFunc) fd_read; iwp->src = NULL; + iwp->context = context; - tag = g_source_attach(&iwp->parent, NULL); + name = g_strdup_printf("chardev-iowatch-%s", chr->label); + g_source_set_name((GSource *)iwp, name); + g_free(name); + + tag = g_source_attach(&iwp->parent, context); g_source_unref(&iwp->parent); return tag; } @@ -1063,19 +1241,21 @@ static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond) return qio_channel_create_watch(s->ioc_out, cond); } -static void fd_chr_update_read_handler(CharDriverState *chr) +static void fd_chr_update_read_handler(CharDriverState *chr, + GMainContext *context) { FDCharDriver *s = chr->opaque; remove_fd_in_watch(chr); if (s->ioc_in) { - chr->fd_in_tag = io_add_watch_poll(s->ioc_in, + chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in, fd_chr_read_poll, - fd_chr_read, chr); + fd_chr_read, chr, + context); } } -static void fd_chr_close(struct CharDriverState *chr) +static void fd_chr_free(struct CharDriverState *chr) { FDCharDriver *s = chr->opaque; @@ -1097,6 +1277,7 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out, { CharDriverState *chr; FDCharDriver *s; + char *name; chr = qemu_chr_alloc(backend, errp); if (!chr) { @@ -1104,14 +1285,20 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out, } s = g_new0(FDCharDriver, 1); s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + name = g_strdup_printf("chardev-file-in-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_in), name); + g_free(name); s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); + name = g_strdup_printf("chardev-file-out-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc_out), name); + g_free(name); qemu_set_nonblock(fd_out); s->chr = chr; chr->opaque = s; chr->chr_add_watch = fd_chr_add_watch; chr->chr_write = fd_chr_write; chr->chr_update_read_handler = fd_chr_update_read_handler; - chr->chr_close = fd_chr_close; + chr->chr_free = fd_chr_free; return chr; } @@ -1119,6 +1306,7 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out, static CharDriverState *qemu_chr_open_pipe(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevHostdev *opts = backend->u.pipe.data; @@ -1192,15 +1380,16 @@ static void qemu_chr_set_echo_stdio(CharDriverState *chr, bool echo) tcsetattr (0, TCSANOW, &tty); } -static void qemu_chr_close_stdio(struct CharDriverState *chr) +static void qemu_chr_free_stdio(struct CharDriverState *chr) { term_exit(); - fd_chr_close(chr); + fd_chr_free(chr); } static CharDriverState *qemu_chr_open_stdio(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevStdio *opts = backend->u.stdio.data; @@ -1229,12 +1418,15 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, sigaction(SIGCONT, &act, NULL); chr = qemu_chr_open_fd(0, 1, common, errp); - chr->chr_close = qemu_chr_close_stdio; + if (!chr) { + return NULL; + } + chr->chr_free = qemu_chr_free_stdio; chr->chr_set_echo = qemu_chr_set_echo_stdio; if (opts->has_signal) { stdio_allow_signal = opts->signal; } - qemu_chr_fe_set_echo(chr, false); + qemu_chr_set_echo_stdio(chr, 
false); return chr; } @@ -1279,6 +1471,7 @@ static gboolean pty_chr_timer(gpointer opaque) static void pty_chr_rearm_timer(CharDriverState *chr, int ms) { PtyCharDriver *s = chr->opaque; + char *name; if (s->timer_tag) { g_source_remove(s->timer_tag); @@ -1286,10 +1479,14 @@ static void pty_chr_rearm_timer(CharDriverState *chr, int ms) } if (ms == 1000) { + name = g_strdup_printf("pty-timer-secs-%s", chr->label); s->timer_tag = g_timeout_add_seconds(1, pty_chr_timer, chr); } else { + name = g_strdup_printf("pty-timer-ms-%s", chr->label); s->timer_tag = g_timeout_add(ms, pty_chr_timer, chr); } + g_source_set_name_by_id(s->timer_tag, name); + g_free(name); } /* Called with chr_write_lock held. */ @@ -1315,7 +1512,8 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr) } } -static void pty_chr_update_read_handler(CharDriverState *chr) +static void pty_chr_update_read_handler(CharDriverState *chr, + GMainContext *context) { qemu_mutex_lock(&chr->chr_write_lock); pty_chr_update_read_handler_locked(chr); @@ -1417,14 +1615,15 @@ static void pty_chr_state(CharDriverState *chr, int connected) s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr); } if (!chr->fd_in_tag) { - chr->fd_in_tag = io_add_watch_poll(s->ioc, + chr->fd_in_tag = io_add_watch_poll(chr, s->ioc, pty_chr_read_poll, - pty_chr_read, chr); + pty_chr_read, + chr, NULL); } } } -static void pty_chr_close(struct CharDriverState *chr) +static void pty_chr_free(struct CharDriverState *chr) { PtyCharDriver *s = chr->opaque; @@ -1443,6 +1642,7 @@ static void pty_chr_close(struct CharDriverState *chr) static CharDriverState *qemu_chr_open_pty(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { CharDriverState *chr; @@ -1450,6 +1650,7 @@ static CharDriverState *qemu_chr_open_pty(const char *id, int master_fd, slave_fd; char pty_name[PATH_MAX]; ChardevCommon *common = backend->u.pty.data; + char *name; master_fd = qemu_openpty_raw(&slave_fd, pty_name); if (master_fd < 0) { @@ -1477,11 +1678,14 @@ static CharDriverState *qemu_chr_open_pty(const char *id, chr->opaque = s; chr->chr_write = pty_chr_write; chr->chr_update_read_handler = pty_chr_update_read_handler; - chr->chr_close = pty_chr_close; + chr->chr_free = pty_chr_free; chr->chr_add_watch = pty_chr_add_watch; - chr->explicit_be_open = true; + *be_opened = false; s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd)); + name = g_strdup_printf("chardev-pty-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(s->ioc), name); + g_free(name); s->timer_tag = 0; return chr; @@ -1672,21 +1876,25 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) return 0; } -static void qemu_chr_close_tty(CharDriverState *chr) +static void qemu_chr_free_tty(CharDriverState *chr) { - fd_chr_close(chr); + fd_chr_free(chr); } static CharDriverState *qemu_chr_open_tty_fd(int fd, ChardevCommon *backend, + bool *be_opened, Error **errp) { CharDriverState *chr; tty_serial_init(fd, 115200, 'N', 8, 1); chr = qemu_chr_open_fd(fd, fd, backend, errp); + if (!chr) { + return NULL; + } chr->chr_ioctl = tty_serial_ioctl; - chr->chr_close = qemu_chr_close_tty; + chr->chr_free = qemu_chr_free_tty; return chr; } #endif /* __linux__ || __sun__ */ @@ -1792,7 +2000,7 @@ static int pp_ioctl(CharDriverState *chr, int cmd, void *arg) return 0; } -static void pp_close(CharDriverState *chr) +static void pp_free(CharDriverState *chr) { ParallelCharDriver *drv = chr->opaque; int fd = drv->fd; @@ -1806,6 +2014,7 @@ static void pp_close(CharDriverState 
*chr) static CharDriverState *qemu_chr_open_pp_fd(int fd, ChardevCommon *backend, + bool *be_opened, Error **errp) { CharDriverState *chr; @@ -1826,7 +2035,7 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd, chr->opaque = drv; chr->chr_write = null_chr_write; chr->chr_ioctl = pp_ioctl; - chr->chr_close = pp_close; + chr->chr_free = pp_free; drv->fd = fd; drv->mode = IEEE1284_MODE_COMPAT; @@ -1878,6 +2087,7 @@ static int pp_ioctl(CharDriverState *chr, int cmd, void *arg) static CharDriverState *qemu_chr_open_pp_fd(int fd, ChardevCommon *backend, + bool *be_opened, Error **errp) { CharDriverState *chr; @@ -1889,7 +2099,7 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd, chr->opaque = (void *)(intptr_t)fd; chr->chr_write = null_chr_write; chr->chr_ioctl = pp_ioctl; - chr->explicit_be_open = true; + *be_opened = false; return chr; } #endif @@ -1925,7 +2135,7 @@ typedef struct { static int win_chr_poll(void *opaque); static int win_chr_pipe_poll(void *opaque); -static void win_chr_close(CharDriverState *chr) +static void win_chr_free(CharDriverState *chr) { WinCharState *s = chr->opaque; @@ -2012,7 +2222,7 @@ static int win_chr_init(CharDriverState *chr, const char *filename, Error **errp return 0; fail: - win_chr_close(chr); + win_chr_free(chr); return -1; } @@ -2124,7 +2334,7 @@ static CharDriverState *qemu_chr_open_win_path(const char *filename, s = g_new0(WinCharState, 1); chr->opaque = s; chr->chr_write = win_chr_write; - chr->chr_close = win_chr_close; + chr->chr_free = win_chr_free; if (win_chr_init(chr, filename, errp) < 0) { g_free(s); @@ -2210,7 +2420,7 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename, return 0; fail: - win_chr_close(chr); + win_chr_free(chr); return -1; } @@ -2218,6 +2428,7 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename, static CharDriverState *qemu_chr_open_pipe(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevHostdev *opts = backend->u.pipe.data; @@ -2233,7 +2444,7 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, s = g_new0(WinCharState, 1); chr->opaque = s; chr->chr_write = win_chr_write; - chr->chr_close = win_chr_close; + chr->chr_free = win_chr_free; if (win_chr_pipe_init(chr, filename, errp) < 0) { g_free(s); @@ -2264,6 +2475,7 @@ static CharDriverState *qemu_chr_open_win_file(HANDLE fd_out, static CharDriverState *qemu_chr_open_win_con(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevCommon *common = backend->u.console.data; @@ -2389,7 +2601,7 @@ static void qemu_chr_set_echo_win_stdio(CharDriverState *chr, bool echo) } } -static void win_stdio_close(CharDriverState *chr) +static void win_stdio_free(CharDriverState *chr) { WinStdioCharState *stdio = chr->opaque; @@ -2404,12 +2616,12 @@ static void win_stdio_close(CharDriverState *chr) } g_free(chr->opaque); - g_free(chr); } static CharDriverState *qemu_chr_open_stdio(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { CharDriverState *chr; @@ -2434,7 +2646,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, chr->opaque = stdio; chr->chr_write = win_stdio_write; - chr->chr_close = win_stdio_close; + chr->chr_free = win_stdio_free; if (is_console) { if (qemu_add_wait_object(stdio->hStdIn, @@ -2477,7 +2689,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, SetConsoleMode(stdio->hStdIn, dwMode); chr->chr_set_echo = qemu_chr_set_echo_win_stdio; - 
qemu_chr_fe_set_echo(chr, false); + qemu_chr_set_echo_win_stdio(chr, false); return chr; @@ -2492,7 +2704,6 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, } #endif /* !_WIN32 */ - /***********************************************************/ /* UDP Net console */ @@ -2558,19 +2769,21 @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) return TRUE; } -static void udp_chr_update_read_handler(CharDriverState *chr) +static void udp_chr_update_read_handler(CharDriverState *chr, + GMainContext *context) { NetCharDriver *s = chr->opaque; remove_fd_in_watch(chr); if (s->ioc) { - chr->fd_in_tag = io_add_watch_poll(s->ioc, + chr->fd_in_tag = io_add_watch_poll(chr, s->ioc, udp_chr_read_poll, - udp_chr_read, chr); + udp_chr_read, chr, + context); } } -static void udp_chr_close(CharDriverState *chr) +static void udp_chr_free(CharDriverState *chr) { NetCharDriver *s = chr->opaque; @@ -2584,6 +2797,7 @@ static void udp_chr_close(CharDriverState *chr) static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc, ChardevCommon *backend, + bool *be_opened, Error **errp) { CharDriverState *chr = NULL; @@ -2601,9 +2815,9 @@ static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc, chr->opaque = s; chr->chr_write = udp_chr_write; chr->chr_update_read_handler = udp_chr_update_read_handler; - chr->chr_close = udp_chr_close; + chr->chr_free = udp_chr_free; /* be isn't opened until we get a connection */ - chr->explicit_be_open = true; + *be_opened = false; return chr; } @@ -2640,9 +2854,13 @@ static gboolean socket_reconnect_timeout(gpointer opaque); static void qemu_chr_socket_restart_timer(CharDriverState *chr) { TCPCharDriver *s = chr->opaque; + char *name; assert(s->connected == 0); s->reconnect_timer = g_timeout_add_seconds(s->reconnect_time, socket_reconnect_timeout, chr); + name = g_strdup_printf("chardev-socket-reconnect-%s", chr->label); + g_source_set_name_by_id(s->reconnect_timer, name); + g_free(name); } static void check_report_connect_error(CharDriverState *chr, @@ -2967,14 +3185,16 @@ static void tcp_chr_connect(void *opaque) s->connected = 1; if (s->ioc) { - chr->fd_in_tag = io_add_watch_poll(s->ioc, + chr->fd_in_tag = io_add_watch_poll(chr, s->ioc, tcp_chr_read_poll, - tcp_chr_read, chr); + tcp_chr_read, + chr, NULL); } qemu_chr_be_generic_open(chr); } -static void tcp_chr_update_read_handler(CharDriverState *chr) +static void tcp_chr_update_read_handler(CharDriverState *chr, + GMainContext *context) { TCPCharDriver *s = chr->opaque; @@ -2984,9 +3204,10 @@ static void tcp_chr_update_read_handler(CharDriverState *chr) remove_fd_in_watch(chr); if (s->ioc) { - chr->fd_in_tag = io_add_watch_poll(s->ioc, + chr->fd_in_tag = io_add_watch_poll(chr, s->ioc, tcp_chr_read_poll, - tcp_chr_read, chr); + tcp_chr_read, chr, + context); } } @@ -3081,6 +3302,7 @@ static void tcp_chr_tls_init(CharDriverState *chr) TCPCharDriver *s = chr->opaque; QIOChannelTLS *tioc; Error *err = NULL; + gchar *name; if (s->is_listen) { tioc = qio_channel_tls_new_server( @@ -3096,7 +3318,13 @@ static void tcp_chr_tls_init(CharDriverState *chr) if (tioc == NULL) { error_free(err); tcp_chr_disconnect(chr); + return; } + name = g_strdup_printf("chardev-tls-%s-%s", + s->is_listen ? 
"server" : "client", + chr->label); + qio_channel_set_name(QIO_CHANNEL(tioc), name); + g_free(name); object_unref(OBJECT(s->ioc)); s->ioc = QIO_CHANNEL(tioc); @@ -3107,6 +3335,19 @@ static void tcp_chr_tls_init(CharDriverState *chr) } +static void tcp_chr_set_client_ioc_name(CharDriverState *chr, + QIOChannelSocket *sioc) +{ + TCPCharDriver *s = chr->opaque; + char *name; + name = g_strdup_printf("chardev-tcp-%s-%s", + s->is_listen ? "server" : "client", + chr->label); + qio_channel_set_name(QIO_CHANNEL(sioc), name); + g_free(name); + +} + static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) { TCPCharDriver *s = chr->opaque; @@ -3152,6 +3393,7 @@ static int tcp_chr_add_client(CharDriverState *chr, int fd) if (!sioc) { return -1; } + tcp_chr_set_client_ioc_name(chr, sioc); ret = tcp_chr_new_client(chr, sioc); object_unref(OBJECT(sioc)); return ret; @@ -3291,6 +3533,7 @@ static int tcp_chr_wait_connected(CharDriverState *chr, Error **errp) qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); } else { sioc = qio_channel_socket_new(); + tcp_chr_set_client_ioc_name(chr, sioc); if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { object_unref(OBJECT(sioc)); return -1; @@ -3303,7 +3546,7 @@ static int tcp_chr_wait_connected(CharDriverState *chr, Error **errp) return 0; } -int qemu_chr_wait_connected(CharDriverState *chr, Error **errp) +static int qemu_chr_wait_connected(CharDriverState *chr, Error **errp) { if (chr->chr_wait_connected) { return chr->chr_wait_connected(chr, errp); @@ -3312,7 +3555,17 @@ int qemu_chr_wait_connected(CharDriverState *chr, Error **errp) return 0; } -static void tcp_chr_close(CharDriverState *chr) +int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp) +{ + if (!be->chr) { + error_setg(errp, "missing associated backend"); + return -1; + } + + return qemu_chr_wait_connected(be->chr, errp); +} + +static void tcp_chr_free(CharDriverState *chr) { TCPCharDriver *s = chr->opaque; @@ -3390,7 +3643,7 @@ static int ringbuf_chr_write(CharDriverState *chr, const uint8_t *buf, int len) } } - return 0; + return len; } static int ringbuf_chr_read(CharDriverState *chr, uint8_t *buf, int len) @@ -3407,7 +3660,7 @@ static int ringbuf_chr_read(CharDriverState *chr, uint8_t *buf, int len) return i; } -static void ringbuf_chr_close(struct CharDriverState *chr) +static void ringbuf_chr_free(struct CharDriverState *chr) { RingBufCharDriver *d = chr->opaque; @@ -3419,6 +3672,7 @@ static void ringbuf_chr_close(struct CharDriverState *chr) static CharDriverState *qemu_chr_open_ringbuf(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevRingbuf *opts = backend->u.ringbuf.data; @@ -3446,7 +3700,7 @@ static CharDriverState *qemu_chr_open_ringbuf(const char *id, chr->opaque = d; chr->chr_write = ringbuf_chr_write; - chr->chr_close = ringbuf_chr_close; + chr->chr_free = ringbuf_chr_free; return chr; @@ -3947,17 +4201,14 @@ static void qemu_chr_parse_udp(QemuOpts *opts, ChardevBackend *backend, typedef struct CharDriver { const char *name; ChardevBackendKind kind; - void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp); - CharDriverState *(*create)(const char *id, ChardevBackend *backend, - ChardevReturn *ret, Error **errp); + CharDriverParse *parse; + CharDriverCreate *create; } CharDriver; static GSList *backends; void register_char_driver(const char *name, ChardevBackendKind kind, - void (*parse)(QemuOpts *opts, ChardevBackend *backend, Error **errp), - CharDriverState 
*(*create)(const char *id, ChardevBackend *backend, - ChardevReturn *ret, Error **errp)) + CharDriverParse *parse, CharDriverCreate *create) { CharDriver *s; @@ -3971,8 +4222,7 @@ void register_char_driver(const char *name, ChardevBackendKind kind, } CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, - void (*init)(struct CharDriverState *s), - Error **errp) + Error **errp) { Error *local_err = NULL; CharDriver *cd; @@ -3983,16 +4233,26 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, const char *id = qemu_opts_id(opts); char *bid = NULL; - if (id == NULL) { - error_setg(errp, "chardev: no id specified"); - goto err; - } - if (qemu_opt_get(opts, "backend") == NULL) { error_setg(errp, "chardev: \"%s\" missing backend", qemu_opts_id(opts)); goto err; } + + if (is_help_option(qemu_opt_get(opts, "backend"))) { + fprintf(stderr, "Available chardev backend types:\n"); + for (i = backends; i; i = i->next) { + cd = i->data; + fprintf(stderr, "%s\n", cd->name); + } + exit(!is_help_option(qemu_opt_get(opts, "backend"))); + } + + if (id == NULL) { + error_setg(errp, "chardev: no id specified"); + goto err; + } + for (i = backends; i; i = i->next) { cd = i->data; @@ -4048,7 +4308,6 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, } chr = qemu_chr_find(id); - chr->opts = opts; qapi_out: qapi_free_ChardevBackend(backend); @@ -4057,12 +4316,10 @@ CharDriverState *qemu_chr_new_from_opts(QemuOpts *opts, return chr; err: - qemu_opts_del(opts); return NULL; } -CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename, - void (*init)(struct CharDriverState *s)) +CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename) { const char *p; CharDriverState *chr; @@ -4077,21 +4334,21 @@ CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename, if (!opts) return NULL; - chr = qemu_chr_new_from_opts(opts, init, &err); + chr = qemu_chr_new_from_opts(opts, &err); if (err) { error_report_err(err); } if (chr && qemu_opt_get_bool(opts, "mux", 0)) { - qemu_chr_fe_claim_no_fail(chr); monitor_init(chr, MONITOR_USE_READLINE); } + qemu_opts_del(opts); return chr; } -CharDriverState *qemu_chr_new(const char *label, const char *filename, void (*init)(struct CharDriverState *s)) +CharDriverState *qemu_chr_new(const char *label, const char *filename) { CharDriverState *chr; - chr = qemu_chr_new_noreplay(label, filename, init); + chr = qemu_chr_new_noreplay(label, filename); if (chr) { chr->replay = replay_mode != REPLAY_MODE_NONE; if (chr->replay && chr->chr_ioctl) { @@ -4103,38 +4360,40 @@ CharDriverState *qemu_chr_new(const char *label, const char *filename, void (*in return chr; } -void qemu_chr_fe_set_echo(struct CharDriverState *chr, bool echo) +void qemu_chr_fe_set_echo(CharBackend *be, bool echo) { - if (chr->chr_set_echo) { + CharDriverState *chr = be->chr; + + if (chr && chr->chr_set_echo) { chr->chr_set_echo(chr, echo); } } -void qemu_chr_fe_set_open(struct CharDriverState *chr, int fe_open) +void qemu_chr_fe_set_open(CharBackend *be, int fe_open) { - if (chr->fe_open == fe_open) { + CharDriverState *chr = be->chr; + + if (!chr) { return; } - chr->fe_open = fe_open; + + if (be->fe_open == fe_open) { + return; + } + be->fe_open = fe_open; if (chr->chr_set_fe_open) { chr->chr_set_fe_open(chr, fe_open); } } -void qemu_chr_fe_event(struct CharDriverState *chr, int event) -{ - if (chr->chr_fe_event) { - chr->chr_fe_event(chr, event); - } -} - -guint qemu_chr_fe_add_watch(CharDriverState *s, GIOCondition cond, +guint 
qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond, GIOFunc func, void *user_data) { + CharDriverState *s = be->chr; GSource *src; guint tag; - if (s->chr_add_watch == NULL) { + if (!s || s->chr_add_watch == NULL) { return 0; } @@ -4150,41 +4409,22 @@ guint qemu_chr_fe_add_watch(CharDriverState *s, GIOCondition cond, return tag; } -int qemu_chr_fe_claim(CharDriverState *s) +void qemu_chr_fe_disconnect(CharBackend *be) { - if (s->avail_connections < 1) { - return -1; - } - s->avail_connections--; - return 0; -} - -void qemu_chr_fe_claim_no_fail(CharDriverState *s) -{ - if (qemu_chr_fe_claim(s) != 0) { - fprintf(stderr, "%s: error chardev \"%s\" already used\n", - __func__, s->label); - exit(1); - } -} + CharDriverState *chr = be->chr; -void qemu_chr_fe_release(CharDriverState *s) -{ - s->avail_connections++; -} - -void qemu_chr_disconnect(CharDriverState *chr) -{ - if (chr->chr_disconnect) { + if (chr && chr->chr_disconnect) { chr->chr_disconnect(chr); } } static void qemu_chr_free_common(CharDriverState *chr) { + if (chr->be) { + chr->be->chr = NULL; + } g_free(chr->filename); g_free(chr->label); - qemu_opts_del(chr->opts); if (chr->logfd != -1) { close(chr->logfd); } @@ -4194,8 +4434,8 @@ static void qemu_chr_free_common(CharDriverState *chr) void qemu_chr_free(CharDriverState *chr) { - if (chr->chr_close) { - chr->chr_close(chr); + if (chr->chr_free) { + chr->chr_free(chr); } qemu_chr_free_common(chr); } @@ -4216,7 +4456,7 @@ ChardevInfoList *qmp_query_chardev(Error **errp) info->value = g_malloc0(sizeof(*info->value)); info->value->label = g_strdup(chr->label); info->value->filename = g_strdup(chr->filename); - info->value->frontend_open = chr->fe_open; + info->value->frontend_open = chr->be && chr->be->fe_open; info->next = chr_list; chr_list = info; @@ -4355,6 +4595,7 @@ QemuOptsList qemu_chardev_opts = { static CharDriverState *qmp_chardev_open_file(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevFile *file = backend->u.file.data; @@ -4390,6 +4631,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, static CharDriverState *qmp_chardev_open_serial(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevHostdev *serial = backend->u.serial.data; @@ -4414,6 +4656,7 @@ static int qmp_chardev_open_file_source(char *src, int flags, static CharDriverState *qmp_chardev_open_file(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevFile *file = backend->u.file.data; @@ -4448,6 +4691,7 @@ static CharDriverState *qmp_chardev_open_file(const char *id, static CharDriverState *qmp_chardev_open_serial(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevHostdev *serial = backend->u.serial.data; @@ -4459,7 +4703,7 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, return NULL; } qemu_set_nonblock(fd); - return qemu_chr_open_tty_fd(fd, common, errp); + return qemu_chr_open_tty_fd(fd, common, be_opened, errp); } #endif @@ -4467,6 +4711,7 @@ static CharDriverState *qmp_chardev_open_serial(const char *id, static CharDriverState *qmp_chardev_open_parallel(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevHostdev *parallel = backend->u.parallel.data; @@ -4477,7 +4722,7 @@ static CharDriverState *qmp_chardev_open_parallel(const char *id, if (fd < 0) { return NULL; } - return qemu_chr_open_pp_fd(fd, common, errp); + return 
qemu_chr_open_pp_fd(fd, common, be_opened, errp); } #endif @@ -4496,6 +4741,7 @@ static gboolean socket_reconnect_timeout(gpointer opaque) } sioc = qio_channel_socket_new(); + tcp_chr_set_client_ioc_name(chr, sioc); qio_channel_socket_connect_async(sioc, s->addr, qemu_chr_socket_connected, chr, NULL); @@ -4506,6 +4752,7 @@ static gboolean socket_reconnect_timeout(gpointer opaque) static CharDriverState *qmp_chardev_open_socket(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { CharDriverState *chr; @@ -4565,11 +4812,16 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, s->addr = QAPI_CLONE(SocketAddress, sock->addr); + qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE); + if (s->is_unix) { + qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_FD_PASS); + } + chr->opaque = s; chr->chr_wait_connected = tcp_chr_wait_connected; chr->chr_write = tcp_chr_write; chr->chr_sync_read = tcp_chr_sync_read; - chr->chr_close = tcp_chr_close; + chr->chr_free = tcp_chr_free; chr->chr_disconnect = tcp_chr_disconnect; chr->get_msgfds = tcp_get_msgfds; chr->set_msgfds = tcp_set_msgfds; @@ -4577,7 +4829,7 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, chr->chr_add_watch = tcp_chr_add_watch; chr->chr_update_read_handler = tcp_chr_update_read_handler; /* be isn't opened until we get a connection */ - chr->explicit_be_open = true; + *be_opened = false; chr->filename = SocketAddress_to_str("disconnected:", addr, is_listen, is_telnet); @@ -4592,12 +4844,19 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, if (s->reconnect_time) { sioc = qio_channel_socket_new(); + tcp_chr_set_client_ioc_name(chr, sioc); qio_channel_socket_connect_async(sioc, s->addr, qemu_chr_socket_connected, chr, NULL); } else { if (s->is_listen) { + char *name; sioc = qio_channel_socket_new(); + + name = g_strdup_printf("chardev-tcp-listener-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(sioc), name); + g_free(name); + if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { goto error; } @@ -4633,11 +4892,14 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, static CharDriverState *qmp_chardev_open_udp(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevUdp *udp = backend->u.udp.data; ChardevCommon *common = qapi_ChardevUdp_base(udp); QIOChannelSocket *sioc = qio_channel_socket_new(); + char *name; + CharDriverState *chr; if (qio_channel_socket_dgram_sync(sioc, udp->local, udp->remote, @@ -4645,7 +4907,26 @@ static CharDriverState *qmp_chardev_open_udp(const char *id, object_unref(OBJECT(sioc)); return NULL; } - return qemu_chr_open_udp(sioc, common, errp); + chr = qemu_chr_open_udp(sioc, common, be_opened, errp); + + name = g_strdup_printf("chardev-udp-%s", chr->label); + qio_channel_set_name(QIO_CHANNEL(sioc), name); + g_free(name); + + return chr; +} + + +bool qemu_chr_has_feature(CharDriverState *chr, + CharDriverFeature feature) +{ + return test_bit(feature, chr->features); +} + +void qemu_chr_set_feature(CharDriverState *chr, + CharDriverFeature feature) +{ + return set_bit(feature, chr->features); } ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, @@ -4656,19 +4937,19 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, Error *local_err = NULL; GSList *i; CharDriver *cd; + bool be_opened = true; chr = qemu_chr_find(id); if (chr) { error_setg(errp, "Chardev '%s' already exists", id); - g_free(ret); - return NULL; + goto 
out_error; } for (i = backends; i; i = i->next) { cd = i->data; if (cd->kind == backend->type) { - chr = cd->create(id, backend, ret, &local_err); + chr = cd->create(id, backend, ret, &be_opened, &local_err); if (local_err) { error_propagate(errp, local_err); goto out_error; @@ -4684,12 +4965,10 @@ ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, } chr->label = g_strdup(id); - chr->avail_connections = - (backend->type == CHARDEV_BACKEND_KIND_MUX) ? MAX_MUX : 1; if (!chr->filename) { chr->filename = g_strdup(ChardevBackendKind_lookup[backend->type]); } - if (!chr->explicit_be_open) { + if (be_opened) { qemu_chr_be_event(chr, CHR_EVENT_OPENED); } QTAILQ_INSERT_TAIL(&chardevs, chr, next); @@ -4709,8 +4988,7 @@ void qmp_chardev_remove(const char *id, Error **errp) error_setg(errp, "Chardev '%s' not found", id); return; } - if (chr->chr_can_read || chr->chr_read || - chr->chr_event || chr->handler_opaque) { + if (qemu_chr_is_busy(chr)) { error_setg(errp, "Chardev '%s' is busy", id); return; } diff --git a/qemu-doc.texi b/qemu-doc.texi index b92c374de1..c7bd7b3f78 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -32,11 +32,10 @@ @menu * Introduction:: -* Installation:: * QEMU PC System emulator:: * QEMU System emulator for non PC targets:: * QEMU User space emulator:: -* compilation:: Compilation from the sources +* Implementation notes:: * License:: * Index:: @end menu @@ -57,98 +56,69 @@ QEMU is a FAST! processor emulator using dynamic translation to achieve good emulation speed. +@cindex operating modes QEMU has two operating modes: @itemize -@cindex operating modes - -@item @cindex system emulation -Full system emulation. In this mode, QEMU emulates a full system (for +@item Full system emulation. In this mode, QEMU emulates a full system (for example a PC), including one or several processors and various peripherals. It can be used to launch different Operating Systems without rebooting the PC or to debug system code. -@item @cindex user mode emulation -User mode emulation. In this mode, QEMU can launch +@item User mode emulation. In this mode, QEMU can launch processes compiled for one CPU on another CPU. It can be used to launch the Wine Windows API emulator (@url{http://www.winehq.org}) or to ease cross-compilation and cross-debugging. @end itemize -QEMU can run without a host kernel driver and yet gives acceptable -performance. +QEMU has the following features: -For system emulation, the following hardware targets are supported: @itemize -@cindex emulated target systems -@cindex supported target systems -@item PC (x86 or x86_64 processor) -@item ISA PC (old style PC without PCI bus) -@item PREP (PowerPC processor) -@item G3 Beige PowerMac (PowerPC processor) -@item Mac99 PowerMac (PowerPC processor, in progress) -@item Sun4m/Sun4c/Sun4d (32-bit Sparc processor) -@item Sun4u/Sun4v (64-bit Sparc processor, in progress) -@item Malta board (32-bit and 64-bit MIPS processors) -@item MIPS Magnum (64-bit MIPS processor) -@item ARM Integrator/CP (ARM) -@item ARM Versatile baseboard (ARM) -@item ARM RealView Emulation/Platform baseboard (ARM) -@item Spitz, Akita, Borzoi, Terrier and Tosa PDAs (PXA270 processor) -@item Luminary Micro LM3S811EVB (ARM Cortex-M3) -@item Luminary Micro LM3S6965EVB (ARM Cortex-M3) -@item Freescale MCF5208EVB (ColdFire V2). -@item Arnewsh MCF5206 evaluation board (ColdFire V2). 
-@item Palm Tungsten|E PDA (OMAP310 processor) -@item N800 and N810 tablets (OMAP2420 processor) -@item MusicPal (MV88W8618 ARM processor) -@item Gumstix "Connex" and "Verdex" motherboards (PXA255/270). -@item Siemens SX1 smartphone (OMAP310 processor) -@item AXIS-Devboard88 (CRISv32 ETRAX-FS). -@item Petalogix Spartan 3aDSP1800 MMU ref design (MicroBlaze). -@item Avnet LX60/LX110/LX200 boards (Xtensa) -@end itemize +@item QEMU can run without a host kernel driver and yet gives acceptable +performance. It uses dynamic translation to native code for reasonable speed, +with support for self-modifying code and precise exceptions. + +@item It is portable to several operating systems (GNU/Linux, *BSD, Mac OS X, +Windows) and architectures. -@cindex supported user mode targets -For user emulation, x86 (32 and 64 bit), PowerPC (32 and 64 bit), -ARM, MIPS (32 bit only), Sparc (32 and 64 bit), -Alpha, ColdFire(m68k), CRISv32 and MicroBlaze CPUs are supported. +@item It performs accurate software emulation of the FPU. +@end itemize -@node Installation -@chapter Installation +QEMU user mode emulation has the following features: +@itemize +@item Generic Linux system call converter, including most ioctls. -If you want to compile QEMU yourself, see @ref{compilation}. +@item clone() emulation using native CPU clone() to use Linux scheduler for threads. -@menu -* install_linux:: Linux -* install_windows:: Windows -* install_mac:: Macintosh -@end menu +@item Accurate signal handling by remapping host signals to target signals. +@end itemize -@node install_linux -@section Linux -@cindex installation (Linux) +QEMU full system emulation has the following features: +@itemize +@item +QEMU uses a full software MMU for maximum portability. -If a precompiled package is available for your distribution - you just -have to install it. Otherwise, see @ref{compilation}. +@item +QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators +execute most of the guest code natively, while +continuing to emulate the rest of the machine. -@node install_windows -@section Windows -@cindex installation (Windows) +@item +Various hardware devices can be emulated and in some cases, host +devices (e.g. serial and parallel ports, USB, drives) can be used +transparently by the guest Operating System. Host device passthrough +can be used for talking to external physical peripherals (e.g. a +webcam, modem or tape drive). -Download the experimental binary installer at -@url{http://www.free.oszoo.org/@/download.html}. -TODO (no longer available) +@item +Symmetric multiprocessing (SMP) support. Currently, an in-kernel +accelerator is required to use more than one host CPU for emulation. -@node install_mac -@section Mac OS X +@end itemize -Download the experimental binary installer at -@url{http://www.free.oszoo.org/@/download.html}. -TODO (no longer available) @node QEMU PC System emulator @chapter QEMU PC System emulator @@ -1071,35 +1041,55 @@ GlusterFS is an user space distributed file system. You can boot from the GlusterFS disk image with the command: @example -qemu-system-x86_64 -drive file=gluster[+@var{transport}]://[@var{server}[:@var{port}]]/@var{volname}/@var{image}[?socket=...] +URI: +qemu-system-x86_64 -drive file=gluster[+@var{type}]://[@var{host}[:@var{port}]]/@var{volume}/@var{path} + [?socket=...][,file.debug=9][,file.logfile=...] 
+ +JSON: +qemu-system-x86_64 'json:@{"driver":"qcow2", + "file":@{"driver":"gluster", + "volume":"testvol","path":"a.img","debug":9,"logfile":"...", + "server":[@{"type":"tcp","host":"...","port":"..."@}, + @{"type":"unix","socket":"..."@}]@}@}' @end example @var{gluster} is the protocol. -@var{transport} specifies the transport type used to connect to gluster +@var{type} specifies the transport type used to connect to gluster management daemon (glusterd). Valid transport types are -tcp, unix and rdma. If a transport type isn't specified, then tcp -type is assumed. +tcp and unix. In the URI form, if a transport type isn't specified, +then tcp type is assumed. -@var{server} specifies the server where the volume file specification for -the given volume resides. This can be either hostname, ipv4 address -or ipv6 address. ipv6 address needs to be within square brackets [ ]. -If transport type is unix, then @var{server} field should not be specified. +@var{host} specifies the server where the volume file specification for +the given volume resides. This can be either a hostname or an ipv4 address. +If transport type is unix, then @var{host} field should not be specified. Instead @var{socket} field needs to be populated with the path to unix domain socket. @var{port} is the port number on which glusterd is listening. This is optional -and if not specified, QEMU will send 0 which will make gluster to use the -default port. If the transport type is unix, then @var{port} should not be -specified. +and if not specified, it defaults to port 24007. If the transport type is unix, +then @var{port} should not be specified. + +@var{volume} is the name of the gluster volume which contains the disk image. + +@var{path} is the path to the actual disk image that resides on gluster volume. + +@var{debug} is the logging level of the gluster protocol driver. Debug levels +are 0-9, with 9 being the most verbose, and 0 representing no debugging output. +The default level is 4. The current logging levels defined in the gluster source +are 0 - None, 1 - Emergency, 2 - Alert, 3 - Critical, 4 - Error, 5 - Warning, +6 - Notice, 7 - Info, 8 - Debug, 9 - Trace + +@var{logfile} is a commandline option to mention log file path which helps in +logging to the specified file and also help in persisting the gfapi logs. The +default is stderr. + -@var{volname} is the name of the gluster volume which contains the disk image. -@var{image} is the path to the actual disk image that resides on gluster volume. 
You can create a GlusterFS disk image with the command: @example -qemu-img create gluster://@var{server}/@var{volname}/@var{image} @var{size} +qemu-img create gluster://@var{host}/@var{volume}/@var{path} @var{size} @end example Examples @@ -1112,6 +1102,17 @@ qemu-system-x86_64 -drive file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir qemu-system-x86_64 -drive file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img qemu-system-x86_64 -drive file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket qemu-system-x86_64 -drive file=gluster+rdma://1.2.3.4:24007/testvol/a.img +qemu-system-x86_64 -drive file=gluster://1.2.3.4/testvol/a.img,file.debug=9,file.logfile=/var/log/qemu-gluster.log +qemu-system-x86_64 'json:@{"driver":"qcow2", + "file":@{"driver":"gluster", + "volume":"testvol","path":"a.img", + "debug":9,"logfile":"/var/log/qemu-gluster.log", + "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, + @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' +qemu-system-x86_64 -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, + file.debug=9,file.logfile=/var/log/qemu-gluster.log, + file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, + file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket @end example @node disk_images_ssh @@ -2781,6 +2782,7 @@ so should only be used with trusted guest OS. @menu * Supported Operating Systems :: +* Features:: * Linux User space emulator:: * BSD User space emulator :: @end menu @@ -2797,6 +2799,39 @@ Linux (referred as qemu-linux-user) BSD (referred as qemu-bsd-user) @end itemize +@node Features +@section Features + +QEMU user space emulation has the following notable features: + +@table @strong +@item System call translation: +QEMU includes a generic system call translator. This means that +the parameters of the system calls can be converted to fix +endianness and 32/64-bit mismatches between hosts and targets. +IOCTLs can be converted too. + +@item POSIX signal handling: +QEMU can redirect to the running program all signals coming from +the host (such as @code{SIGALRM}), as well as synthesize signals from +virtual CPU exceptions (for example @code{SIGFPE} when the program +executes a division by zero). + +QEMU relies on the host kernel to emulate most signal system +calls, for example to emulate the signal mask. On Linux, QEMU +supports both normal and real-time signals. + +@item Threading: +On Linux, QEMU can emulate the @code{clone} syscall and create a real +host thread (with a separate virtual CPU) for each emulated thread. +Note that not all targets currently emulate atomic operations correctly. +x86 and ARM use a global lock in order to preserve their semantics. +@end table + +QEMU was conceived so that ultimately it can emulate itself. Although +it is not very useful, it is an important test to show the power of the +emulator. + @node Linux User space emulator @section Linux User space emulator @@ -3066,220 +3101,8 @@ Act as if the host page size was 'pagesize' bytes Run the emulation in single step mode. 
@end table -@node compilation -@chapter Compilation from the sources - -@menu -* Linux/Unix:: -* Windows:: -* Cross compilation for Windows with Linux:: -* Mac OS X:: -* Make targets:: -@end menu - -@node Linux/Unix -@section Linux/Unix - -@subsection Compilation - -First you must decompress the sources: -@example -cd /tmp -tar zxvf qemu-x.y.z.tar.gz -cd qemu-x.y.z -@end example - -Then you configure QEMU and build it (usually no options are needed): -@example -./configure -make -@end example - -Then type as root user: -@example -make install -@end example -to install QEMU in @file{/usr/local}. - -@node Windows -@section Windows - -@itemize -@item Install the current versions of MSYS and MinGW from -@url{http://www.mingw.org/}. You can find detailed installation -instructions in the download section and the FAQ. - -@item Download -the MinGW development library of SDL 1.2.x -(@file{SDL-devel-1.2.x-@/mingw32.tar.gz}) from -@url{http://www.libsdl.org}. Unpack it in a temporary place and -edit the @file{sdl-config} script so that it gives the -correct SDL directory when invoked. - -@item Install the MinGW version of zlib and make sure -@file{zlib.h} and @file{libz.dll.a} are in -MinGW's default header and linker search paths. - -@item Extract the current version of QEMU. - -@item Start the MSYS shell (file @file{msys.bat}). - -@item Change to the QEMU directory. Launch @file{./configure} and -@file{make}. If you have problems using SDL, verify that -@file{sdl-config} can be launched from the MSYS command line. - -@item You can install QEMU in @file{Program Files/QEMU} by typing -@file{make install}. Don't forget to copy @file{SDL.dll} in -@file{Program Files/QEMU}. - -@end itemize - -@node Cross compilation for Windows with Linux -@section Cross compilation for Windows with Linux -@itemize -@item -Install the MinGW cross compilation tools available at -@url{http://www.mingw.org/}. - -@item Download -the MinGW development library of SDL 1.2.x -(@file{SDL-devel-1.2.x-@/mingw32.tar.gz}) from -@url{http://www.libsdl.org}. Unpack it in a temporary place and -edit the @file{sdl-config} script so that it gives the -correct SDL directory when invoked. Set up the @code{PATH} environment -variable so that @file{sdl-config} can be launched by -the QEMU configuration script. - -@item Install the MinGW version of zlib and make sure -@file{zlib.h} and @file{libz.dll.a} are in -MinGW's default header and linker search paths. - -@item -Configure QEMU for Windows cross compilation: -@example -PATH=/usr/i686-pc-mingw32/sys-root/mingw/bin:$PATH ./configure --cross-prefix='i686-pc-mingw32-' -@end example -The example assumes @file{sdl-config} is installed under @file{/usr/i686-pc-mingw32/sys-root/mingw/bin} and -MinGW cross compilation tools have names like @file{i686-pc-mingw32-gcc} and @file{i686-pc-mingw32-strip}. -We set the @code{PATH} environment variable to ensure the MinGW version of @file{sdl-config} is used and -use --cross-prefix to specify the name of the cross compiler. -You can also use --prefix to set the Win32 install path which defaults to @file{c:/Program Files/QEMU}. - -Under Fedora Linux, you can run: -@example -yum -y install mingw32-gcc mingw32-SDL mingw32-zlib -@end example -to get a suitable cross compilation environment. - -@item You can install QEMU in the installation directory by typing -@code{make install}. Don't forget to copy @file{SDL.dll} and @file{zlib1.dll} into the -installation directory. 
- -@end itemize - -Wine can be used to launch the resulting qemu-system-i386.exe -and all other qemu-system-@var{target}.exe compiled for Win32. - -@node Mac OS X -@section Mac OS X - -System Requirements: -@itemize -@item Mac OS 10.5 or higher -@item The clang compiler shipped with Xcode 4.2 or higher, -or GCC 4.3 or higher -@end itemize - -Additional Requirements (install in order): -@enumerate -@item libffi: @uref{https://sourceware.org/libffi/} -@item gettext: @uref{http://www.gnu.org/software/gettext/} -@item glib: @uref{http://ftp.gnome.org/pub/GNOME/sources/glib/} -@item pkg-config: @uref{http://www.freedesktop.org/wiki/Software/pkg-config/} -@item autoconf: @uref{http://www.gnu.org/software/autoconf/autoconf.html} -@item automake: @uref{http://www.gnu.org/software/automake/} -@item pixman: @uref{http://www.pixman.org/} -@end enumerate - -* You may find it easiest to get these from a third-party packager -such as Homebrew, Macports, or Fink. - -After downloading the QEMU source code, double-click it to expand it. - -Then configure and make QEMU: -@example -./configure -make -@end example - -If you have a recent version of Mac OS X (OSX 10.7 or better -with Xcode 4.2 or better) we recommend building QEMU with the -default compiler provided by Apple, for your version of Mac OS X -(which will be 'clang'). The configure script will -automatically pick this. - -Note: If after the configure step you see a message like this: -@example -ERROR: Your compiler does not support the __thread specifier for - Thread-Local Storage (TLS). Please upgrade to a version that does. -@end example -you may have to build your own version of gcc from source. Expect that to take -several hours. More information can be found here: -@uref{https://gcc.gnu.org/install/} @* - -These are some of the third party binaries of gcc available for download: -@itemize -@item Homebrew: @uref{http://brew.sh/} -@item @uref{https://www.litebeam.net/gcc/gcc_472.pkg} -@item @uref{http://www.macports.org/ports.php?by=name&substr=gcc} -@end itemize - -You can have several versions of GCC on your system. To specify a certain version, -use the --cc and --cxx options. -@example -./configure --cxx= --cc= -@end example - -@node Make targets -@section Make targets - -@table @code - -@item make -@item make all -Make everything which is typically needed. - -@item install -TODO - -@item install-doc -TODO - -@item make clean -Remove most files which were built during make. - -@item make distclean -Remove everything which was built during make. - -@item make dvi -@item make html -@item make info -@item make pdf -Create documentation in dvi, html, info or pdf format. - -@item make cscope -TODO - -@item make defconfig -(Re-)create some build configuration files. -User made changes will be overwritten. - -@item tar -@item tarbin -TODO - -@end table +@include qemu-tech.texi @node License @appendix License diff --git a/qemu-ga.texi b/qemu-ga.texi index 0e53bf6b2c..4c7a8fd163 100644 --- a/qemu-ga.texi +++ b/qemu-ga.texi @@ -30,7 +30,7 @@ set user's password @end itemize qemu-ga will read a system configuration file on startup (located at -q@file{/etc/qemu/qemu-ga.conf} by default), then parse remaining +@file{/etc/qemu/qemu-ga.conf} by default), then parse remaining configuration options on the command line. For the same key, the last option wins, but the lists accumulate (see below for configuration file format). 
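The qemu-char.c hunks earlier in this patch change the backend create() callback: a backend now reports "not connected yet" through a new bool *be_opened out-parameter instead of setting chr->explicit_be_open, and advertises optional capabilities through qemu_chr_set_feature()/qemu_chr_has_feature(), which wrap set_bit()/test_bit() on chr->features. The sketch below shows the shape of such a backend after the change; the "foo" backend, its reuse of the null QAPI branch and the qemu_chr_alloc() helper are illustrative assumptions, not code added by this series.

    /* Illustrative sketch only: "foo" is a made-up backend used to show the
     * create() signature after this patch; qemu_chr_alloc() and the QAPI
     * union member used here are assumptions, not part of this series. */
    static CharDriverState *qmp_chardev_open_foo(const char *id,
                                                 ChardevBackend *backend,
                                                 ChardevReturn *ret,
                                                 bool *be_opened,
                                                 Error **errp)
    {
        /* A real backend would read its own QAPI options here, e.g.
         * backend->u.udp.data as qmp_chardev_open_udp() does above. */
        ChardevCommon *common = backend->u.null.data;
        CharDriverState *chr = qemu_chr_alloc(common, errp);

        if (!chr) {
            return NULL;
        }

        /* Capabilities are now advertised as feature bits ... */
        qemu_chr_set_feature(chr, QEMU_CHAR_FEATURE_RECONNECTABLE);

        /* ... and "no connection yet" is reported via the out-parameter
         * rather than chr->explicit_be_open; qmp_chardev_add() only emits
         * CHR_EVENT_OPENED when *be_opened is left true. */
        *be_opened = false;

        return chr;
    }

Front ends can then query a capability with, for example, qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_FD_PASS) instead of special-casing the backend type.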
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 7e95b2da79..f054599a91 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -45,6 +45,12 @@ STEXI @item convert [--object @var{objectdef}] [--image-opts] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI +DEF("dd", img_dd, + "dd [--image-opts] [-f fmt] [-O output_fmt] [bs=block_size] [count=blocks] [skip=blocks] if=input of=output") +STEXI +@item dd [--image-opts] [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} +ETEXI + DEF("info", img_info, "info [--object objectdef] [--image-opts] [-f fmt] [--output=ofmt] [--backing-chain] filename") STEXI diff --git a/qemu-img.c b/qemu-img.c index f204d04136..6949b73ca5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -25,7 +25,7 @@ #include "qemu-version.h" #include "qapi/error.h" #include "qapi-visit.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/qmp/qerror.h" #include "qapi/qmp/qjson.h" #include "qemu/cutils.h" @@ -44,7 +44,7 @@ #include #define QEMU_IMG_VERSION "qemu-img version " QEMU_VERSION QEMU_PKGVERSION \ - ", " QEMU_COPYRIGHT "\n" + "\n" QEMU_COPYRIGHT "\n" typedef struct img_cmd_t { const char *name; @@ -166,7 +166,15 @@ static void QEMU_NORETURN help(void) "Parameters to compare subcommand:\n" " '-f' first image format\n" " '-F' second image format\n" - " '-s' run in Strict mode - fail on different image size or sector allocation\n"; + " '-s' run in Strict mode - fail on different image size or sector allocation\n" + "\n" + "Parameters to dd subcommand:\n" + " 'bs=BYTES' read and write up to BYTES bytes at a time " + "(default: 512)\n" + " 'count=N' copy only N input blocks\n" + " 'if=FILE' read from FILE\n" + " 'of=FILE' write to FILE\n" + " 'skip=N' skip N bs-sized blocks at the start of input\n"; printf("%s\nSupported formats:", help_msg); bdrv_iterate_format(format_print, NULL); @@ -492,7 +500,7 @@ static void dump_json_image_check(ImageCheck *check, bool quiet) { QString *str; QObject *obj; - Visitor *v = qmp_output_visitor_new(&obj); + Visitor *v = qobject_output_visitor_new(&obj); visit_type_ImageCheck(v, NULL, &check, &error_abort); visit_complete(v, &obj); @@ -787,6 +795,7 @@ static void run_block_job(BlockJob *job, Error **errp) { AioContext *aio_context = blk_get_aio_context(job->blk); + aio_context_acquire(aio_context); do { aio_poll(aio_context, true); qemu_progress_print(job->len ? 
@@ -794,6 +803,7 @@ static void run_block_job(BlockJob *job, Error **errp) } while (!job->ready); block_job_complete_sync(job, errp); + aio_context_release(aio_context); /* A block job may finish instantaneously without publishing any progress, * so just signal completion here */ @@ -811,6 +821,7 @@ static int img_commit(int argc, char **argv) Error *local_err = NULL; CommonBlockJobCBInfo cbi; bool image_opts = false; + AioContext *aio_context; fmt = NULL; cache = BDRV_DEFAULT_CACHE; @@ -920,8 +931,12 @@ static int img_commit(int argc, char **argv) .bs = bs, }; - commit_active_start("commit", bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT, - common_block_job_cb, &cbi, &local_err); + aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); + commit_active_start("commit", bs, base_bs, BLOCK_JOB_DEFAULT, 0, + BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi, + &local_err, false); + aio_context_release(aio_context); if (local_err) { goto done; } @@ -1590,7 +1605,9 @@ static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors, break; } - ret = blk_write_compressed(s->target, sector_num, buf, n); + ret = blk_pwrite_compressed(s->target, + sector_num << BDRV_SECTOR_BITS, + buf, n << BDRV_SECTOR_BITS); if (ret < 0) { return ret; } @@ -1727,7 +1744,7 @@ static int convert_do_copy(ImgConvertState *s) if (s->compressed) { /* signal EOF to align */ - ret = blk_write_compressed(s->target, 0, NULL, 0); + ret = blk_pwrite_compressed(s->target, 0, NULL, 0); if (ret < 0) { goto fail; } @@ -2032,7 +2049,7 @@ static int img_convert(int argc, char **argv) const char *preallocation = qemu_opt_get(opts, BLOCK_OPT_PREALLOC); - if (!drv->bdrv_write_compressed) { + if (!drv->bdrv_co_pwritev_compressed) { error_report("Compression not supported for this file format"); ret = -1; goto out; @@ -2183,7 +2200,7 @@ static void dump_json_image_info_list(ImageInfoList *list) { QString *str; QObject *obj; - Visitor *v = qmp_output_visitor_new(&obj); + Visitor *v = qobject_output_visitor_new(&obj); visit_type_ImageInfoList(v, NULL, &list, &error_abort); visit_complete(v, &obj); @@ -2199,7 +2216,7 @@ static void dump_json_image_info(ImageInfo *info) { QString *str; QObject *obj; - Visitor *v = qmp_output_visitor_new(&obj); + Visitor *v = qobject_output_visitor_new(&obj); visit_type_ImageInfo(v, NULL, &info, &error_abort); visit_complete(v, &obj); @@ -2946,6 +2963,7 @@ static int img_rebase(int argc, char **argv) error_reportf_err(local_err, "Could not open old backing file '%s': ", backing_name); + ret = -1; goto out; } @@ -2963,6 +2981,7 @@ static int img_rebase(int argc, char **argv) error_reportf_err(local_err, "Could not open new backing file '%s': ", out_baseimg); + ret = -1; goto out; } } @@ -3794,6 +3813,339 @@ static int img_bench(int argc, char **argv) return 0; } +#define C_BS 01 +#define C_COUNT 02 +#define C_IF 04 +#define C_OF 010 +#define C_SKIP 020 + +struct DdInfo { + unsigned int flags; + int64_t count; +}; + +struct DdIo { + int bsz; /* Block size */ + char *filename; + uint8_t *buf; + int64_t offset; +}; + +struct DdOpts { + const char *name; + int (*f)(const char *, struct DdIo *, struct DdIo *, struct DdInfo *); + unsigned int flag; +}; + +static int img_dd_bs(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) +{ + char *end; + int64_t res; + + res = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + + if (res <= 0 || res > INT_MAX || *end) { + error_report("invalid number: '%s'", arg); + return 1; + } + in->bsz = out->bsz = res; + + 
return 0; +} + +static int img_dd_count(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) +{ + char *end; + + dd->count = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + + if (dd->count < 0 || *end) { + error_report("invalid number: '%s'", arg); + return 1; + } + + return 0; +} + +static int img_dd_if(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) +{ + in->filename = g_strdup(arg); + + return 0; +} + +static int img_dd_of(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) +{ + out->filename = g_strdup(arg); + + return 0; +} + +static int img_dd_skip(const char *arg, + struct DdIo *in, struct DdIo *out, + struct DdInfo *dd) +{ + char *end; + + in->offset = qemu_strtosz_suffix(arg, &end, QEMU_STRTOSZ_DEFSUFFIX_B); + + if (in->offset < 0 || *end) { + error_report("invalid number: '%s'", arg); + return 1; + } + + return 0; +} + +static int img_dd(int argc, char **argv) +{ + int ret = 0; + char *arg = NULL; + char *tmp; + BlockDriver *drv = NULL, *proto_drv = NULL; + BlockBackend *blk1 = NULL, *blk2 = NULL; + QemuOpts *opts = NULL; + QemuOptsList *create_opts = NULL; + Error *local_err = NULL; + bool image_opts = false; + int c, i; + const char *out_fmt = "raw"; + const char *fmt = NULL; + int64_t size = 0; + int64_t block_count = 0, out_pos, in_pos; + struct DdInfo dd = { + .flags = 0, + .count = 0, + }; + struct DdIo in = { + .bsz = 512, /* Block size is by default 512 bytes */ + .filename = NULL, + .buf = NULL, + .offset = 0 + }; + struct DdIo out = { + .bsz = 512, + .filename = NULL, + .buf = NULL, + .offset = 0 + }; + + const struct DdOpts options[] = { + { "bs", img_dd_bs, C_BS }, + { "count", img_dd_count, C_COUNT }, + { "if", img_dd_if, C_IF }, + { "of", img_dd_of, C_OF }, + { "skip", img_dd_skip, C_SKIP }, + { NULL, NULL, 0 } + }; + const struct option long_options[] = { + { "help", no_argument, 0, 'h'}, + { "image-opts", no_argument, 0, OPTION_IMAGE_OPTS}, + { 0, 0, 0, 0 } + }; + + while ((c = getopt_long(argc, argv, "hf:O:", long_options, NULL))) { + if (c == EOF) { + break; + } + switch (c) { + case 'O': + out_fmt = optarg; + break; + case 'f': + fmt = optarg; + break; + case '?': + error_report("Try 'qemu-img --help' for more information."); + ret = -1; + goto out; + case 'h': + help(); + break; + case OPTION_IMAGE_OPTS: + image_opts = true; + break; + } + } + + for (i = optind; i < argc; i++) { + int j; + arg = g_strdup(argv[i]); + + tmp = strchr(arg, '='); + if (tmp == NULL) { + error_report("unrecognized operand %s", arg); + ret = -1; + goto out; + } + + *tmp++ = '\0'; + + for (j = 0; options[j].name != NULL; j++) { + if (!strcmp(arg, options[j].name)) { + break; + } + } + if (options[j].name == NULL) { + error_report("unrecognized operand %s", arg); + ret = -1; + goto out; + } + + if (options[j].f(tmp, &in, &out, &dd) != 0) { + ret = -1; + goto out; + } + dd.flags |= options[j].flag; + g_free(arg); + arg = NULL; + } + + if (!(dd.flags & C_IF && dd.flags & C_OF)) { + error_report("Must specify both input and output files"); + ret = -1; + goto out; + } + blk1 = img_open(image_opts, in.filename, fmt, 0, false, false); + + if (!blk1) { + ret = -1; + goto out; + } + + drv = bdrv_find_format(out_fmt); + if (!drv) { + error_report("Unknown file format"); + ret = -1; + goto out; + } + proto_drv = bdrv_find_protocol(out.filename, true, &local_err); + + if (!proto_drv) { + error_report_err(local_err); + ret = -1; + goto out; + } + if (!drv->create_opts) { + error_report("Format driver '%s' does not 
support image creation", + drv->format_name); + ret = -1; + goto out; + } + if (!proto_drv->create_opts) { + error_report("Protocol driver '%s' does not support image creation", + proto_drv->format_name); + ret = -1; + goto out; + } + create_opts = qemu_opts_append(create_opts, drv->create_opts); + create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); + + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + + size = blk_getlength(blk1); + if (size < 0) { + error_report("Failed to get size for '%s'", in.filename); + ret = -1; + goto out; + } + + if (dd.flags & C_COUNT && dd.count <= INT64_MAX / in.bsz && + dd.count * in.bsz < size) { + size = dd.count * in.bsz; + } + + /* Overflow means the specified offset is beyond input image's size */ + if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz || + size < in.bsz * in.offset)) { + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, 0, &error_abort); + } else { + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, + size - in.bsz * in.offset, &error_abort); + } + + ret = bdrv_create(drv, out.filename, opts, &local_err); + if (ret < 0) { + error_reportf_err(local_err, + "%s: error while creating output image: ", + out.filename); + ret = -1; + goto out; + } + + blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR, + false, false); + + if (!blk2) { + ret = -1; + goto out; + } + + if (dd.flags & C_SKIP && (in.offset > INT64_MAX / in.bsz || + size < in.offset * in.bsz)) { + /* We give a warning if the skip option is bigger than the input + * size and create an empty output disk image (i.e. like dd(1)). + */ + error_report("%s: cannot skip to specified offset", in.filename); + in_pos = size; + } else { + in_pos = in.offset * in.bsz; + } + + in.buf = g_new(uint8_t, in.bsz); + + for (out_pos = 0; in_pos < size; block_count++) { + int in_ret, out_ret; + + if (in_pos + in.bsz > size) { + in_ret = blk_pread(blk1, in_pos, in.buf, size - in_pos); + } else { + in_ret = blk_pread(blk1, in_pos, in.buf, in.bsz); + } + if (in_ret < 0) { + error_report("error while reading from input image file: %s", + strerror(-in_ret)); + ret = -1; + goto out; + } + in_pos += in_ret; + + out_ret = blk_pwrite(blk2, out_pos, in.buf, in_ret, 0); + + if (out_ret < 0) { + error_report("error while writing to output image file: %s", + strerror(-out_ret)); + ret = -1; + goto out; + } + out_pos += out_ret; + } + +out: + g_free(arg); + qemu_opts_del(opts); + qemu_opts_free(create_opts); + blk_unref(blk1); + blk_unref(blk2); + g_free(in.filename); + g_free(out.filename); + g_free(in.buf); + g_free(out.buf); + + if (ret) { + return 1; + } + return 0; +} + static const img_cmd_t img_cmds[] = { #define DEF(option, callback, arg_string) \ @@ -3822,6 +4174,7 @@ int main(int argc, char **argv) signal(SIGPIPE, SIG_IGN); #endif + module_call_init(MODULE_INIT_TRACE); error_set_progname(argv[0]); qemu_init_exec_dir(argv[0]); diff --git a/qemu-img.texi b/qemu-img.texi index 449a19c710..174aae38b7 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -139,6 +139,22 @@ Parameters to convert subcommand: Skip the creation of the target volume @end table +Parameters to dd subcommand: + +@table @option + +@item bs=@var{block_size} +defines the block size +@item count=@var{blocks} +sets the number of input blocks to copy +@item if=@var{input} +sets the input file +@item of=@var{output} +sets the output file +@item skip=@var{blocks} +sets the number of input blocks to skip +@end table + Command description: @table @option @@ -310,6 +326,17 @@ skipped. 
This is useful for formats such as @code{rbd} if the target volume has already been created with site specific options that cannot be supplied through qemu-img. +@item dd [-f @var{fmt}] [-O @var{output_fmt}] [bs=@var{block_size}] [count=@var{blocks}] [skip=@var{blocks}] if=@var{input} of=@var{output} + +Dd copies from @var{input} file to @var{output} file converting it from +@var{fmt} format to @var{output_fmt} format. + +The data is by default read and written using blocks of 512 bytes but can be +modified by specifying @var{block_size}. If count=@var{blocks} is specified +dd will stop reading input after reading @var{blocks} input blocks. + +The size syntax is similar to dd(1)'s size syntax. + @item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} Give information about the disk image @var{filename}. Use it in diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 25954f5634..95bcde1d88 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -18,7 +18,6 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/timer.h" -#include "sysemu/block-backend.h" #include "qemu/cutils.h" #define CMD_NOFILE_OK 0x01 @@ -504,7 +503,7 @@ static int do_write_compressed(BlockBackend *blk, char *buf, int64_t offset, return -ERANGE; } - ret = blk_write_compressed(blk, offset >> 9, (uint8_t *)buf, count >> 9); + ret = blk_pwrite_compressed(blk, offset, buf, count); if (ret < 0) { return ret; } @@ -1956,7 +1955,7 @@ static int reopen_f(BlockBackend *blk, int argc, char **argv) qemu_opts_reset(&reopen_opts); brq = bdrv_reopen_queue(NULL, bs, opts, flags); - bdrv_reopen_multiple(brq, &local_err); + bdrv_reopen_multiple(bdrv_get_aio_context(bs), brq, &local_err); if (local_err) { error_report_err(local_err); } else { @@ -2216,6 +2215,7 @@ static const cmdinfo_t help_cmd = { bool qemuio_command(BlockBackend *blk, const char *cmd) { + AioContext *ctx; char *input; const cmdinfo_t *ct; char **v; @@ -2227,7 +2227,10 @@ bool qemuio_command(BlockBackend *blk, const char *cmd) if (c) { ct = find_command(v[0]); if (ct) { + ctx = blk ? 
blk_get_aio_context(blk) : qemu_get_aio_context(); + aio_context_acquire(ctx); done = command(blk, ct, c, v); + aio_context_release(ctx); } else { fprintf(stderr, "command \"%s\" not found\n", v[0]); } diff --git a/qemu-io.c b/qemu-io.c index db129eac5f..23a229f880 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -467,6 +467,7 @@ int main(int argc, char **argv) signal(SIGPIPE, SIG_IGN); #endif + module_call_init(MODULE_INIT_TRACE); progname = basename(argv[0]); qemu_init_exec_dir(argv[0]); diff --git a/qemu-nbd.c b/qemu-nbd.c index e3571c2025..c734f627b4 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -48,6 +48,7 @@ #define QEMU_NBD_OPT_OBJECT 260 #define QEMU_NBD_OPT_TLSCREDS 261 #define QEMU_NBD_OPT_IMAGE_OPTS 262 +#define QEMU_NBD_OPT_FORK 263 #define MBR_SIZE 512 @@ -82,6 +83,7 @@ static void usage(const char *name) " -t, --persistent don't exit on the last connection\n" " -v, --verbose display extra debugging information\n" " -x, --export-name=NAME expose export by name\n" +" -D, --description=TEXT with -x, also export a human-readable description\n" "\n" "Exposing part of the image:\n" " -o, --offset=OFFSET offset into the image\n" @@ -92,6 +94,8 @@ static void usage(const char *name) " passwords and/or encryption keys\n" " -T, --trace [[enable=]][,events=][,file=]\n" " specify tracing options\n" +" --fork fork off the server process and exit the parent\n" +" once the server is running\n" #ifdef __linux__ "Kernel NBD client support:\n" " -c, --connect=DEV connect FILE to the local NBD device DEV\n" @@ -474,7 +478,7 @@ int main(int argc, char **argv) off_t fd_size; QemuOpts *sn_opts = NULL; const char *sn_id_or_name = NULL; - const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:"; + const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:"; struct option lopt[] = { { "help", no_argument, NULL, 'h' }, { "version", no_argument, NULL, 'V' }, @@ -500,9 +504,11 @@ int main(int argc, char **argv) { "verbose", no_argument, NULL, 'v' }, { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT }, { "export-name", required_argument, NULL, 'x' }, + { "description", required_argument, NULL, 'D' }, { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS }, { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS }, { "trace", required_argument, NULL, 'T' }, + { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK }, { NULL, 0, NULL, 0 } }; int ch; @@ -520,10 +526,13 @@ int main(int argc, char **argv) BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; QDict *options = NULL; const char *export_name = NULL; + const char *export_description = NULL; const char *tlscredsid = NULL; bool imageOpts = false; bool writethrough = true; char *trace_file = NULL; + bool fork_process = false; + int old_stderr = -1; /* The client thread uses SIGTERM to interrupt the server. A signal * handler ensures that "qemu-nbd -v -c" exits with a nice status code. 
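The qemu-img hunks (run_block_job(), img_commit()) and the qemu-io qemuio_command() hunk above all follow the same discipline: take the image's AioContext lock around any work that polls or completes block jobs. A minimal sketch of that pattern, using the calls seen in those hunks but with a hypothetical helper name:

    /* Minimal sketch of the acquire/poll/release pattern added above;
     * "wait_for_job" is a hypothetical helper, not part of this patch. */
    static void wait_for_job(BlockBackend *blk, BlockJob *job, Error **errp)
    {
        AioContext *ctx = blk_get_aio_context(blk);

        aio_context_acquire(ctx);      /* completions now run under the lock */
        do {
            aio_poll(ctx, true);       /* drive the job forward */
        } while (!job->ready);
        block_job_complete_sync(job, errp);
        aio_context_release(ctx);      /* drop the lock before returning to
                                        * callers that do not hold it */
    }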
@@ -533,6 +542,7 @@ int main(int argc, char **argv) sa_sigterm.sa_handler = termsig_handler; sigaction(SIGTERM, &sa_sigterm, NULL); + module_call_init(MODULE_INIT_TRACE); qcrypto_init(&error_fatal); module_call_init(MODULE_INIT_QOM); @@ -682,6 +692,9 @@ int main(int argc, char **argv) case 'x': export_name = optarg; break; + case 'D': + export_description = optarg; + break; case 'v': verbose = 1; break; @@ -714,6 +727,9 @@ int main(int argc, char **argv) g_free(trace_file); trace_file = trace_opt_parse(optarg); break; + case QEMU_NBD_OPT_FORK: + fork_process = true; + break; } } @@ -773,7 +789,7 @@ int main(int argc, char **argv) return 0; } - if (device && !verbose) { + if ((device && !verbose) || fork_process) { int stderr_fd[2]; pid_t pid; int ret; @@ -796,6 +812,7 @@ int main(int argc, char **argv) ret = qemu_daemon(1, 0); /* Temporarily redirect stderr to the parent's pipe... */ + old_stderr = dup(STDERR_FILENO); dup2(stderr_fd[1], STDERR_FILENO); if (ret < 0) { error_report("Failed to daemonize: %s", strerror(errno)); @@ -901,6 +918,14 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + if (dev_offset >= fd_size) { + error_report("Offset (%lld) has to be smaller than the image size " + "(%lld)", + (long long int)dev_offset, (long long int)fd_size); + exit(EXIT_FAILURE); + } + fd_size -= dev_offset; + if (partition != -1) { ret = find_partition(blk, partition, &dev_offset, &fd_size); if (ret < 0) { @@ -910,15 +935,19 @@ int main(int argc, char **argv) } } - exp = nbd_export_new(blk, dev_offset, fd_size, nbdflags, nbd_export_closed, - &local_err); + exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed, + writethrough, NULL, &local_err); if (!exp) { error_report_err(local_err); exit(EXIT_FAILURE); } if (export_name) { nbd_export_set_name(exp, export_name); + nbd_export_set_description(exp, export_description); newproto = true; + } else if (export_description) { + error_report("Export description requires an export name"); + exit(EXIT_FAILURE); } server_ioc = qio_channel_socket_new(); @@ -951,6 +980,11 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } + if (fork_process) { + dup2(old_stderr, STDERR_FILENO); + close(old_stderr); + } + state = RUNNING; do { main_loop_wait(false); diff --git a/qemu-nbd.texi b/qemu-nbd.texi index 91ebf04b5b..9a84e81eed 100644 --- a/qemu-nbd.texi +++ b/qemu-nbd.texi @@ -79,13 +79,18 @@ Disconnect the device @var{dev} Allow up to @var{num} clients to share the device (default @samp{1}) @item -t, --persistent Don't exit on the last connection -@item -x NAME, --export-name=NAME +@item -x, --export-name=@var{name} Set the NBD volume export name. This switches the server to use the new style NBD protocol negotiation +@item -D, --description=@var{description} +Set the NBD volume export description, as a human-readable +string. Requires the use of @option{-x} @item --tls-creds=ID Enable mandatory TLS encryption for the server by setting the ID of the TLS credentials object previously created with the --object option. +@item --fork +Fork off the server process and exit the parent once the server is running. 
@item -v, --verbose Display extra debugging information @item -h, --help diff --git a/qemu-options.hx b/qemu-options.hx index c4b4e80b62..23e4d8bd20 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -175,7 +175,7 @@ DEF("set", HAS_ARG, QEMU_OPTION_set, STEXI @item -set @var{group}.@var{id}.@var{arg}=@var{value} @findex -set -Set parameter @var{arg} for item @var{id} of type @var{group}\n" +Set parameter @var{arg} for item @var{id} of type @var{group} ETEXI DEF("global", HAS_ARG, QEMU_OPTION_global, @@ -306,7 +306,7 @@ STEXI @findex -k Use keyboard layout @var{language} (for example @code{fr} for French). This option is only needed where it is not easy to get raw PC -keycodes (e.g. on Macs, with some X11 servers or with a VNC +keycodes (e.g. on Macs, with some X11 servers or with a VNC or curses display). You don't normally need to use it on PC/Linux or PC/Windows hosts. @@ -985,13 +985,14 @@ DEF("nographic", 0, QEMU_OPTION_nographic, STEXI @item -nographic @findex -nographic -Normally, QEMU uses SDL to display the VGA output. With this option, -you can totally disable graphical output so that QEMU is a simple -command line application. The emulated serial port is redirected on -the console and muxed with the monitor (unless redirected elsewhere -explicitly). Therefore, you can still use QEMU to debug a Linux kernel -with a serial console. Use @key{C-a h} for help on switching between -the console and monitor. +Normally, if QEMU is compiled with graphical window support, it displays +output such as guest graphics, guest console, and the QEMU monitor in a +window. With this option, you can totally disable graphical output so +that QEMU is a simple command line application. The emulated serial port +is redirected on the console and muxed with the monitor (unless +redirected elsewhere explicitly). Therefore, you can still use QEMU to +debug a Linux kernel with a serial console. Use @key{C-a h} for help on +switching between the console and monitor. ETEXI DEF("curses", 0, QEMU_OPTION_curses, @@ -1000,9 +1001,11 @@ DEF("curses", 0, QEMU_OPTION_curses, STEXI @item -curses @findex -curses -Normally, QEMU uses SDL to display the VGA output. With this option, -QEMU can display the VGA output when in text mode using a -curses/ncurses interface. Nothing is displayed in graphical mode. +Normally, if QEMU is compiled with graphical window support, it displays +output such as guest graphics, guest console, and the QEMU monitor in a +window. With this option, QEMU can display the VGA output when in text +mode using a curses/ncurses interface. Nothing is displayed in graphical +mode. ETEXI DEF("no-frame", 0, QEMU_OPTION_no_frame, @@ -1148,7 +1151,7 @@ Configure wan image compression (lossy for slow links). Default is auto. @item streaming-video=[off|all|filter] -Configure video stream detection. Default is filter. +Configure video stream detection. Default is off. @item agent-mouse=[on|off] Enable/disable passing mouse events via vdagent. Default is on. @@ -1246,13 +1249,14 @@ DEF("vnc", HAS_ARG, QEMU_OPTION_vnc , STEXI @item -vnc @var{display}[,@var{option}[,@var{option}[,...]]] @findex -vnc -Normally, QEMU uses SDL to display the VGA output. With this option, -you can have QEMU listen on VNC display @var{display} and redirect the VGA -display over the VNC session. It is very useful to enable the usb -tablet device when using this option (option @option{-usbdevice -tablet}). When using the VNC display, you must use the @option{-k} -parameter to set the keyboard layout if you are not using en-us. 
Valid -syntax for the @var{display} is +Normally, if QEMU is compiled with graphical window support, it displays +output such as guest graphics, guest console, and the QEMU monitor in a +window. With this option, you can have QEMU listen on VNC display +@var{display} and redirect the VGA display over the VNC session. It is +very useful to enable the usb tablet device when using this option +(option @option{-usbdevice tablet}). When using the VNC display, you +must use the @option{-k} parameter to set the keyboard layout if you are +not using en-us. Valid syntax for the @var{display} is @table @option @@ -1597,10 +1601,11 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, " configure a host TAP network backend with ID 'str'\n" #else "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n" - " [,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" + " [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" " [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n" " [,poll-us=n]\n" " configure a host TAP network backend with ID 'str'\n" + " connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n" " use network scripts 'file' (default=" DEFAULT_NETWORK_SCRIPT ")\n" " to configure it and 'dfile' (default=" DEFAULT_NETWORK_DOWN_SCRIPT ")\n" " to deconfigure it\n" @@ -1887,8 +1892,8 @@ processed and applied to -net user. Mixing them with the new configuration syntax gives undefined results. Their use for new applications is discouraged as they will be removed from future versions. -@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] -@itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}] +@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}] +@itemx -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,br=@var{bridge}][,helper=@var{helper}] Connect the host TAP network interface @var{name} to VLAN @var{n}. Use the network script @var{file} to configure it and the network script @@ -1899,8 +1904,9 @@ automatically provides one. The default network configure script is to disable script execution. If running QEMU as an unprivileged user, use the network helper -@var{helper} to configure the TAP interface. The default network -helper executable is @file{/path/to/qemu-bridge-helper}. +@var{helper} to configure the TAP interface and attach it to the bridge. +The default network helper executable is @file{/path/to/qemu-bridge-helper} +and the default bridge device is @file{br0}. @option{fd}=@var{h} can be used to specify the handle of an already opened host TAP interface. @@ -2151,6 +2157,7 @@ The general form of a character device option is: ETEXI DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, + "-chardev help\n" "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n" " [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n" @@ -2216,6 +2223,8 @@ Backend is one of: @option{spiceport}. The specific backend will determine the applicable options. +Use "-chardev help" to print all available chardev backend types. 
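One plausible shape for the new "-chardev help" listing, assuming it simply walks the backends GSList of CharDriver registrations seen in the qemu-char.c hunks earlier in this patch; this is a sketch of the idea, not the code added by the series:

    /* Hypothetical sketch: enumerate registered chardev backend types using
     * the backends list and CharDriver::kind from qemu-char.c. */
    static void chardev_print_types(void)
    {
        GSList *i;

        for (i = backends; i; i = i->next) {
            CharDriver *cd = i->data;

            printf("%s\n", ChardevBackendKind_lookup[cd->kind]);
        }
    }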
+ All devices must have an id, which can be any string up to 127 characters long. It is used to uniquely identify this device in other command line directives. @@ -2233,7 +2242,7 @@ two serial ports and the QEMU monitor: @example -chardev stdio,mux=on,id=char0 \ --mon chardev=char0,mode=readline,default \ +-mon chardev=char0,mode=readline \ -serial chardev:char0 \ -serial chardev:char0 @end example @@ -2244,7 +2253,7 @@ multiplexed between the QEMU monitor and a parallel port: @example -chardev stdio,mux=on,id=char0 \ --mon chardev=char0,mode=readline,default \ +-mon chardev=char0,mode=readline \ -parallel chardev:char0 \ -chardev tcp,...,mux=on,id=char1 \ -serial chardev:char1 \ @@ -2368,7 +2377,7 @@ console with the given dimensions. @item -chardev ringbuf ,id=@var{id} [,size=@var{size}] Create a ring buffer with fixed size @option{size}. -@var{size} must be a power of two, and defaults to @code{64K}). +@var{size} must be a power of two and defaults to @code{64K}. @item -chardev file ,id=@var{id} ,path=@var{path} @@ -2589,19 +2598,40 @@ TCP, Unix Domain Sockets and RDMA transport protocols. Syntax for specifying a VM disk image on GlusterFS volume is @example -gluster[+transport]://[server[:port]]/volname/image[?socket=...] + +URI: +gluster[+type]://[host[:port]]/volume/path[?socket=...][,debug=N][,logfile=...] + +JSON: +'json:@{"driver":"qcow2","file":@{"driver":"gluster","volume":"testvol","path":"a.img","debug":N,"logfile":"...", +@ "server":[@{"type":"tcp","host":"...","port":"..."@}, +@ @{"type":"unix","socket":"..."@}]@}@}' @end example Example @example -qemu-system-x86_64 --drive file=gluster://192.0.2.1/testvol/a.img +URI: +qemu-system-x86_64 --drive file=gluster://192.0.2.1/testvol/a.img, +@ file.debug=9,file.logfile=/var/log/qemu-gluster.log + +JSON: +qemu-system-x86_64 'json:@{"driver":"qcow2", +@ "file":@{"driver":"gluster", +@ "volume":"testvol","path":"a.img", +@ "debug":9,"logfile":"/var/log/qemu-gluster.log", +@ "server":[@{"type":"tcp","host":"1.2.3.4","port":24007@}, +@ @{"type":"unix","socket":"/var/run/glusterd.socket"@}]@}@}' +qemu-system-x86_64 -drive driver=qcow2,file.driver=gluster,file.volume=testvol,file.path=/path/a.img, +@ file.debug=9,file.logfile=/var/log/qemu-gluster.log, +@ file.server.0.type=tcp,file.server.0.host=1.2.3.4,file.server.0.port=24007, +@ file.server.1.type=unix,file.server.1.socket=/var/run/glusterd.socket @end example See also @url{http://www.gluster.org}. -@item HTTP/HTTPS/FTP/FTPS/TFTP -QEMU supports read-only access to files accessed over http(s), ftp(s) and tftp. +@item HTTP/HTTPS/FTP/FTPS +QEMU supports read-only access to files accessed over http(s) and ftp(s). Syntax using a single filename: @example @@ -2611,7 +2641,7 @@ Syntax using a single filename: where: @table @option @item protocol -'http', 'https', 'ftp', 'ftps', or 'tftp'. +'http', 'https', 'ftp', or 'ftps'. @item username Optional username for authentication to the remote server. @@ -3157,9 +3187,9 @@ Like -qmp but uses pretty JSON formatting. ETEXI DEF("mon", HAS_ARG, QEMU_OPTION_mon, \ - "-mon [chardev=]name[,mode=readline|control][,default]\n", QEMU_ARCH_ALL) + "-mon [chardev=]name[,mode=readline|control]\n", QEMU_ARCH_ALL) STEXI -@item -mon [chardev=]name[,mode=readline|control][,default] +@item -mon [chardev=]name[,mode=readline|control] @findex -mon Setup monitor on chardev @var{name}. ETEXI @@ -3947,13 +3977,83 @@ Create a filter-redirector we need to differ outdev id from indev id, id can not be the same. 
we can just use indev or outdev, but at least one of indev or outdev needs to be specified. -@item -object filter-dump,id=@var{id},netdev=@var{dev},file=@var{filename}][,maxlen=@var{len}] +@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},rewriter-mode=@var{mode}[,queue=@var{all|rx|tx}] + +Filter-rewriter is part of the COLO project. It rewrites TCP packets +sent from the primary to the secondary so that the secondary's TCP +connections stay consistent, and rewrites TCP packets sent from the +secondary to the primary so that they can be handled by the client. + +usage: +colo secondary: +-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 +-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 +-object filter-rewriter,id=rew0,netdev=hn0,queue=all + +@item -object filter-dump,id=@var{id},netdev=@var{dev}[,file=@var{filename}][,maxlen=@var{len}] + Dump the network traffic on netdev @var{dev} to the file specified by @var{filename}. At most @var{len} bytes (64k by default) per packet are stored. The file format is libpcap, so it can be analyzed with tools such as tcpdump or Wireshark. +@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid}, +outdev=@var{chardevid} + +Colo-compare gets a packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, then compares the primary packet with the +secondary packet. If the packets are the same, the primary +packet is output to outdev@var{chardevid}; otherwise colo-frame is notified to +do a checkpoint and the primary packet is sent to outdev@var{chardevid}. + +It must be used together with filter-mirror and filter-redirector. + +@example + +primary: +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown +-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 +-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait +-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait +-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait +-chardev socket,id=compare0-0,host=3.3.3.3,port=9001 +-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait +-chardev socket,id=compare_out0,host=3.3.3.3,port=9005 +-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0 +-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out +-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0 +-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0 + +secondary: +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown +-device e1000,netdev=hn0,mac=52:a4:00:12:78:66 +-chardev socket,id=red0,host=3.3.3.3,port=9003 +-chardev socket,id=red1,host=3.3.3.3,port=9004 +-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 +-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 + +@end example + +For more details on the above command lines, see +the colo-compare git log. + +@item -object cryptodev-backend-builtin,id=@var{id}[,queues=@var{queues}] + +Creates a cryptodev backend which executes crypto operations using +the QEMU cipher APIs. The @var{id} parameter is +a unique ID that will be used to reference this cryptodev backend from +the @option{virtio-crypto} device. The @var{queues} parameter is optional and +specifies the queue number of the cryptodev backend; the default of +@var{queues} is 1. + +@example + + # qemu-system-x86_64 \ + [...] \ + -object cryptodev-backend-builtin,id=cryptodev0 \ + -device virtio-crypto-pci,id=crypto0,cryptodev=cryptodev0 \ + [...]
+@end example + @item -object secret,id=@var{id},data=@var{string},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] @item -object secret,id=@var{id},file=@var{filename},format=@var{raw|base64}[,keyid=@var{secretid},iv=@var{string}] diff --git a/qemu-seccomp.c b/qemu-seccomp.c index cb569dc058..df75d9c471 100644 --- a/qemu-seccomp.c +++ b/qemu-seccomp.c @@ -65,6 +65,7 @@ static const struct QemuSeccompSyscall seccomp_whitelist[] = { { SCMP_SYS(prctl), 245 }, { SCMP_SYS(signalfd), 245 }, { SCMP_SYS(getrlimit), 245 }, + { SCMP_SYS(getrusage), 245 }, { SCMP_SYS(set_tid_address), 245 }, { SCMP_SYS(statfs), 245 }, { SCMP_SYS(unlink), 245 }, diff --git a/qemu-tech.texi b/qemu-tech.texi index bdb2285f4e..52a56ae25e 100644 --- a/qemu-tech.texi +++ b/qemu-tech.texi @@ -1,147 +1,27 @@ -\input texinfo @c -*- texinfo -*- -@c %**start of header -@setfilename qemu-tech.info - -@documentlanguage en -@documentencoding UTF-8 - -@settitle QEMU Internals -@exampleindent 0 -@paragraphindent 0 -@c %**end of header - -@ifinfo -@direntry -* QEMU Internals: (qemu-tech). The QEMU Emulator Internals. -@end direntry -@end ifinfo - -@iftex -@titlepage -@sp 7 -@center @titlefont{QEMU Internals} -@sp 3 -@end titlepage -@end iftex - -@ifnottex -@node Top -@top +@node Implementation notes +@appendix Implementation notes @menu -* Introduction:: -* QEMU Internals:: -* Regression Tests:: -* Index:: +* CPU emulation:: +* Translator Internals:: +* QEMU compared to other emulators:: +* Bibliography:: @end menu -@end ifnottex - -@contents -@node Introduction -@chapter Introduction +@node CPU emulation +@section CPU emulation @menu -* intro_features:: Features -* intro_x86_emulation:: x86 and x86-64 emulation -* intro_arm_emulation:: ARM emulation -* intro_mips_emulation:: MIPS emulation -* intro_ppc_emulation:: PowerPC emulation -* intro_sparc_emulation:: Sparc32 and Sparc64 emulation -* intro_xtensa_emulation:: Xtensa emulation -* intro_other_emulation:: Other CPU emulation +* x86:: x86 and x86-64 emulation +* ARM:: ARM emulation +* MIPS:: MIPS emulation +* PPC:: PowerPC emulation +* SPARC:: Sparc32 and Sparc64 emulation +* Xtensa:: Xtensa emulation @end menu -@node intro_features -@section Features - -QEMU is a FAST! processor emulator using a portable dynamic -translator. - -QEMU has two operating modes: - -@itemize @minus - -@item -Full system emulation. In this mode (full platform virtualization), -QEMU emulates a full system (usually a PC), including a processor and -various peripherals. It can be used to launch several different -Operating Systems at once without rebooting the host machine or to -debug system code. - -@item -User mode emulation. In this mode (application level virtualization), -QEMU can launch processes compiled for one CPU on another CPU, however -the Operating Systems must match. This can be used for example to ease -cross-compilation and cross-debugging. -@end itemize - -As QEMU requires no host kernel driver to run, it is very safe and -easy to use. - -QEMU generic features: - -@itemize - -@item User space only or full system emulation. - -@item Using dynamic translation to native code for reasonable speed. - -@item -Working on x86, x86_64 and PowerPC32/64 hosts. Being tested on ARM, -HPPA, Sparc32 and Sparc64. Previous versions had some support for -Alpha and S390 hosts, but TCG (see below) doesn't support those yet. - -@item Self-modifying code support. - -@item Precise exceptions support. 
- -@item -Floating point library supporting both full software emulation and -native host FPU instructions. - -@end itemize - -QEMU user mode emulation features: -@itemize -@item Generic Linux system call converter, including most ioctls. - -@item clone() emulation using native CPU clone() to use Linux scheduler for threads. - -@item Accurate signal handling by remapping host signals to target signals. -@end itemize - -Linux user emulator (Linux host only) can be used to launch the Wine -Windows API emulator (@url{http://www.winehq.org}). A BSD user emulator for BSD -hosts is under development. It would also be possible to develop a -similar user emulator for Solaris. - -QEMU full system emulation features: -@itemize -@item -QEMU uses a full software MMU for maximum portability. - -@item -QEMU can optionally use an in-kernel accelerator, like kvm. The accelerators -execute some of the guest code natively, while -continuing to emulate the rest of the machine. - -@item -Various hardware devices can be emulated and in some cases, host -devices (e.g. serial and parallel ports, USB, drives) can be used -transparently by the guest Operating System. Host device passthrough -can be used for talking to external physical peripherals (e.g. a -webcam, modem or tape drive). - -@item -Symmetric multiprocessing (SMP) even on a host with a single CPU. On a -SMP host system, QEMU can use only one CPU fully due to difficulty in -implementing atomic memory accesses efficiently. - -@end itemize - -@node intro_x86_emulation -@section x86 and x86-64 emulation +@node x86 +@subsection x86 and x86-64 emulation QEMU x86 target features: @@ -175,8 +55,8 @@ normal use. @end itemize -@node intro_arm_emulation -@section ARM emulation +@node ARM +@subsection ARM emulation @itemize @@ -188,8 +68,8 @@ normal use. @end itemize -@node intro_mips_emulation -@section MIPS emulation +@node MIPS +@subsection MIPS emulation @itemize @@ -215,8 +95,8 @@ Current QEMU limitations: @end itemize -@node intro_ppc_emulation -@section PowerPC emulation +@node PPC +@subsection PowerPC emulation @itemize @@ -227,8 +107,8 @@ FPU and MMU. @end itemize -@node intro_sparc_emulation -@section Sparc32 and Sparc64 emulation +@node SPARC +@subsection Sparc32 and Sparc64 emulation @itemize @@ -255,8 +135,8 @@ Current QEMU limitations: @end itemize -@node intro_xtensa_emulation -@section Xtensa emulation +@node Xtensa +@subsection Xtensa emulation @itemize @@ -280,96 +160,8 @@ may be created from overlay with minimal amount of hand-written code. @end itemize -@node intro_other_emulation -@section Other CPU emulation - -In addition to the above, QEMU supports emulation of other CPUs with -varying levels of success. These are: - -@itemize - -@item -Alpha -@item -CRIS -@item -M68k -@item -SH4 -@end itemize - -@node QEMU Internals -@chapter QEMU Internals - -@menu -* QEMU compared to other emulators:: -* Portable dynamic translation:: -* Condition code optimisations:: -* CPU state optimisations:: -* Translation cache:: -* Direct block chaining:: -* Self-modifying code and translated code invalidation:: -* Exception support:: -* MMU emulation:: -* Device emulation:: -* Hardware interrupts:: -* User emulation specific details:: -* Bibliography:: -@end menu - -@node QEMU compared to other emulators -@section QEMU compared to other emulators - -Like bochs [1], QEMU emulates an x86 CPU. But QEMU is much faster than -bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC -emulation while QEMU can emulate several processors. 
- -Like Valgrind [2], QEMU does user space emulation and dynamic -translation. Valgrind is mainly a memory debugger while QEMU has no -support for it (QEMU could be used to detect out of bound memory -accesses as Valgrind, but it has no support to track uninitialised data -as Valgrind does). The Valgrind dynamic translator generates better code -than QEMU (in particular it does register allocation) but it is closely -tied to an x86 host and target and has no support for precise exceptions -and system emulation. - -EM86 [3] is the closest project to user space QEMU (and QEMU still uses -some of its code, in particular the ELF file loader). EM86 was limited -to an alpha host and used a proprietary and slow interpreter (the -interpreter part of the FX!32 Digital Win32 code translator [4]). - -TWIN from Willows Software was a Windows API emulator like Wine. It is less -accurate than Wine but includes a protected mode x86 interpreter to launch -x86 Windows executables. Such an approach has greater potential because most -of the Windows API is executed natively but it is far more difficult to -develop because all the data structures and function parameters exchanged -between the API and the x86 code must be converted. - -User mode Linux [5] was the only solution before QEMU to launch a -Linux kernel as a process while not needing any host kernel -patches. However, user mode Linux requires heavy kernel patches while -QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is -slower. - -The Plex86 [6] PC virtualizer is done in the same spirit as the now -obsolete qemu-fast system emulator. It requires a patched Linux kernel -to work (you cannot launch the same kernel on your PC), but the -patches are really small. As it is a PC virtualizer (no emulation is -done except for some privileged instructions), it has the potential of -being faster than QEMU. The downside is that a complicated (and -potentially unsafe) host kernel patch is needed. - -The commercial PC Virtualizers (VMWare [7], VirtualPC [8]) are faster -than QEMU (without virtualization), but they all need specific, proprietary -and potentially unsafe host drivers. Moreover, they are unable to -provide cycle exact simulation as an emulator can. - -VirtualBox [9], Xen [10] and KVM [11] are based on QEMU. QEMU-SystemC -[12] uses QEMU to simulate a system where some hardware devices are -developed in SystemC. - -@node Portable dynamic translation -@section Portable dynamic translation +@node Translator Internals +@section Translator Internals QEMU is a dynamic translator. When it first encounters a piece of code, it converts it to the host instruction set. Usually dynamic translators @@ -377,68 +169,26 @@ are very complicated and highly CPU dependent. QEMU uses some tricks which make it relatively easily portable and simple while achieving good performances. -After the release of version 0.9.1, QEMU switched to a new method of -generating code, Tiny Code Generator or TCG. TCG relaxes the -dependency on the exact version of the compiler used. The basic idea -is to split every target instruction into a couple of RISC-like TCG -ops (see @code{target-i386/translate.c}). Some optimizations can be -performed at this stage, including liveness analysis and trivial -constant expression evaluation. TCG ops are then implemented in the -host CPU back end, also known as TCG target (see -@code{tcg/i386/tcg-target.inc.c}). For more information, please take a -look at @code{tcg/README}. 
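To make the TCG description above more concrete, here is a minimal, self-contained toy in plain C (not QEMU code; every name is invented) showing the idea of splitting one guest instruction into a few RISC-like micro-ops. The toy interprets the ops; TCG instead compiles them into host instructions through the back end under tcg/.

    /* Toy illustration only -- plain C, not QEMU code, all names invented.
     * One guest "add r0, r1" instruction is split into RISC-like micro-ops. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    enum { OP_LD_REG, OP_ADD, OP_ST_REG, OP_END };

    typedef struct { int op, a, b; } MicroOp;

    static uint32_t regs[8];                  /* toy guest register file */

    static void run(const MicroOp *prog)
    {
        uint32_t t[4] = { 0 };                /* toy temporaries */
        for (; prog->op != OP_END; prog++) {
            switch (prog->op) {
            case OP_LD_REG: t[prog->a] = regs[prog->b]; break; /* t[a] = r[b]  */
            case OP_ADD:    t[prog->a] += t[prog->b];   break; /* t[a] += t[b] */
            case OP_ST_REG: regs[prog->a] = t[prog->b]; break; /* r[a] = t[b]  */
            }
        }
    }

    int main(void)
    {
        const MicroOp add_r0_r1[] = {         /* "add r0, r1" decomposed */
            { OP_LD_REG, 0, 0 }, { OP_LD_REG, 1, 1 },
            { OP_ADD, 0, 1 },    { OP_ST_REG, 0, 0 }, { OP_END, 0, 0 },
        };
        regs[0] = 2;
        regs[1] = 40;
        run(add_r0_r1);
        printf("r0 = %" PRIu32 "\n", regs[0]);  /* prints 42 */
        return 0;
    }
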
- -@node Condition code optimisations -@section Condition code optimisations - -Lazy evaluation of CPU condition codes (@code{EFLAGS} register on x86) -is important for CPUs where every instruction sets the condition -codes. It tends to be less important on conventional RISC systems -where condition codes are only updated when explicitly requested. On -Sparc64, costly update of both 32 and 64 bit condition codes can be -avoided with lazy evaluation. - -Instead of computing the condition codes after each x86 instruction, -QEMU just stores one operand (called @code{CC_SRC}), the result -(called @code{CC_DST}) and the type of operation (called -@code{CC_OP}). When the condition codes are needed, the condition -codes can be calculated using this information. In addition, an -optimized calculation can be performed for some instruction types like -conditional branches. - -@code{CC_OP} is almost never explicitly set in the generated code -because it is known at translation time. - -The lazy condition code evaluation is used on x86, m68k, cris and -Sparc. ARM uses a simplified variant for the N and Z flags. - -@node CPU state optimisations -@section CPU state optimisations +QEMU's dynamic translation backend is called TCG, for "Tiny Code +Generator". For more information, please take a look at @code{tcg/README}. + +Some notable features of QEMU's dynamic translator are: + +@table @strong +@item CPU state optimisations: The target CPUs have many internal states which change the way it evaluates instructions. In order to achieve a good speed, the translation phase considers that some state information of the virtual CPU cannot change in it. The state is recorded in the Translation Block (TB). If the state changes (e.g. privilege level), a new TB will be generated and the previous TB won't be used anymore until the state -matches the state recorded in the previous TB. For example, if the SS, +matches the state recorded in the previous TB. The same idea can be applied +to other aspects of the CPU state. For example, on x86, if the SS, DS and ES segments have a zero base, then the translator does not even generate an addition for the segment base. -[The FPU stack pointer register is not handled that way yet]. - -@node Translation cache -@section Translation cache - -A 32 MByte cache holds the most recently used translations. For -simplicity, it is completely flushed when it is full. A translation unit -contains just a single basic block (a block of x86 instructions -terminated by a jump or by a virtual CPU state change which the -translator cannot deduce statically). - -@node Direct block chaining -@section Direct block chaining - +@item Direct block chaining: After each translated basic block is executed, QEMU uses the simulated Program Counter (PC) and other cpu state information (such as the CS segment base value) to find the next basic block. @@ -452,18 +202,17 @@ it easier to make the jump target modification atomic. On some host architectures (such as x86 or PowerPC), the @code{JUMP} opcode is directly patched so that the block chaining has no overhead. -@node Self-modifying code and translated code invalidation -@section Self-modifying code and translated code invalidation - +@item Self-modifying code and translated code invalidation: Self-modifying code is a special challenge in x86 emulation because no instruction cache invalidation is signaled by the application when code is modified. 
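As a concrete illustration of the "state recorded in the Translation Block" point above, here is a self-contained sketch with invented names (the real lookup lives around cpu-exec.c and include/exec/tb-hash.h): a translated block is only reused when both the guest PC and the CPU state bits baked into the generated code match.

    /* Sketch only -- invented names, not QEMU's data structures. */
    #include <stddef.h>
    #include <stdint.h>

    typedef struct DemoTB {
        uint64_t pc;              /* guest PC the block was translated from */
        uint32_t flags;           /* CPU state recorded at translation time */
        void *host_code;          /* generated host code for this block     */
        struct DemoTB *hash_next;
    } DemoTB;

    #define DEMO_TB_HASH_SIZE 1024

    DemoTB *demo_tb_hash[DEMO_TB_HASH_SIZE];

    DemoTB *demo_tb_lookup(uint64_t pc, uint32_t flags)
    {
        size_t h = (size_t)(pc ^ flags) & (DEMO_TB_HASH_SIZE - 1);
        DemoTB *tb;

        for (tb = demo_tb_hash[h]; tb != NULL; tb = tb->hash_next) {
            if (tb->pc == pc && tb->flags == flags) {
                return tb;        /* same PC and same recorded state: reuse */
            }
        }
        return NULL;              /* translate a new block for this state   */
    }

After a block runs, its successor is found the same way, and where the target is known the block's final jump can be patched to go there directly, which is the direct block chaining described above.
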
-When translated code is generated for a basic block, the corresponding -host page is write protected if it is not already read-only. Then, if -a write access is done to the page, Linux raises a SEGV signal. QEMU -then invalidates all the translated code in the page and enables write -accesses to the page. +User-mode emulation marks a host page as write-protected (if it is +not already read-only) every time translated code is generated for a +basic block. Then, if a write access is done to the page, Linux raises +a SEGV signal. QEMU then invalidates all the translated code in the page +and enables write accesses to the page. For system emulation, write +protection is achieved through the software MMU. Correct translated code invalidation is done efficiently by maintaining a linked list of every translated block contained in a given page. Other @@ -475,132 +224,95 @@ necessary. However, QEMU still requires that the generated code always matches the target instructions in memory in order to handle exceptions correctly. -@node Exception support -@section Exception support - +@item Exception support: longjmp() is used when an exception such as division by zero is encountered. The host SIGSEGV and SIGBUS signal handlers are used to get invalid -memory accesses. The simulated program counter is found by -retranslating the corresponding basic block and by looking where the -host program counter was at the exception point. - -The virtual CPU cannot retrieve the exact @code{EFLAGS} register because -in some cases it is not computed because of condition code -optimisations. It is not a big concern because the emulated code can -still be restarted in any cases. - -@node MMU emulation -@section MMU emulation - -For system emulation QEMU supports a soft MMU. In that mode, the MMU +memory accesses. QEMU keeps a map from host program counter to +target program counter, and looks up where the exception happened +based on the host program counter at the exception point. + +On some targets, some bits of the virtual CPU's state are not flushed to the +memory until the end of the translation block. This is done for internal +emulation state that is rarely accessed directly by the program and/or changes +very often throughout the execution of a translation block---this includes +condition codes on x86, delay slots on SPARC, conditional execution on +ARM, and so on. This state is stored for each target instruction, and +looked up on exceptions. + +@item MMU emulation: +For system emulation QEMU uses a software MMU. In that mode, the MMU virtual to physical address translation is done at every memory -access. QEMU uses an address translation cache to speed up the -translation. +access. +QEMU uses an address translation cache (TLB) to speed up the translation. In order to avoid flushing the translated code each time the MMU -mappings change, QEMU uses a physically indexed translation cache. It +mappings change, all caches in QEMU are physically indexed. This means that each basic block is indexed with its physical address. -When MMU mappings change, only the chaining of the basic blocks is -reset (i.e. a basic block can no longer jump directly to another one). - -@node Device emulation -@section Device emulation - -Systems emulated by QEMU are organized by boards. At initialization -phase, each board instantiates a number of CPUs, devices, RAM and -ROM. Each device in turn can assign I/O ports or memory areas (for -MMIO) to its handlers. 
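The software MMU paragraph above can be summarised with a small self-contained sketch (invented names; the real implementation is in cputlb.c and also handles access permissions, MMIO and multiple MMU modes): a direct-mapped TLB caches, per guest virtual page, the offset to add to reach the host address.

    /* Sketch only -- invented names, not QEMU's real TLB. */
    #include <stdint.h>

    #define DEMO_PAGE_BITS 12
    #define DEMO_PAGE_MASK (~(uint64_t)((1u << DEMO_PAGE_BITS) - 1))
    #define DEMO_TLB_SIZE  256

    typedef struct {
        uint64_t vpage;           /* guest virtual page cached by this entry */
        intptr_t addend;          /* host address = guest vaddr + addend     */
    } DemoTLBEntry;

    DemoTLBEntry demo_tlb[DEMO_TLB_SIZE];

    void *demo_tlb_lookup(uint64_t vaddr)
    {
        DemoTLBEntry *e = &demo_tlb[(vaddr >> DEMO_PAGE_BITS) % DEMO_TLB_SIZE];

        if (e->vpage == (vaddr & DEMO_PAGE_MASK)) {
            /* Hit: RAM/ROM access, just add the cached guest->host offset. */
            return (void *)(uintptr_t)(vaddr + (uint64_t)e->addend);
        }
        /* Miss: the slow path walks the guest page tables (or calls out to
         * device emulation for MMIO) and then refills this entry. */
        return NULL;
    }
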
When the emulation starts, an access to the -ports or MMIO memory areas assigned to the device causes the -corresponding handler to be called. - -RAM and ROM are handled more optimally, only the offset to the host -memory needs to be added to the guest address. - -The video RAM of VGA and other display cards is special: it can be -read or written directly like RAM, but write accesses cause the memory -to be marked with VGA_DIRTY flag as well. +In order to avoid invalidating the basic block chain when MMU mappings +change, chaining is only performed when the destination of the jump +shares a page with the basic block that is performing the jump. -QEMU supports some device classes like serial and parallel ports, USB, -drives and network devices, by providing APIs for easier connection to -the generic, higher level implementations. The API hides the -implementation details from the devices, like native device use or -advanced block device formats like QCOW. - -Usually the devices implement a reset method and register support for -saving and loading of the device state. The devices can also use -timers, especially together with the use of bottom halves (BHs). - -@node Hardware interrupts -@section Hardware interrupts - -In order to be faster, QEMU does not check at every basic block if a -hardware interrupt is pending. Instead, the user must asynchronously -call a specific function to tell that an interrupt is pending. This -function resets the chaining of the currently executing basic -block. It ensures that the execution will return soon in the main loop -of the CPU emulator. Then the main loop can test if the interrupt is -pending and handle it. - -@node User emulation specific details -@section User emulation specific details - -@subsection Linux system call translation - -QEMU includes a generic system call translator for Linux. It means that -the parameters of the system calls can be converted to fix the -endianness and 32/64 bit issues. The IOCTLs are converted with a generic -type description system (see @file{ioctls.h} and @file{thunk.c}). - -QEMU supports host CPUs which have pages bigger than 4KB. It records all -the mappings the process does and try to emulated the @code{mmap()} -system calls in cases where the host @code{mmap()} call would fail -because of bad page alignment. - -@subsection Linux signals - -Normal and real-time signals are queued along with their information -(@code{siginfo_t}) as it is done in the Linux kernel. Then an interrupt -request is done to the virtual CPU. When it is interrupted, one queued -signal is handled by generating a stack frame in the virtual CPU as the -Linux kernel does. The @code{sigreturn()} system call is emulated to return -from the virtual signal handler. +The MMU can also distinguish RAM and ROM memory areas from MMIO memory +areas. Access is faster for RAM and ROM because the translation cache also +hosts the offset between guest address and host memory. Accessing MMIO +memory areas instead calls out to C code for device emulation. +Finally, the MMU helps tracking dirty pages and pages pointed to by +translation blocks. +@end table -Some signals (such as SIGALRM) directly come from the host. Other -signals are synthesized from the virtual CPU exceptions such as SIGFPE -when a division by zero is done (see @code{main.c:cpu_loop()}). 
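The hardware-interrupt text above describes a pattern that the rest of this series keeps relying on (see the cpu_exit() hunk in qom/cpu.c below, which merely makes the flag updates atomic): interrupts are signalled asynchronously by setting a per-CPU flag, and the execution loop only polls it between translated blocks. A self-contained sketch of that pattern, with invented names and C11 atomics standing in for QEMU's atomic_set()/atomic_read() helpers:

    /* Sketch only -- invented names, not QEMU code. */
    #include <stdatomic.h>
    #include <stdbool.h>

    typedef struct {
        atomic_bool exit_request; /* set asynchronously, e.g. when an IRQ is raised */
    } DemoCPU;

    /* Called from another thread or a signal handler. */
    void demo_cpu_exit(DemoCPU *cpu)
    {
        atomic_store(&cpu->exit_request, true);
    }

    /* The execution loop only looks at the flag between translated blocks,
     * never in the middle of one. */
    void demo_cpu_loop(DemoCPU *cpu)
    {
        for (;;) {
            if (atomic_exchange(&cpu->exit_request, false)) {
                /* a device raised an interrupt: handle it, then resume */
            }
            /* execute one translated block here */
        }
    }
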
+@node QEMU compared to other emulators +@section QEMU compared to other emulators -The blocked signal mask is still handled by the host Linux kernel so -that most signal system calls can be redirected directly to the host -Linux kernel. Only the @code{sigaction()} and @code{sigreturn()} system -calls need to be fully emulated (see @file{signal.c}). +Like bochs [1], QEMU emulates an x86 CPU. But QEMU is much faster than +bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC +emulation while QEMU can emulate several processors. -@subsection clone() system call and threads +Like Valgrind [2], QEMU does user space emulation and dynamic +translation. Valgrind is mainly a memory debugger while QEMU has no +support for it (QEMU could be used to detect out of bound memory +accesses as Valgrind, but it has no support to track uninitialised data +as Valgrind does). The Valgrind dynamic translator generates better code +than QEMU (in particular it does register allocation) but it is closely +tied to an x86 host and target and has no support for precise exceptions +and system emulation. -The Linux clone() system call is usually used to create a thread. QEMU -uses the host clone() system call so that real host threads are created -for each emulated thread. One virtual CPU instance is created for each -thread. +EM86 [3] is the closest project to user space QEMU (and QEMU still uses +some of its code, in particular the ELF file loader). EM86 was limited +to an alpha host and used a proprietary and slow interpreter (the +interpreter part of the FX!32 Digital Win32 code translator [4]). -The virtual x86 CPU atomic operations are emulated with a global lock so -that their semantic is preserved. +TWIN from Willows Software was a Windows API emulator like Wine. It is less +accurate than Wine but includes a protected mode x86 interpreter to launch +x86 Windows executables. Such an approach has greater potential because most +of the Windows API is executed natively but it is far more difficult to +develop because all the data structures and function parameters exchanged +between the API and the x86 code must be converted. -Note that currently there are still some locking issues in QEMU. In -particular, the translated cache flush is not protected yet against -reentrancy. +User mode Linux [5] was the only solution before QEMU to launch a +Linux kernel as a process while not needing any host kernel +patches. However, user mode Linux requires heavy kernel patches while +QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is +slower. -@subsection Self-virtualization +The Plex86 [6] PC virtualizer is done in the same spirit as the now +obsolete qemu-fast system emulator. It requires a patched Linux kernel +to work (you cannot launch the same kernel on your PC), but the +patches are really small. As it is a PC virtualizer (no emulation is +done except for some privileged instructions), it has the potential of +being faster than QEMU. The downside is that a complicated (and +potentially unsafe) host kernel patch is needed. -QEMU was conceived so that ultimately it can emulate itself. Although -it is not very useful, it is an important test to show the power of the -emulator. +The commercial PC Virtualizers (VMWare [7], VirtualPC [8]) are faster +than QEMU (without virtualization), but they all need specific, proprietary +and potentially unsafe host drivers. Moreover, they are unable to +provide cycle exact simulation as an emulator can. 
-Achieving self-virtualization is not easy because there may be address -space conflicts. QEMU user emulators solve this problem by being an -executable ELF shared object as the ld-linux.so ELF interpreter. That -way, it can be relocated at load time. +VirtualBox [9], Xen [10] and KVM [11] are based on QEMU. QEMU-SystemC +[12] uses QEMU to simulate a system where some hardware devices are +developed in SystemC. @node Bibliography @section Bibliography @@ -657,43 +369,3 @@ Kernel Based Virtual Machine (KVM). QEMU-SystemC, a hardware co-simulator. @end table - -@node Regression Tests -@chapter Regression Tests - -In the directory @file{tests/}, various interesting testing programs -are available. They are used for regression testing. - -@menu -* test-i386:: -* linux-test:: -@end menu - -@node test-i386 -@section @file{test-i386} - -This program executes most of the 16 bit and 32 bit x86 instructions and -generates a text output. It can be compared with the output obtained with -a real CPU or another emulator. The target @code{make test} runs this -program and a @code{diff} on the generated output. - -The Linux system call @code{modify_ldt()} is used to create x86 selectors -to test some 16 bit addressing and 32 bit with segmentation cases. - -The Linux system call @code{vm86()} is used to test vm86 emulation. - -Various exceptions are raised to test most of the x86 user space -exception reporting. - -@node linux-test -@section @file{linux-test} - -This program tests various Linux system calls. It is used to verify -that the system call parameters are correctly converted between target -and host CPUs. - -@node Index -@chapter Index -@printindex cp - -@bye diff --git a/qemu.nsi b/qemu.nsi index a20f6ef35b..1a2d7d18a8 100644 --- a/qemu.nsi +++ b/qemu.nsi @@ -171,10 +171,8 @@ SectionEnd Section "Documentation" SectionDoc SetOutPath "$INSTDIR" File "${BINDIR}\qemu-doc.html" - File "${BINDIR}\qemu-tech.html" CreateDirectory "$SMPROGRAMS\${PRODUCT}" CreateShortCut "$SMPROGRAMS\${PRODUCT}\User Documentation.lnk" "$INSTDIR\qemu-doc.html" "" "$INSTDIR\qemu-doc.html" 0 - CreateShortCut "$SMPROGRAMS\${PRODUCT}\Technical Documentation.lnk" "$INSTDIR\qemu-tech.html" "" "$INSTDIR\qemu-tech.html" 0 SectionEnd !endif @@ -219,7 +217,6 @@ Section "Uninstall" Delete "$INSTDIR\qemu.exe" Delete "$INSTDIR\qemu-system-*.exe" Delete "$INSTDIR\qemu-doc.html" - Delete "$INSTDIR\qemu-tech.html" RMDir /r "$INSTDIR\keymaps" RMDir /r "$INSTDIR\share" ; Remove generated files diff --git a/qga/channel-posix.c b/qga/channel-posix.c index bb65d8ba17..71582e0c38 100644 --- a/qga/channel-posix.c +++ b/qga/channel-posix.c @@ -26,13 +26,10 @@ static gboolean ga_channel_listen_accept(GIOChannel *channel, GAChannel *c = data; int ret, client_fd; bool accepted = false; - struct sockaddr_un addr; - socklen_t addrlen = sizeof(addr); g_assert(channel != NULL); - client_fd = qemu_accept(g_io_channel_unix_get_fd(channel), - (struct sockaddr *)&addr, &addrlen); + client_fd = qemu_accept(g_io_channel_unix_get_fd(channel), NULL, NULL); if (client_fd == -1) { g_warning("error converting fd to gsocket: %s", strerror(errno)); goto out; @@ -64,7 +61,6 @@ static void ga_channel_listen_add(GAChannel *c, int listen_fd, bool create) static void ga_channel_listen_close(GAChannel *c) { - g_assert(c->method == GA_CHANNEL_UNIX_LISTEN); g_assert(c->listen_channel); g_io_channel_shutdown(c->listen_channel, true, NULL); g_io_channel_unref(c->listen_channel); @@ -80,7 +76,7 @@ static void ga_channel_client_close(GAChannel *c) 
g_io_channel_shutdown(c->client_channel, true, NULL); g_io_channel_unref(c->client_channel); c->client_channel = NULL; - if (c->method == GA_CHANNEL_UNIX_LISTEN && c->listen_channel) { + if (c->listen_channel) { ga_channel_listen_add(c, 0, false); } } @@ -197,6 +193,31 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, GAChannelMethod ga_channel_listen_add(c, fd, true); break; } + case GA_CHANNEL_VSOCK_LISTEN: { + Error *local_err = NULL; + SocketAddress *addr; + char *addr_str; + int fd; + + addr_str = g_strdup_printf("vsock:%s", path); + addr = socket_parse(addr_str, &local_err); + g_free(addr_str); + if (local_err != NULL) { + g_critical("%s", error_get_pretty(local_err)); + error_free(local_err); + return false; + } + + fd = socket_listen(addr, &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + g_critical("%s", error_get_pretty(local_err)); + error_free(local_err); + return false; + } + ga_channel_listen_add(c, fd, true); + break; + } default: g_critical("error binding/listening to specified socket"); return false; @@ -258,8 +279,7 @@ GAChannel *ga_channel_new(GAChannelMethod method, const gchar *path, void ga_channel_free(GAChannel *c) { - if (c->method == GA_CHANNEL_UNIX_LISTEN - && c->listen_channel) { + if (c->listen_channel) { ga_channel_listen_close(c); } if (c->client_channel) { diff --git a/qga/channel.h b/qga/channel.h index ae8cf0f7e0..8fd0c8f72c 100644 --- a/qga/channel.h +++ b/qga/channel.h @@ -19,6 +19,7 @@ typedef enum { GA_CHANNEL_VIRTIO_SERIAL, GA_CHANNEL_ISA_SERIAL, GA_CHANNEL_UNIX_LISTEN, + GA_CHANNEL_VSOCK_LISTEN, } GAChannelMethod; typedef gboolean (*GAChannelCallback)(GIOCondition condition, gpointer opaque); diff --git a/qga/commands-win32.c b/qga/commands-win32.c index 9c9be12116..19d72b2411 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -840,8 +840,99 @@ static void guest_fsfreeze_cleanup(void) GuestFilesystemTrimResponse * qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp) { - error_setg(errp, QERR_UNSUPPORTED); - return NULL; + GuestFilesystemTrimResponse *resp; + HANDLE handle; + WCHAR guid[MAX_PATH] = L""; + + handle = FindFirstVolumeW(guid, ARRAYSIZE(guid)); + if (handle == INVALID_HANDLE_VALUE) { + error_setg_win32(errp, GetLastError(), "failed to find any volume"); + return NULL; + } + + resp = g_new0(GuestFilesystemTrimResponse, 1); + + do { + GuestFilesystemTrimResult *res; + GuestFilesystemTrimResultList *list; + PWCHAR uc_path; + DWORD char_count = 0; + char *path, *out; + GError *gerr = NULL; + gchar * argv[4]; + + GetVolumePathNamesForVolumeNameW(guid, NULL, 0, &char_count); + + if (GetLastError() != ERROR_MORE_DATA) { + continue; + } + if (GetDriveTypeW(guid) != DRIVE_FIXED) { + continue; + } + + uc_path = g_malloc(sizeof(WCHAR) * char_count); + if (!GetVolumePathNamesForVolumeNameW(guid, uc_path, char_count, + &char_count) || !*uc_path) { + /* strange, but this condition could be faced even with size == 2 */ + g_free(uc_path); + continue; + } + + res = g_new0(GuestFilesystemTrimResult, 1); + + path = g_utf16_to_utf8(uc_path, char_count, NULL, NULL, &gerr); + + g_free(uc_path); + + if (!path) { + res->has_error = true; + res->error = g_strdup(gerr->message); + g_error_free(gerr); + break; + } + + res->path = path; + + list = g_new0(GuestFilesystemTrimResultList, 1); + list->value = res; + list->next = resp->paths; + + resp->paths = list; + + memset(argv, 0, sizeof(argv)); + argv[0] = (gchar *)"defrag.exe"; + argv[1] = (gchar *)"/L"; + argv[2] = path; + + if 
(!g_spawn_sync(NULL, argv, NULL, G_SPAWN_SEARCH_PATH, NULL, NULL, + &out /* stdout */, NULL /* stdin */, + NULL, &gerr)) { + res->has_error = true; + res->error = g_strdup(gerr->message); + g_error_free(gerr); + } else { + /* defrag.exe is UGLY. Exit code is ALWAYS zero. + Error is reported in the output with something like + (x89000020) etc code in the stdout */ + + int i; + gchar **lines = g_strsplit(out, "\r\n", 0); + g_free(out); + + for (i = 0; lines[i] != NULL; i++) { + if (g_strstr_len(lines[i], -1, "(0x") == NULL) { + continue; + } + res->has_error = true; + res->error = g_strdup(lines[i]); + break; + } + g_strfreev(lines); + } + } while (FindNextVolumeW(handle, guid, ARRAYSIZE(guid))); + + FindVolumeClose(handle); + return resp; } typedef enum { @@ -1416,7 +1507,7 @@ GList *ga_command_blacklist_init(GList *blacklist) "guest-get-memory-blocks", "guest-set-memory-blocks", "guest-get-memory-block-size", "guest-fsfreeze-freeze-list", - "guest-fstrim", NULL}; + NULL}; char **p = (char **)list_unsupported; while (*p) { diff --git a/qga/commands.c b/qga/commands.c index 50fd26a817..edd3e830e6 100644 --- a/qga/commands.c +++ b/qga/commands.c @@ -16,6 +16,7 @@ #include "qapi/qmp/qerror.h" #include "qemu/base64.h" #include "qemu/cutils.h" +#include "qemu/atomic.h" /* Maximum captured guest-exec out_data/err_data - 16MB */ #define GUEST_EXEC_MAX_OUTPUT (16*1024*1024) @@ -82,7 +83,7 @@ struct GuestExecIOData { guchar *data; gsize size; gsize length; - gint closed; + bool closed; bool truncated; const char *name; }; @@ -93,7 +94,7 @@ struct GuestExecInfo { int64_t pid_numeric; gint status; bool has_output; - gint finished; + bool finished; GuestExecIOData in; GuestExecIOData out; GuestExecIOData err; @@ -156,13 +157,13 @@ GuestExecStatus *qmp_guest_exec_status(int64_t pid, Error **err) ges = g_new0(GuestExecStatus, 1); - bool finished = g_atomic_int_get(&gei->finished); + bool finished = atomic_mb_read(&gei->finished); /* need to wait till output channels are closed * to be sure we captured all output at this point */ if (gei->has_output) { - finished = finished && g_atomic_int_get(&gei->out.closed); - finished = finished && g_atomic_int_get(&gei->err.closed); + finished = finished && atomic_mb_read(&gei->out.closed); + finished = finished && atomic_mb_read(&gei->err.closed); } ges->exited = finished; @@ -264,7 +265,7 @@ static void guest_exec_child_watch(GPid pid, gint status, gpointer data) (int32_t)gpid_to_int64(pid), (uint32_t)status); gei->status = status; - gei->finished = true; + atomic_mb_set(&gei->finished, true); g_spawn_close_pid(pid); } @@ -320,7 +321,7 @@ static gboolean guest_exec_input_watch(GIOChannel *ch, done: g_io_channel_shutdown(ch, true, NULL); g_io_channel_unref(ch); - g_atomic_int_set(&p->closed, 1); + atomic_mb_set(&p->closed, true); g_free(p->data); return false; @@ -374,7 +375,7 @@ static gboolean guest_exec_output_watch(GIOChannel *ch, close: g_io_channel_shutdown(ch, true, NULL); g_io_channel_unref(ch); - g_atomic_int_set(&p->closed, 1); + atomic_mb_set(&p->closed, true); return false; } diff --git a/qga/guest-agent-command-state.c b/qga/guest-agent-command-state.c index 4de229cd78..e609d320f0 100644 --- a/qga/guest-agent-command-state.c +++ b/qga/guest-agent-command-state.c @@ -71,3 +71,9 @@ GACommandState *ga_command_state_new(void) cs->groups = NULL; return cs; } + +void ga_command_state_free(GACommandState *cs) +{ + g_slist_free_full(cs->groups, g_free); + g_free(cs); +} diff --git a/qga/guest-agent-core.h b/qga/guest-agent-core.h index 0a49516045..63e9d398ae 
100644 --- a/qga/guest-agent-core.h +++ b/qga/guest-agent-core.h @@ -28,6 +28,7 @@ void ga_command_state_add(GACommandState *cs, void ga_command_state_init_all(GACommandState *cs); void ga_command_state_cleanup_all(GACommandState *cs); GACommandState *ga_command_state_new(void); +void ga_command_state_free(GACommandState *cs); bool ga_logging_enabled(GAState *s); void ga_disable_logging(GAState *s); void ga_enable_logging(GAState *s); diff --git a/qga/main.c b/qga/main.c index 4c3b2c772b..6caf215575 100644 --- a/qga/main.c +++ b/qga/main.c @@ -190,8 +190,8 @@ static void usage(const char *cmd) "Usage: %s [-m -p ] []\n" "QEMU Guest Agent %s\n" "\n" -" -m, --method transport method: one of unix-listen, virtio-serial, or\n" -" isa-serial (virtio-serial is the default)\n" +" -m, --method transport method: one of unix-listen, virtio-serial,\n" +" isa-serial, or vsock-listen (virtio-serial is the default)\n" " -p, --path device/socket path (the default for virtio-serial is:\n" " %s,\n" " the default for isa-serial is:\n" @@ -659,6 +659,8 @@ static gboolean channel_init(GAState *s, const gchar *method, const gchar *path) channel_method = GA_CHANNEL_ISA_SERIAL; } else if (strcmp(method, "unix-listen") == 0) { channel_method = GA_CHANNEL_UNIX_LISTEN; + } else if (strcmp(method, "vsock-listen") == 0) { + channel_method = GA_CHANNEL_VSOCK_LISTEN; } else { g_critical("unsupported channel method/type: %s", method); return false; @@ -1175,6 +1177,7 @@ static void config_free(GAConfig *config) #ifdef CONFIG_FSFREEZE g_free(config->fsfreeze_hook); #endif + g_list_free_full(config->blacklist, g_free); g_free(config); } @@ -1310,11 +1313,6 @@ static int run_agent(GAState *s, GAConfig *config) return EXIT_SUCCESS; } -static void free_blacklist_entry(gpointer entry, gpointer unused) -{ - g_free(entry); -} - int main(int argc, char **argv) { int ret = EXIT_SUCCESS; @@ -1375,11 +1373,12 @@ int main(int argc, char **argv) end: if (s->command_state) { ga_command_state_cleanup_all(s->command_state); + ga_command_state_free(s->command_state); + json_message_parser_destroy(&s->parser); } if (s->channel) { ga_channel_free(s->channel); } - g_list_foreach(config->blacklist, free_blacklist_entry, NULL); g_free(s->pstate_filepath); g_free(s->state_filepath_isfrozen); @@ -1388,6 +1387,10 @@ int main(int argc, char **argv) } config_free(config); + if (s->main_loop) { + g_main_loop_unref(s->main_loop); + } + g_free(s); return ret; } diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index c21f3084dc..94c03128fd 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -102,7 +102,7 @@ # # Returns: Time in nanoseconds. # -# Since 1.5 +# Since: 1.5 ## { 'command': 'guest-get-time', 'returns': 'int' } @@ -149,13 +149,13 @@ # @success-response: whether command returns a response on success # (since 1.7) # -# Since 1.1.0 +# Since: 1.1.0 ## { 'struct': 'GuestAgentCommandInfo', 'data': { 'name': 'str', 'enabled': 'bool', 'success-response': 'bool' } } ## -# @GuestAgentInfo +# @GuestAgentInfo: # # Information about guest agent. # @@ -163,7 +163,7 @@ # # @supported_commands: Information about guest agent commands # -# Since 0.15.0 +# Since: 0.15.0 ## { 'struct': 'GuestAgentInfo', 'data': { 'version': 'str', @@ -203,7 +203,7 @@ # # Open a file in the guest and retrieve a file handle for it # -# @filepath: Full path to the file in the guest to open. +# @path: Full path to the file in the guest to open. # # @mode: #optional open mode, as per fopen(), "r" is the default. 
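Usage note for the vsock-listen transport added above: inside a guest with a virtio-vsock device, the agent would be started with something along the lines of "qemu-ga -m vsock-listen -p 3:5922", where the 3:5922 CID:port pair is purely illustrative; as the channel-posix.c hunk shows, the path string is prefixed with "vsock:" and handed to socket_parse()/socket_listen(), mirroring the existing unix-listen socket-path form.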
# @@ -230,7 +230,7 @@ 'data': { 'handle': 'int' } } ## -# @GuestFileRead +# @GuestFileRead: # # Result of guest agent file-read operation # @@ -264,7 +264,7 @@ 'returns': 'GuestFileRead' } ## -# @GuestFileWrite +# @GuestFileWrite: # # Result of guest agent file-write operation # @@ -300,7 +300,7 @@ ## -# @GuestFileSeek +# @GuestFileSeek: # # Result of guest agent file-seek operation # @@ -378,7 +378,7 @@ 'data': { 'handle': 'int' } } ## -# @GuestFsFreezeStatus +# @GuestFsfreezeStatus: # # An enumeration of filesystem freeze states # @@ -455,7 +455,7 @@ 'returns': 'int' } ## -# @GuestFilesystemTrimResult +# @GuestFilesystemTrimResult: # # @path: path that was trimmed # @error: an error message when trim failed @@ -469,7 +469,7 @@ '*trimmed': 'int', '*minimum': 'int', '*error': 'str'} } ## -# @GuestFilesystemTrimResponse +# @GuestFilesystemTrimResponse: # # @paths: list of @GuestFilesystemTrimResult per path that was trimmed # @@ -501,7 +501,7 @@ 'returns': 'GuestFilesystemTrimResponse' } ## -# @guest-suspend-disk +# @guest-suspend-disk: # # Suspend guest to disk. # @@ -529,7 +529,7 @@ { 'command': 'guest-suspend-disk', 'success-response': false } ## -# @guest-suspend-ram +# @guest-suspend-ram: # # Suspend guest to ram. # @@ -561,7 +561,7 @@ { 'command': 'guest-suspend-ram', 'success-response': false } ## -# @guest-suspend-hybrid +# @guest-suspend-hybrid: # # Save guest state to disk and suspend to ram. # @@ -720,7 +720,7 @@ 'returns': 'int' } ## -# @GuestDiskBusType +# @GuestDiskBusType: # # An enumeration of bus type of disks # @@ -770,7 +770,7 @@ # @GuestDiskAddress: # # @pci-controller: controller's PCI address -# @type: bus type +# @bus-type: bus type # @bus: bus id # @target: target id # @unit: unit id @@ -783,7 +783,7 @@ 'bus': 'int', 'target': 'int', 'unit': 'int'} } ## -# @GuestFilesystemInfo +# @GuestFilesystemInfo: # # @name: disk name # @mountpoint: mount point path @@ -811,7 +811,7 @@ 'returns': ['GuestFilesystemInfo'] } ## -# @guest-set-user-password +# @guest-set-user-password: # # @username: the user account whose password to change # @password: the new password entry string, base64 encoded @@ -832,11 +832,12 @@ # # Returns: Nothing on success. # -# Since 2.3 +# Since: 2.3 ## { 'command': 'guest-set-user-password', 'data': { 'username': 'str', 'password': 'str', 'crypted': 'bool' } } +## # @GuestMemoryBlock: # # @phys-index: Arbitrary guest-specific unique identifier of the MEMORY BLOCK. @@ -872,7 +873,7 @@ 'returns': ['GuestMemoryBlock'] } ## -# @GuestMemoryBlockResponseType +# @GuestMemoryBlockResponseType: # # An enumeration of memory block operation result. # @@ -936,6 +937,7 @@ 'data': {'mem-blks': ['GuestMemoryBlock'] }, 'returns': ['GuestMemoryBlockResponse'] } +## # @GuestMemoryBlockInfo: # # @size: the size (in bytes) of the guest memory blocks, @@ -952,14 +954,14 @@ # # Get information relating to guest memory blocks. # -# Returns: memory block size in bytes. # Returns: @GuestMemoryBlockInfo # -# Since 2.3 +# Since: 2.3 ## { 'command': 'guest-get-memory-block-info', 'returns': 'GuestMemoryBlockInfo' } +## # @GuestExecStatus: # # @exited: true if process has already terminated. @@ -982,7 +984,7 @@ '*out-data': 'str', '*err-data': 'str', '*out-truncated': 'bool', '*err-truncated': 'bool' }} ## -# @guest-exec-status +# @guest-exec-status: # # Check status of process associated with PID retrieved via guest-exec. # Reap the process and associated metadata if it has exited. @@ -991,7 +993,7 @@ # # Returns: GuestExecStatus on success. 
# -# Since 2.5 +# Since: 2.5 ## { 'command': 'guest-exec-status', 'data': { 'pid': 'int' }, @@ -1001,7 +1003,7 @@ # @GuestExec: # @pid: pid of child process in guest OS # -#Since: 2.5 +# Since: 2.5 ## { 'struct': 'GuestExec', 'data': { 'pid': 'int'} } diff --git a/qga/vss-win32/Makefile.objs b/qga/vss-win32/Makefile.objs index 7c96c6b288..23d08da225 100644 --- a/qga/vss-win32/Makefile.objs +++ b/qga/vss-win32/Makefile.objs @@ -7,7 +7,7 @@ $(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes -Wmis $(obj)/qga-vss.dll: LDFLAGS = -shared -Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi -luuid -static $(obj)/qga-vss.dll: $(obj-qga-vss-dll-obj-y) $(SRC_PATH)/$(obj)/qga-vss.def - $(call quiet-command,$(CXX) -o $@ $(qga-vss-dll-obj-y) $(SRC_PATH)/qga/vss-win32/qga-vss.def $(CXXFLAGS) $(LDFLAGS)," LINK $(TARGET_DIR)$@") + $(call quiet-command,$(CXX) -o $@ $(qga-vss-dll-obj-y) $(SRC_PATH)/qga/vss-win32/qga-vss.def $(CXXFLAGS) $(LDFLAGS),"LINK","$(TARGET_DIR)$@") # rules to build qga-provider.tlb @@ -17,7 +17,7 @@ MIDL=$(WIN_SDK)/Bin/midl $(obj)/qga-vss.tlb: $(SRC_PATH)/$(obj)/qga-vss.idl ifeq ($(WIN_SDK),"") - $(call quiet-command,cp $(dir $<)qga-vss.tlb $@, " COPY $(TARGET_DIR)$@") + $(call quiet-command,cp $(dir $<)qga-vss.tlb $@,"COPY","$(TARGET_DIR)$@") else - $(call quiet-command,$(MIDL) -tlb $@ -I $(WIN_SDK)/Include $<," MIDL $(TARGET_DIR)$@") + $(call quiet-command,$(MIDL) -tlb $@ -I $(WIN_SDK)/Include $<,"MIDL","$(TARGET_DIR)$@") endif diff --git a/qmp.c b/qmp.c index b6d531ebe2..0028f0b30e 100644 --- a/qmp.c +++ b/qmp.c @@ -18,6 +18,7 @@ #include "qemu/cutils.h" #include "monitor/monitor.h" #include "sysemu/sysemu.h" +#include "qemu/uuid.h" #include "qmp-commands.h" #include "sysemu/char.h" #include "ui/qemu-spice.h" @@ -30,7 +31,7 @@ #include "qom/qom-qobject.h" #include "qapi/qmp/qerror.h" #include "qapi/qmp/qobject.h" -#include "qapi/qmp-input-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "hw/boards.h" #include "qom/object_interfaces.h" #include "hw/mem/pc-dimm.h" @@ -51,21 +52,11 @@ NameInfo *qmp_query_name(Error **errp) VersionInfo *qmp_query_version(Error **errp) { VersionInfo *info = g_new0(VersionInfo, 1); - const char *version = QEMU_VERSION; - const char *tmp; - int err; info->qemu = g_new0(VersionTriple, 1); - err = qemu_strtoll(version, &tmp, 10, &info->qemu->major); - assert(err == 0); - tmp++; - - err = qemu_strtoll(tmp, &tmp, 10, &info->qemu->minor); - assert(err == 0); - tmp++; - - err = qemu_strtoll(tmp, &tmp, 10, &info->qemu->micro); - assert(err == 0); + info->qemu->major = QEMU_VERSION_MAJOR; + info->qemu->minor = QEMU_VERSION_MINOR; + info->qemu->micro = QEMU_VERSION_MICRO; info->package = g_strdup(QEMU_PKGVERSION); return info; @@ -84,15 +75,8 @@ KvmInfo *qmp_query_kvm(Error **errp) UuidInfo *qmp_query_uuid(Error **errp) { UuidInfo *info = g_malloc0(sizeof(*info)); - char uuid[64]; - snprintf(uuid, sizeof(uuid), UUID_FMT, qemu_uuid[0], qemu_uuid[1], - qemu_uuid[2], qemu_uuid[3], qemu_uuid[4], qemu_uuid[5], - qemu_uuid[6], qemu_uuid[7], qemu_uuid[8], qemu_uuid[9], - qemu_uuid[10], qemu_uuid[11], qemu_uuid[12], qemu_uuid[13], - qemu_uuid[14], qemu_uuid[15]); - - info->UUID = g_strdup(uuid); + info->UUID = qemu_uuid_unparse_strdup(&qemu_uuid); return info; } @@ -446,8 +430,8 @@ void qmp_change(const char *device, const char *target, if (strcmp(device, "vnc") == 0) { qmp_change_vnc(target, has_arg, arg, errp); } else { - qmp_blockdev_change_medium(device, target, has_arg, arg, false, 0, - errp); + 
qmp_blockdev_change_medium(true, device, false, NULL, target, + has_arg, arg, false, 0, errp); } } @@ -607,6 +591,27 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) return arch_query_cpu_definitions(errp); } +CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + return arch_query_cpu_model_expansion(type, model, errp); +} + +CpuModelCompareInfo *qmp_query_cpu_model_comparison(CpuModelInfo *modela, + CpuModelInfo *modelb, + Error **errp) +{ + return arch_query_cpu_model_comparison(modela, modelb, errp); +} + +CpuModelBaselineInfo *qmp_query_cpu_model_baseline(CpuModelInfo *modela, + CpuModelInfo *modelb, + Error **errp) +{ + return arch_query_cpu_model_baseline(modela, modelb, errp); +} + void qmp_add_client(const char *protocol, const char *fdname, bool has_skipauth, bool skipauth, bool has_tls, bool tls, Error **errp) @@ -654,7 +659,7 @@ void qmp_add_client(const char *protocol, const char *fdname, void qmp_object_add(const char *type, const char *id, bool has_props, QObject *props, Error **errp) { - const QDict *pdict = NULL; + QDict *pdict; Visitor *v; Object *obj; @@ -664,14 +669,18 @@ void qmp_object_add(const char *type, const char *id, error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict"); return; } + QINCREF(pdict); + } else { + pdict = qdict_new(); } - v = qmp_input_visitor_new(props, true); + v = qobject_input_visitor_new(QOBJECT(pdict), true); obj = user_creatable_add_type(type, id, pdict, v, errp); visit_free(v); if (obj) { object_unref(obj); } + QDECREF(pdict); } void qmp_object_del(const char *id, Error **errp) diff --git a/qobject/qdict.c b/qobject/qdict.c index 60f158c3b7..197b0fbd47 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -17,6 +17,7 @@ #include "qapi/qmp/qbool.h" #include "qapi/qmp/qstring.h" #include "qapi/qmp/qobject.h" +#include "qapi/error.h" #include "qemu/queue.h" #include "qemu-common.h" #include "qemu/cutils.h" @@ -683,6 +684,282 @@ void qdict_array_split(QDict *src, QList **dst) } } +/** + * qdict_split_flat_key: + * @key: the key string to split + * @prefix: non-NULL pointer to hold extracted prefix + * @suffix: non-NULL pointer to remaining suffix + * + * Given a flattened key such as 'foo.0.bar', split it into two parts + * at the first '.' separator. Allows double dot ('..') to escape the + * normal separator. + * + * e.g. + * 'foo.0.bar' -> prefix='foo' and suffix='0.bar' + * 'foo..0.bar' -> prefix='foo.0' and suffix='bar' + * + * The '..' sequence will be unescaped in the returned 'prefix' + * string. The 'suffix' string will be left in escaped format, so it + * can be fed back into the qdict_split_flat_key() key as the input + * later. + * + * The caller is responsible for freeing the string returned in @prefix + * using g_free(). + */ +static void qdict_split_flat_key(const char *key, char **prefix, + const char **suffix) +{ + const char *separator; + size_t i, j; + + /* Find first '.' separator, but if there is a pair '..' + * that acts as an escape, so skip over '..' */ + separator = NULL; + do { + if (separator) { + separator += 2; + } else { + separator = key; + } + separator = strchr(separator, '.'); + } while (separator && separator[1] == '.'); + + if (separator) { + *prefix = g_strndup(key, separator - key); + *suffix = separator + 1; + } else { + *prefix = g_strdup(key); + *suffix = NULL; + } + + /* Unescape the '..' sequence into '.' 
*/ + for (i = 0, j = 0; (*prefix)[i] != '\0'; i++, j++) { + if ((*prefix)[i] == '.') { + assert((*prefix)[i + 1] == '.'); + i++; + } + (*prefix)[j] = (*prefix)[i]; + } + (*prefix)[j] = '\0'; +} + +/** + * qdict_is_list: + * @maybe_list: dict to check if keys represent list elements. + * + * Determine whether all keys in @maybe_list are valid list elements. + * If @maybe_list is non-zero in length and all the keys look like + * valid list indexes, this will return 1. If @maybe_list is zero + * length or all keys are non-numeric then it will return 0 to indicate + * it is a normal qdict. If there is a mix of numeric and non-numeric + * keys, or the list indexes are non-contiguous, an error is reported. + * + * Returns: 1 if a valid list, 0 if a dict, -1 on error + */ +static int qdict_is_list(QDict *maybe_list, Error **errp) +{ + const QDictEntry *ent; + ssize_t len = 0; + ssize_t max = -1; + int is_list = -1; + int64_t val; + + for (ent = qdict_first(maybe_list); ent != NULL; + ent = qdict_next(maybe_list, ent)) { + + if (qemu_strtoll(ent->key, NULL, 10, &val) == 0) { + if (is_list == -1) { + is_list = 1; + } else if (!is_list) { + error_setg(errp, + "Cannot mix list and non-list keys"); + return -1; + } + len++; + if (val > max) { + max = val; + } + } else { + if (is_list == -1) { + is_list = 0; + } else if (is_list) { + error_setg(errp, + "Cannot mix list and non-list keys"); + return -1; + } + } + } + + if (is_list == -1) { + assert(!qdict_size(maybe_list)); + is_list = 0; + } + + /* NB this isn't a perfect check - e.g. it won't catch + * a list containing '1', '+1', '01', '3', but that + * does not matter - we've still proved that the + * input is a list. It is up the caller to do a + * stricter check if desired */ + if (len != (max + 1)) { + error_setg(errp, "List indices are not contiguous, " + "saw %zd elements but %zd largest index", + len, max); + return -1; + } + + return is_list; +} + +/** + * qdict_crumple: + * @src: the original flat dictionary (only scalar values) to crumple + * + * Takes a flat dictionary whose keys use '.' separator to indicate + * nesting, and values are scalars, and crumples it into a nested + * structure. + * + * To include a literal '.' in a key name, it must be escaped as '..' + * + * For example, an input of: + * + * { 'foo.0.bar': 'one', 'foo.0.wizz': '1', + * 'foo.1.bar': 'two', 'foo.1.wizz': '2' } + * + * will result in an output of: + * + * { + * 'foo': [ + * { 'bar': 'one', 'wizz': '1' }, + * { 'bar': 'two', 'wizz': '2' } + * ], + * } + * + * The following scenarios in the input dict will result in an + * error being returned: + * + * - Any values in @src are non-scalar types + * - If keys in @src imply that a particular level is both a + * list and a dict. e.g., "foo.0.bar" and "foo.eek.bar". + * - If keys in @src imply that a particular level is a list, + * but the indices are non-contiguous. e.g. "foo.0.bar" and + * "foo.2.bar" without any "foo.1.bar" present. + * - If keys in @src represent list indexes, but are not in + * the "%zu" format. e.g. 
"foo.+0.bar" + * + * Returns: either a QDict or QList for the nested data structure, or NULL + * on error + */ +QObject *qdict_crumple(const QDict *src, Error **errp) +{ + const QDictEntry *ent; + QDict *two_level, *multi_level = NULL; + QObject *dst = NULL, *child; + size_t i; + char *prefix = NULL; + const char *suffix = NULL; + int is_list; + + two_level = qdict_new(); + + /* Step 1: split our totally flat dict into a two level dict */ + for (ent = qdict_first(src); ent != NULL; ent = qdict_next(src, ent)) { + if (qobject_type(ent->value) == QTYPE_QDICT || + qobject_type(ent->value) == QTYPE_QLIST) { + error_setg(errp, "Value %s is not a scalar", + ent->key); + goto error; + } + + qdict_split_flat_key(ent->key, &prefix, &suffix); + + child = qdict_get(two_level, prefix); + if (suffix) { + if (child) { + if (qobject_type(child) != QTYPE_QDICT) { + error_setg(errp, "Key %s prefix is already set as a scalar", + prefix); + goto error; + } + } else { + child = QOBJECT(qdict_new()); + qdict_put_obj(two_level, prefix, child); + } + qobject_incref(ent->value); + qdict_put_obj(qobject_to_qdict(child), suffix, ent->value); + } else { + if (child) { + error_setg(errp, "Key %s prefix is already set as a dict", + prefix); + goto error; + } + qobject_incref(ent->value); + qdict_put_obj(two_level, prefix, ent->value); + } + + g_free(prefix); + prefix = NULL; + } + + /* Step 2: optionally process the two level dict recursively + * into a multi-level dict */ + multi_level = qdict_new(); + for (ent = qdict_first(two_level); ent != NULL; + ent = qdict_next(two_level, ent)) { + + if (qobject_type(ent->value) == QTYPE_QDICT) { + child = qdict_crumple(qobject_to_qdict(ent->value), errp); + if (!child) { + goto error; + } + + qdict_put_obj(multi_level, ent->key, child); + } else { + qobject_incref(ent->value); + qdict_put_obj(multi_level, ent->key, ent->value); + } + } + QDECREF(two_level); + two_level = NULL; + + /* Step 3: detect if we need to turn our dict into list */ + is_list = qdict_is_list(multi_level, errp); + if (is_list < 0) { + goto error; + } + + if (is_list) { + dst = QOBJECT(qlist_new()); + + for (i = 0; i < qdict_size(multi_level); i++) { + char *key = g_strdup_printf("%zu", i); + + child = qdict_get(multi_level, key); + g_free(key); + + if (!child) { + error_setg(errp, "Missing list index %zu", i); + goto error; + } + + qobject_incref(child); + qlist_append_obj(qobject_to_qlist(dst), child); + } + QDECREF(multi_level); + multi_level = NULL; + } else { + dst = QOBJECT(multi_level); + } + + return dst; + + error: + g_free(prefix); + QDECREF(multi_level); + QDECREF(two_level); + qobject_decref(dst); + return NULL; +} + /** * qdict_array_entries(): Returns the number of direct array entries if the * sub-QDict of src specified by the prefix in subqdict (or src itself for diff --git a/qom/cpu.c b/qom/cpu.c index 83c98a3837..ee58280a60 100644 --- a/qom/cpu.c +++ b/qom/cpu.c @@ -29,6 +29,7 @@ #include "qemu/error-report.h" #include "sysemu/sysemu.h" #include "hw/qdev-properties.h" +#include "trace.h" bool cpu_exists(int64_t id) { @@ -119,10 +120,10 @@ void cpu_reset_interrupt(CPUState *cpu, int mask) void cpu_exit(CPUState *cpu) { - cpu->exit_request = 1; + atomic_set(&cpu->exit_request, 1); /* Ensure cpu_exec will see the exit request after TCG has exited. 
*/ smp_wmb(); - cpu->tcg_exit_req = 1; + atomic_set(&cpu->tcg_exit_req, 1); } int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, @@ -249,11 +250,14 @@ void cpu_reset(CPUState *cpu) if (klass->reset != NULL) { (*klass->reset)(cpu); } + + trace_guest_cpu_reset(cpu); } static void cpu_common_reset(CPUState *cpu) { CPUClass *cc = CPU_GET_CLASS(cpu); + int i; if (qemu_loglevel_mask(CPU_LOG_RESET)) { qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index); @@ -269,7 +273,10 @@ static void cpu_common_reset(CPUState *cpu) cpu->can_do_io = 1; cpu->exception_index = -1; cpu->crash_occurred = false; - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); + + for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { + atomic_set(&cpu->tb_jmp_cache[i], NULL); + } } static bool cpu_common_has_work(CPUState *cs) @@ -337,6 +344,15 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp) cpu_synchronize_post_init(cpu); cpu_resume(cpu); } + + /* NOTE: latest generic point where the cpu is fully realized */ + trace_init_vcpu(cpu); +} + +static void cpu_common_unrealizefn(DeviceState *dev, Error **errp) +{ + CPUState *cpu = CPU(dev); + cpu_exec_unrealizefn(cpu); } static void cpu_common_initfn(Object *obj) @@ -346,15 +362,24 @@ static void cpu_common_initfn(Object *obj) cpu->cpu_index = UNASSIGNED_CPU_INDEX; cpu->gdb_num_regs = cpu->gdb_num_g_regs = cc->gdb_num_core_regs; + /* *-user doesn't have configurable SMP topology */ + /* the default value is changed by qemu_init_vcpu() for softmmu */ + cpu->nr_cores = 1; + cpu->nr_threads = 1; + qemu_mutex_init(&cpu->work_mutex); QTAILQ_INIT(&cpu->breakpoints); QTAILQ_INIT(&cpu->watchpoints); - bitmap_zero(cpu->trace_dstate, TRACE_VCPU_EVENT_COUNT); + + cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count()); + + cpu_exec_initfn(cpu); } static void cpu_common_finalize(Object *obj) { - cpu_exec_exit(CPU(obj)); + CPUState *cpu = CPU(obj); + g_free(cpu->trace_dstate); } static int64_t cpu_common_get_arch_id(CPUState *cpu) @@ -387,6 +412,7 @@ static void cpu_class_init(ObjectClass *klass, void *data) k->cpu_exec_exit = cpu_common_noop; k->cpu_exec_interrupt = cpu_common_exec_interrupt; dc->realize = cpu_common_realizefn; + dc->unrealize = cpu_common_unrealizefn; /* * Reason: CPUs still need special care by board code: wiring up * IRQs, adding reset handlers, halting non-first CPUs, ... 
diff --git a/qom/object.c b/qom/object.c index 72d3c65a66..a2e796a1c9 100644 --- a/qom/object.c +++ b/qom/object.c @@ -620,7 +620,7 @@ Object *object_dynamic_cast_assert(Object *obj, const char *typename, Object *inst; for (i = 0; obj && i < OBJECT_CLASS_CAST_CACHE; i++) { - if (obj->class->object_cast_cache[i] == typename) { + if (atomic_read(&obj->class->object_cast_cache[i]) == typename) { goto out; } } @@ -637,10 +637,10 @@ Object *object_dynamic_cast_assert(Object *obj, const char *typename, if (obj && obj == inst) { for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) { - obj->class->object_cast_cache[i - 1] = - obj->class->object_cast_cache[i]; + atomic_set(&obj->class->object_cast_cache[i - 1], + atomic_read(&obj->class->object_cast_cache[i])); } - obj->class->object_cast_cache[i - 1] = typename; + atomic_set(&obj->class->object_cast_cache[i - 1], typename); } out: @@ -710,7 +710,7 @@ ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class, int i; for (i = 0; class && i < OBJECT_CLASS_CAST_CACHE; i++) { - if (class->class_cast_cache[i] == typename) { + if (atomic_read(&class->class_cast_cache[i]) == typename) { ret = class; goto out; } @@ -731,9 +731,10 @@ ObjectClass *object_class_dynamic_cast_assert(ObjectClass *class, #ifdef CONFIG_QOM_CAST_DEBUG if (class && ret == class) { for (i = 1; i < OBJECT_CLASS_CAST_CACHE; i++) { - class->class_cast_cache[i - 1] = class->class_cast_cache[i]; + atomic_set(&class->class_cast_cache[i - 1], + atomic_read(&class->class_cast_cache[i])); } - class->class_cast_cache[i - 1] = typename; + atomic_set(&class->class_cast_cache[i - 1], typename); } out: #endif diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c index bf598468ab..ded4d84c85 100644 --- a/qom/object_interfaces.c +++ b/qom/object_interfaces.c @@ -3,7 +3,7 @@ #include "qom/object_interfaces.h" #include "qemu/module.h" #include "qapi-visit.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/opts-visitor.h" void user_creatable_complete(Object *obj, Error **errp) diff --git a/qom/qom-qobject.c b/qom/qom-qobject.c index 6bc8aed78e..78d0b99324 100644 --- a/qom/qom-qobject.c +++ b/qom/qom-qobject.c @@ -15,8 +15,8 @@ #include "qom/object.h" #include "qom/qom-qobject.h" #include "qapi/visitor.h" -#include "qapi/qmp-input-visitor.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #if defined(CONFIG_GNU_ARM_ECLIPSE) #include "qapi/qmp/types.h" @@ -46,7 +46,7 @@ void object_property_set_qobject(Object *obj, QObject *value, { Visitor *v; /* TODO: Should we reject, rather than ignore, excess input? 
*/ - v = qmp_input_visitor_new(value, false); + v = qobject_input_visitor_new(value, false); object_property_set(obj, v, name, errp); visit_free(v); } @@ -58,7 +58,7 @@ QObject *object_property_get_qobject(Object *obj, const char *name, Error *local_err = NULL; Visitor *v; - v = qmp_output_visitor_new(&ret); + v = qobject_output_visitor_new(&ret); object_property_get(obj, v, name, &local_err); if (!local_err) { visit_complete(v, &ret); diff --git a/qtest.c b/qtest.c index da4826c69f..46b99aed52 100644 --- a/qtest.c +++ b/qtest.c @@ -27,6 +27,10 @@ #include "qemu/config-file.h" #include "qemu/option.h" #include "qemu/error-report.h" +#include "qemu/cutils.h" +#ifdef TARGET_PPC64 +#include "hw/ppc/spapr_rtas.h" +#endif #define MAX_IRQ 256 @@ -34,7 +38,7 @@ bool qtest_allowed; static DeviceState *irq_intercept_dev; static FILE *qtest_log_fp; -static CharDriverState *qtest_chr; +static CharBackend qtest_chr; static GString *inbuf; static int irq_levels[MAX_IRQ]; static qemu_timeval start_time; @@ -133,6 +137,7 @@ static bool qtest_opened; * < OK * * ADDR, SIZE, VALUE are all integers parsed with strtoul() with a base of 0. + * For 'memset' a zero size is permitted and does nothing. * * DATA is an arbitrarily long hex number prefixed with '0x'. If it's smaller * than the expected size, the value will be zero filled at the end of the data @@ -185,7 +190,7 @@ static void qtest_get_time(qemu_timeval *tv) } } -static void qtest_send_prefix(CharDriverState *chr) +static void qtest_send_prefix(CharBackend *chr) { qemu_timeval tv; @@ -213,7 +218,7 @@ static void GCC_FMT_ATTR(1, 2) qtest_log_send(const char *fmt, ...) va_end(ap); } -static void do_qtest_send(CharDriverState *chr, const char *str, size_t len) +static void do_qtest_send(CharBackend *chr, const char *str, size_t len) { qemu_chr_fe_write_all(chr, (uint8_t *)str, len); if (qtest_log_fp && qtest_opened) { @@ -221,12 +226,12 @@ static void do_qtest_send(CharDriverState *chr, const char *str, size_t len) } } -static void qtest_send(CharDriverState *chr, const char *str) +static void qtest_send(CharBackend *chr, const char *str) { do_qtest_send(chr, str, strlen(str)); } -static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharDriverState *chr, +static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharBackend *chr, const char *fmt, ...) 
{ va_list ap; @@ -244,7 +249,7 @@ static void qtest_irq_handler(void *opaque, int n, int level) qemu_set_irq(old_irq, level); if (irq_levels[n] != level) { - CharDriverState *chr = qtest_chr; + CharBackend *chr = &qtest_chr; irq_levels[n] = level; qtest_send_prefix(chr); qtest_sendf(chr, "IRQ %s %d\n", @@ -252,7 +257,7 @@ static void qtest_irq_handler(void *opaque, int n, int level) } } -static void qtest_process_command(CharDriverState *chr, gchar **words) +static void qtest_process_command(CharBackend *chr, gchar **words) { const gchar *command; @@ -324,12 +329,13 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) } else if (strcmp(words[0], "outb") == 0 || strcmp(words[0], "outw") == 0 || strcmp(words[0], "outl") == 0) { - uint16_t addr; - uint32_t value; + unsigned long addr; + unsigned long value; g_assert(words[1] && words[2]); - addr = strtoul(words[1], NULL, 0); - value = strtoul(words[2], NULL, 0); + g_assert(qemu_strtoul(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoul(words[2], NULL, 0, &value) == 0); + g_assert(addr <= 0xffff); if (words[0][3] == 'b') { cpu_outb(addr, value); @@ -343,11 +349,12 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) } else if (strcmp(words[0], "inb") == 0 || strcmp(words[0], "inw") == 0 || strcmp(words[0], "inl") == 0) { - uint16_t addr; + unsigned long addr; uint32_t value = -1U; g_assert(words[1]); - addr = strtoul(words[1], NULL, 0); + g_assert(qemu_strtoul(words[1], NULL, 0, &addr) == 0); + g_assert(addr <= 0xffff); if (words[0][2] == 'b') { value = cpu_inb(addr); @@ -366,8 +373,8 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) uint64_t value; g_assert(words[1] && words[2]); - addr = strtoull(words[1], NULL, 0); - value = strtoull(words[2], NULL, 0); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoull(words[2], NULL, 0, &value) == 0); if (words[0][5] == 'b') { uint8_t data = value; @@ -395,7 +402,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) uint64_t value = UINT64_C(-1); g_assert(words[1]); - addr = strtoull(words[1], NULL, 0); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); if (words[0][4] == 'b') { uint8_t data; @@ -421,8 +428,8 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) char *enc; g_assert(words[1] && words[2]); - addr = strtoull(words[1], NULL, 0); - len = strtoull(words[2], NULL, 0); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); data = g_malloc(len); cpu_physical_memory_read(addr, data, len); @@ -443,8 +450,8 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) gchar *b64_data; g_assert(words[1] && words[2]); - addr = strtoull(words[1], NULL, 0); - len = strtoull(words[2], NULL, 0); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); data = g_malloc(len); cpu_physical_memory_read(addr, data, len); @@ -460,8 +467,8 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) size_t data_len; g_assert(words[1] && words[2] && words[3]); - addr = strtoull(words[1], NULL, 0); - len = strtoull(words[2], NULL, 0); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); data_len = strlen(words[3]); if (data_len < 3) { @@ -486,17 +493,19 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) } else if (strcmp(words[0], "memset") 
== 0) { uint64_t addr, len; uint8_t *data; - uint8_t pattern; + unsigned long pattern; g_assert(words[1] && words[2] && words[3]); - addr = strtoull(words[1], NULL, 0); - len = strtoull(words[2], NULL, 0); - pattern = strtoull(words[3], NULL, 0); - - data = g_malloc(len); - memset(data, pattern, len); - cpu_physical_memory_write(addr, data, len); - g_free(data); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); + g_assert(qemu_strtoul(words[3], NULL, 0, &pattern) == 0); + + if (len) { + data = g_malloc(len); + memset(data, pattern, len); + cpu_physical_memory_write(addr, data, len); + g_free(data); + } qtest_send_prefix(chr); qtest_send(chr, "OK\n"); @@ -507,8 +516,8 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) gsize out_len; g_assert(words[1] && words[2] && words[3]); - addr = strtoull(words[1], NULL, 0); - len = strtoull(words[2], NULL, 0); + g_assert(qemu_strtoull(words[1], NULL, 0, &addr) == 0); + g_assert(qemu_strtoull(words[2], NULL, 0, &len) == 0); data_len = strlen(words[3]); if (data_len < 3) { @@ -528,11 +537,32 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) qtest_send_prefix(chr); qtest_send(chr, "OK\n"); + } else if (strcmp(words[0], "endianness") == 0) { + qtest_send_prefix(chr); +#if defined(TARGET_WORDS_BIGENDIAN) + qtest_sendf(chr, "OK big\n"); +#else + qtest_sendf(chr, "OK little\n"); +#endif +#ifdef TARGET_PPC64 + } else if (strcmp(words[0], "rtas") == 0) { + uint64_t res, args, ret; + unsigned long nargs, nret; + + g_assert(qemu_strtoul(words[2], NULL, 0, &nargs) == 0); + g_assert(qemu_strtoull(words[3], NULL, 0, &args) == 0); + g_assert(qemu_strtoul(words[4], NULL, 0, &nret) == 0); + g_assert(qemu_strtoull(words[5], NULL, 0, &ret) == 0); + res = qtest_rtas_call(words[1], nargs, args, nret, ret); + + qtest_send_prefix(chr); + qtest_sendf(chr, "OK %"PRIu64"\n", res); +#endif } else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) { int64_t ns; if (words[1]) { - ns = strtoll(words[1], NULL, 0); + g_assert(qemu_strtoll(words[1], NULL, 0, &ns) == 0); } else { ns = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); } @@ -544,7 +574,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) int64_t ns; g_assert(words[1]); - ns = strtoll(words[1], NULL, 0); + g_assert(qemu_strtoll(words[1], NULL, 0, &ns) == 0); qtest_clock_warp(ns); qtest_send_prefix(chr); qtest_sendf(chr, "OK %"PRIi64"\n", @@ -555,7 +585,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words) } } -static void qtest_process_inbuf(CharDriverState *chr, GString *inbuf) +static void qtest_process_inbuf(CharBackend *chr, GString *inbuf) { char *end; @@ -579,7 +609,7 @@ static void qtest_process_inbuf(CharDriverState *chr, GString *inbuf) static void qtest_read(void *opaque, const uint8_t *buf, int size) { - CharDriverState *chr = opaque; + CharBackend *chr = opaque; g_string_append_len(inbuf, (const gchar *)buf, size); qtest_process_inbuf(chr, inbuf); @@ -640,7 +670,7 @@ void qtest_init(const char *qtest_chrdev, const char *qtest_log, Error **errp) { CharDriverState *chr; - chr = qemu_chr_new("qtest", qtest_chrdev, NULL); + chr = qemu_chr_new("qtest", qtest_chrdev); if (chr == NULL) { error_setg(errp, "Failed to initialize device for qtest: \"%s\"", @@ -656,16 +686,17 @@ void qtest_init(const char *qtest_chrdev, const char *qtest_log, Error **errp) qtest_log_fp = stderr; } - qemu_chr_add_handlers(chr, qtest_can_read, qtest_read, qtest_event, chr); - 
qemu_chr_fe_set_echo(chr, true); + qemu_chr_fe_init(&qtest_chr, chr, errp); + qemu_chr_fe_set_handlers(&qtest_chr, qtest_can_read, qtest_read, + qtest_event, &qtest_chr, NULL, true); + qemu_chr_fe_set_echo(&qtest_chr, true); inbuf = g_string_new(""); - qtest_chr = chr; } bool qtest_driver(void) { - return qtest_chr; + return qtest_chr.chr != NULL; } static void qtest_accel_class_init(ObjectClass *oc, void *data) diff --git a/replay/Makefile.objs b/replay/Makefile.objs index fcb3f74d60..c8ad3ebb89 100644 --- a/replay/Makefile.objs +++ b/replay/Makefile.objs @@ -4,3 +4,4 @@ common-obj-y += replay-events.o common-obj-y += replay-time.o common-obj-y += replay-input.o common-obj-y += replay-char.o +common-obj-y += replay-snapshot.o diff --git a/replay/replay-events.c b/replay/replay-events.c index 3807245ae7..c513913671 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -279,7 +279,7 @@ static Event *replay_read_event(int checkpoint) /* Called with replay mutex locked */ void replay_read_events(int checkpoint) { - while (replay_data_kind == EVENT_ASYNC) { + while (replay_state.data_kind == EVENT_ASYNC) { Event *event = replay_read_event(checkpoint); if (!event) { break; @@ -309,3 +309,11 @@ bool replay_events_enabled(void) { return events_enabled; } + +uint64_t blkreplay_next_id(void) +{ + if (replay_events_enabled()) { + return replay_state.block_request_id++; + } + return 0; +} diff --git a/replay/replay-internal.c b/replay/replay-internal.c index 5835e8def3..bea7b4aa6b 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -16,11 +16,8 @@ #include "qemu/error-report.h" #include "sysemu/sysemu.h" -unsigned int replay_data_kind = -1; -static unsigned int replay_has_unread_data; - /* Mutex to protect reading and writing events to the log. - replay_data_kind and replay_has_unread_data are also protected + data_kind and has_unread_data are also protected by this mutex. It also protects replay events queue which stores events to be written or read to the log. */ @@ -150,15 +147,16 @@ void replay_check_error(void) void replay_fetch_data_kind(void) { if (replay_file) { - if (!replay_has_unread_data) { - replay_data_kind = replay_get_byte(); - if (replay_data_kind == EVENT_INSTRUCTION) { + if (!replay_state.has_unread_data) { + replay_state.data_kind = replay_get_byte(); + if (replay_state.data_kind == EVENT_INSTRUCTION) { replay_state.instructions_count = replay_get_dword(); } replay_check_error(); - replay_has_unread_data = 1; - if (replay_data_kind >= EVENT_COUNT) { - error_report("Replay: unknown event kind %d", replay_data_kind); + replay_state.has_unread_data = 1; + if (replay_state.data_kind >= EVENT_COUNT) { + error_report("Replay: unknown event kind %d", + replay_state.data_kind); exit(1); } } @@ -167,7 +165,7 @@ void replay_fetch_data_kind(void) void replay_finish_event(void) { - replay_has_unread_data = 0; + replay_state.has_unread_data = 0; replay_fetch_data_kind(); } diff --git a/replay/replay-internal.h b/replay/replay-internal.h index efbf14c8a7..9117e442d0 100644 --- a/replay/replay-internal.h +++ b/replay/replay-internal.h @@ -62,11 +62,19 @@ typedef struct ReplayState { uint64_t current_step; /*! Number of instructions to be executed before other events happen. */ int instructions_count; + /*! Type of the currently executed event. */ + unsigned int data_kind; + /*! Flag which indicates that event is not processed yet. */ + unsigned int has_unread_data; + /*! Temporary variable for saving current log offset. */ + uint64_t file_offset; + /*! 
Next block operation id. + This counter is global, because requests from different + block devices should not get overlapping ids. */ + uint64_t block_request_id; } ReplayState; extern ReplayState replay_state; -extern unsigned int replay_data_kind; - /* File for replay writing */ extern FILE *replay_file; @@ -98,7 +106,7 @@ void replay_check_error(void); the next event from the log. */ void replay_finish_event(void); /*! Reads data type from the file and stores it in the - replay_data_kind variable. */ + data_kind variable. */ void replay_fetch_data_kind(void); /*! Saves queued events (like instructions and sound). */ @@ -119,8 +127,6 @@ void replay_read_next_clock(unsigned int kind); void replay_init_events(void); /*! Clears internal data structures for events handling */ void replay_finish_events(void); -/*! Enables storing events in the queue */ -void replay_enable_events(void); /*! Flushes events queue */ void replay_flush_events(void); /*! Clears events list before loading new VM state */ @@ -155,4 +161,11 @@ void replay_event_char_read_save(void *opaque); /*! Reads char event read from the file. */ void *replay_event_char_read_load(void); +/* VMState-related functions */ + +/* Registers replay VMState. + Should be called before virtual devices initialization + to make cached timers available for post_load functions. */ +void replay_vmstate_register(void); + #endif diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c new file mode 100644 index 0000000000..498059734d --- /dev/null +++ b/replay/replay-snapshot.c @@ -0,0 +1,61 @@ +/* + * replay-snapshot.c + * + * Copyright (c) 2010-2016 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "sysemu/replay.h" +#include "replay-internal.h" +#include "sysemu/sysemu.h" +#include "monitor/monitor.h" +#include "qapi/qmp/qstring.h" +#include "qemu/error-report.h" +#include "migration/vmstate.h" + +static void replay_pre_save(void *opaque) +{ + ReplayState *state = opaque; + state->file_offset = ftell(replay_file); +} + +static int replay_post_load(void *opaque, int version_id) +{ + ReplayState *state = opaque; + fseek(replay_file, state->file_offset, SEEK_SET); + /* If this was a vmstate, saved in recording mode, + we need to initialize replay data fields. 
*/ + replay_fetch_data_kind(); + + return 0; +} + +static const VMStateDescription vmstate_replay = { + .name = "replay", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = replay_pre_save, + .post_load = replay_post_load, + .fields = (VMStateField[]) { + VMSTATE_INT64_ARRAY(cached_clock, ReplayState, REPLAY_CLOCK_COUNT), + VMSTATE_UINT64(current_step, ReplayState), + VMSTATE_INT32(instructions_count, ReplayState), + VMSTATE_UINT32(data_kind, ReplayState), + VMSTATE_UINT32(has_unread_data, ReplayState), + VMSTATE_UINT64(file_offset, ReplayState), + VMSTATE_UINT64(block_request_id, ReplayState), + VMSTATE_END_OF_LIST() + }, +}; + +void replay_vmstate_register(void) +{ + vmstate_register(NULL, 0, &vmstate_replay, &replay_state); +} diff --git a/replay/replay-time.c b/replay/replay-time.c index fffe072c55..f70382a88f 100644 --- a/replay/replay-time.c +++ b/replay/replay-time.c @@ -31,7 +31,7 @@ int64_t replay_save_clock(ReplayClockKind kind, int64_t clock) void replay_read_next_clock(ReplayClockKind kind) { - unsigned int read_kind = replay_data_kind - EVENT_CLOCK; + unsigned int read_kind = replay_state.data_kind - EVENT_CLOCK; assert(read_kind == kind); diff --git a/replay/replay.c b/replay/replay.c index 167fd2942d..c797aeae8a 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -38,15 +38,15 @@ bool replay_next_event_is(int event) /* nothing to skip - not all instructions used */ if (replay_state.instructions_count != 0) { - assert(replay_data_kind == EVENT_INSTRUCTION); + assert(replay_state.data_kind == EVENT_INSTRUCTION); return event == EVENT_INSTRUCTION; } while (true) { - if (event == replay_data_kind) { + if (event == replay_state.data_kind) { res = true; } - switch (replay_data_kind) { + switch (replay_state.data_kind) { case EVENT_SHUTDOWN: replay_finish_event(); qemu_system_shutdown_request(); @@ -85,7 +85,7 @@ void replay_account_executed_instructions(void) replay_state.instructions_count -= count; replay_state.current_step += count; if (replay_state.instructions_count == 0) { - assert(replay_data_kind == EVENT_INSTRUCTION); + assert(replay_state.data_kind == EVENT_INSTRUCTION); replay_finish_event(); /* Wake up iothread. This is required because timers will not expire until clock counters @@ -188,7 +188,7 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint) if (replay_mode == REPLAY_MODE_PLAY) { if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) { replay_finish_event(); - } else if (replay_data_kind != EVENT_ASYNC) { + } else if (replay_state.data_kind != EVENT_ASYNC) { res = false; goto out; } @@ -196,7 +196,7 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint) /* replay_read_events may leave some unread events. 
Return false if not all of the events associated with checkpoint were processed */ - res = replay_data_kind != EVENT_ASYNC; + res = replay_state.data_kind != EVENT_ASYNC; } else if (replay_mode == REPLAY_MODE_RECORD) { replay_put_event(EVENT_CHECKPOINT + checkpoint); replay_save_events(checkpoint); @@ -237,9 +237,10 @@ static void replay_enable(const char *fname, int mode) replay_filename = g_strdup(fname); replay_mode = mode; - replay_data_kind = -1; + replay_state.data_kind = -1; replay_state.instructions_count = 0; replay_state.current_step = 0; + replay_state.has_unread_data = 0; /* skip file header for RECORD and check it for PLAY */ if (replay_mode == REPLAY_MODE_RECORD) { @@ -291,6 +292,7 @@ void replay_configure(QemuOpts *opts) exit(1); } + replay_vmstate_register(); replay_enable(fname, mode); out: diff --git a/replication.c b/replication.c new file mode 100644 index 0000000000..be3a42f9c9 --- /dev/null +++ b/replication.c @@ -0,0 +1,107 @@ +/* + * Replication filter + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 Intel Corporation + * Copyright (c) 2016 FUJITSU LIMITED + * + * Author: + * Changlong Xie + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "replication.h" + +static QLIST_HEAD(, ReplicationState) replication_states; + +ReplicationState *replication_new(void *opaque, ReplicationOps *ops) +{ + ReplicationState *rs; + + assert(ops != NULL); + rs = g_new0(ReplicationState, 1); + rs->opaque = opaque; + rs->ops = ops; + QLIST_INSERT_HEAD(&replication_states, rs, node); + + return rs; +} + +void replication_remove(ReplicationState *rs) +{ + if (rs) { + QLIST_REMOVE(rs, node); + g_free(rs); + } +} + +/* + * The caller of the function MUST make sure vm stopped + */ +void replication_start_all(ReplicationMode mode, Error **errp) +{ + ReplicationState *rs, *next; + Error *local_err = NULL; + + QLIST_FOREACH_SAFE(rs, &replication_states, node, next) { + if (rs->ops && rs->ops->start) { + rs->ops->start(rs, mode, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + +void replication_do_checkpoint_all(Error **errp) +{ + ReplicationState *rs, *next; + Error *local_err = NULL; + + QLIST_FOREACH_SAFE(rs, &replication_states, node, next) { + if (rs->ops && rs->ops->checkpoint) { + rs->ops->checkpoint(rs, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + +void replication_get_error_all(Error **errp) +{ + ReplicationState *rs, *next; + Error *local_err = NULL; + + QLIST_FOREACH_SAFE(rs, &replication_states, node, next) { + if (rs->ops && rs->ops->get_error) { + rs->ops->get_error(rs, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + +void replication_stop_all(bool failover, Error **errp) +{ + ReplicationState *rs, *next; + Error *local_err = NULL; + + QLIST_FOREACH_SAFE(rs, &replication_states, node, next) { + if (rs->ops && rs->ops->stop) { + rs->ops->stop(rs, failover, &local_err); + } + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} diff --git a/replication.h b/replication.h new file mode 100644 index 0000000000..ece6ca6133 --- /dev/null +++ b/replication.h @@ -0,0 +1,174 @@ +/* + * Replication filter + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. 
+ * Copyright (c) 2016 Intel Corporation + * Copyright (c) 2016 FUJITSU LIMITED + * + * Author: + * Changlong Xie + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef REPLICATION_H +#define REPLICATION_H + +#include "qemu/queue.h" + +typedef struct ReplicationOps ReplicationOps; +typedef struct ReplicationState ReplicationState; + +/** + * SECTION:replication.h + * @title:Base Replication System + * @short_description: interfaces for handling replication + * + * The Replication Model provides a framework for handling Replication + * + * + * How to use replication interfaces + * + * #include "replication.h" + * + * typedef struct BDRVReplicationState { + * ReplicationState *rs; + * } BDRVReplicationState; + * + * static void replication_start(ReplicationState *rs, ReplicationMode mode, + * Error **errp); + * static void replication_do_checkpoint(ReplicationState *rs, Error **errp); + * static void replication_get_error(ReplicationState *rs, Error **errp); + * static void replication_stop(ReplicationState *rs, bool failover, + * Error **errp); + * + * static ReplicationOps replication_ops = { + * .start = replication_start, + * .checkpoint = replication_do_checkpoint, + * .get_error = replication_get_error, + * .stop = replication_stop, + * } + * + * static int replication_open(BlockDriverState *bs, QDict *options, + * int flags, Error **errp) + * { + * BDRVReplicationState *s = bs->opaque; + * s->rs = replication_new(bs, &replication_ops); + * return 0; + * } + * + * static void replication_close(BlockDriverState *bs) + * { + * BDRVReplicationState *s = bs->opaque; + * replication_remove(s->rs); + * } + * + * BlockDriver bdrv_replication = { + * .format_name = "replication", + * .protocol_name = "replication", + * .instance_size = sizeof(BDRVReplicationState), + * + * .bdrv_open = replication_open, + * .bdrv_close = replication_close, + * }; + * + * static void bdrv_replication_init(void) + * { + * bdrv_register(&bdrv_replication); + * } + * + * block_init(bdrv_replication_init); + * + * + * + * We create an example about how to use replication interfaces in above. + * Then in migration, we can use replication_(start/stop/do_checkpoint/ + * get_error)_all to handle all replication operations. 
+ */ + +/** + * ReplicationState: + * @opaque: opaque pointer value passed to this ReplicationState + * @ops: replication operation of this ReplicationState + * @node: node that we will insert into @replication_states QLIST + */ +struct ReplicationState { + void *opaque; + ReplicationOps *ops; + QLIST_ENTRY(ReplicationState) node; +}; + +/** + * ReplicationOps: + * @start: callback to start replication + * @stop: callback to stop replication + * @checkpoint: callback to do checkpoint + * @get_error: callback to check if error occurred during replication + */ +struct ReplicationOps { + void (*start)(ReplicationState *rs, ReplicationMode mode, Error **errp); + void (*stop)(ReplicationState *rs, bool failover, Error **errp); + void (*checkpoint)(ReplicationState *rs, Error **errp); + void (*get_error)(ReplicationState *rs, Error **errp); +}; + +/** + * replication_new: + * @opaque: opaque pointer value passed to ReplicationState + * @ops: replication operation of the new relevant ReplicationState + * + * Called to create a new ReplicationState instance, and then insert it + * into @replication_states QLIST + * + * Returns: the new ReplicationState instance + */ +ReplicationState *replication_new(void *opaque, ReplicationOps *ops); + +/** + * replication_remove: + * @rs: the ReplicationState instance to remove + * + * Called to remove a ReplicationState instance, and then delete it from + * @replication_states QLIST + */ +void replication_remove(ReplicationState *rs); + +/** + * replication_start_all: + * @mode: replication mode that could be "primary" or "secondary" + * @errp: returns an error if this function fails + * + * Start replication, called in migration/checkpoint thread + * + * Note: the caller of the function MUST make sure vm stopped + */ +void replication_start_all(ReplicationMode mode, Error **errp); + +/** + * replication_do_checkpoint_all: + * @errp: returns an error if this function fails + * + * This interface is called after all VM state is transferred to Secondary QEMU + */ +void replication_do_checkpoint_all(Error **errp); + +/** + * replication_get_error_all: + * @errp: returns an error if this function fails + * + * This interface is called to check if error occurred during replication + */ +void replication_get_error_all(Error **errp); + +/** + * replication_stop_all: + * @failover: boolean value that indicates if we need do failover or not + * @errp: returns an error if this function fails + * + * It is called on failover. 
The vm should be stopped before calling it, if you + * use this API to shutdown the guest, or other things except failover + */ +void replication_stop_all(bool failover, Error **errp); + +#endif /* REPLICATION_H */ diff --git a/roms/Makefile b/roms/Makefile index 88b3709d4d..b5e5a69e91 100644 --- a/roms/Makefile +++ b/roms/Makefile @@ -63,6 +63,7 @@ default: @echo " efirom -- update nic roms (bios+efi, this needs" @echo " the EfiRom utility from edk2 / tianocore)" @echo " slof -- update slof.bin" + @echo " skiboot -- update skiboot.lid" @echo " u-boot.e500 -- update u-boot.e500" bios: build-seabios-config-seabios-128k build-seabios-config-seabios-256k @@ -103,7 +104,7 @@ build-lgplvgabios: $(MAKE) -C vgabios $(vgabios_targets) -.PHONY: sgabios +.PHONY: sgabios skiboot sgabios: $(MAKE) -C sgabios cp sgabios/sgabios.bin ../pc-bios @@ -146,6 +147,10 @@ u-boot.e500: $(powerpc_cross_prefix)strip u-boot/build.e500/u-boot -o \ ../pc-bios/u-boot.e500 +skiboot: + $(MAKE) -C skiboot CROSS=$(powerpc64_cross_prefix) + cp skiboot/skiboot.lid ../pc-bios/skiboot.lid + clean: rm -rf seabios/.config seabios/out seabios/builds $(MAKE) -C vgabios clean @@ -155,3 +160,4 @@ clean: $(MAKE) -C ipxe/src veryclean $(MAKE) -C SLOF clean rm -rf u-boot/build.e500 + $(MAKE) -C skiboot clean diff --git a/roms/skiboot b/roms/skiboot new file mode 160000 index 0000000000..762d0082f1 --- /dev/null +++ b/roms/skiboot @@ -0,0 +1 @@ +Subproject commit 762d0082f18e4fb921a2d44a1051b02d8b0f6381 diff --git a/rules.mak b/rules.mak index 99cd0b3371..f4839d2c38 100644 --- a/rules.mak +++ b/rules.mak @@ -14,6 +14,7 @@ MAKEFLAGS += -rR %.cpp: %.m: %.mak: +clean-target: # Flags for C++ compilation QEMU_CXXFLAGS = -D__STDC_LIMIT_MACROS $(filter-out -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Wold-style-declaration -Wold-style-definition -Wredundant-decls, $(QEMU_CFLAGS)) @@ -50,15 +51,15 @@ process-archive-undefs = $(filter-out %.a %.mo,$1) \ $(call undefined-symbols,$(filter %.mo,$1)))) \ $(filter %.a,$1) -extract-libs = $(strip $(foreach o,$1,$($o-libs))) +extract-libs = $(strip $(foreach o,$(filter-out %.mo,$1),$($o-libs))) expand-objs = $(strip $(sort $(filter %.o,$1)) \ $(foreach o,$(filter %.mo,$1),$($o-objs)) \ $(filter-out %.o %.mo,$1)) %.o: %.c - $(call quiet-command,$(CC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<," CC $(TARGET_DIR)$@") + $(call quiet-command,$(CC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<,"CC","$(TARGET_DIR)$@") %.o: %.rc - $(call quiet-command,$(WINDRES) -I. -o $@ $<," RC $(TARGET_DIR)$@") + $(call quiet-command,$(WINDRES) -I. -o $@ $<,"RC","$(TARGET_DIR)$@") # If we have a CXX we might have some C++ objects, in which case we # must link with the C++ compiler, not the plain C compiler. 
@@ -66,22 +67,22 @@ LINKPROG = $(or $(CXX),$(CC)) LINK = $(call quiet-command, $(LINKPROG) $(QEMU_CFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ \ $(call process-archive-undefs, $1) \ - $(version-obj-y) $(call extract-libs,$1) $(LIBS)," LINK $(TARGET_DIR)$@") + $(version-obj-y) $(call extract-libs,$1) $(LIBS),"LINK","$(TARGET_DIR)$@") %.o: %.S - $(call quiet-command,$(CCAS) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<," CCAS $(TARGET_DIR)$@") + $(call quiet-command,$(CCAS) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) -c -o $@ $<,"CCAS","$(TARGET_DIR)$@") %.o: %.cc - $(call quiet-command,$(CXX) $(QEMU_INCLUDES) $(QEMU_CXXFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<," CXX $(TARGET_DIR)$@") + $(call quiet-command,$(CXX) $(QEMU_INCLUDES) $(QEMU_CXXFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<,"CXX","$(TARGET_DIR)$@") %.o: %.cpp - $(call quiet-command,$(CXX) $(QEMU_INCLUDES) $(QEMU_CXXFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<," CXX $(TARGET_DIR)$@") + $(call quiet-command,$(CXX) $(QEMU_INCLUDES) $(QEMU_CXXFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<,"CXX","$(TARGET_DIR)$@") %.o: %.m - $(call quiet-command,$(OBJCC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<," OBJC $(TARGET_DIR)$@") + $(call quiet-command,$(OBJCC) $(QEMU_INCLUDES) $(QEMU_CFLAGS) $(QEMU_DGFLAGS) $(CFLAGS) $($@-cflags) -c -o $@ $<,"OBJC","$(TARGET_DIR)$@") %.o: %.dtrace - $(call quiet-command,dtrace -o $@ -G -s $<, " GEN $(TARGET_DIR)$@") + $(call quiet-command,dtrace -o $@ -G -s $<,"GEN","$(TARGET_DIR)$@") DSO_OBJ_CFLAGS := -fPIC -DBUILD_DSO module-common.o: CFLAGS += $(DSO_OBJ_CFLAGS) @@ -89,13 +90,13 @@ module-common.o: CFLAGS += $(DSO_OBJ_CFLAGS) %$(DSOSUF): %.mo $(call LINK,$^) @# Copy to build root so modules can be loaded when program started without install - $(if $(findstring /,$@),$(call quiet-command,cp $@ $(subst /,-,$@), " CP $(subst /,-,$@)")) + $(if $(findstring /,$@),$(call quiet-command,cp $@ $(subst /,-,$@),"CP","$(subst /,-,$@)")) -LD_REL := $(CC) -nostdlib -Wl,-r $(LD_REL_FLAGS) +LD_REL := $(CC) -nostdlib $(LD_REL_FLAGS) %.mo: - $(call quiet-command,$(LD_REL) -o $@ $^," LD -r $(TARGET_DIR)$@") + $(call quiet-command,$(LD_REL) -o $@ $^,"LD","$(TARGET_DIR)$@") .PHONY: modules modules: @@ -104,9 +105,15 @@ modules: $(call LINK,$(filter %.o %.a %.mo, $^)) %.a: - $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^," AR $(TARGET_DIR)$@") + $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^,"AR","$(TARGET_DIR)$@") -quiet-command = $(if $(V),$1,$(if $(2),@echo $2 && $1, @$1)) +# Usage: $(call quiet-command,command and args,"NAME","args to print") +# This will run "command and args", and either: +# if V=1 just print the whole command and args +# otherwise print the 'quiet' output in the format " NAME args to print" +# NAME should be a short name of the command, 7 letters or fewer. +# If called with only a single argument, will print nothing in quiet mode. +quiet-command = $(if $(V),$1,$(if $(2),@printf " %-7s %s\n" $2 $3 && $1, @$1)) # cc-option # Usage: CFLAGS+=$(call cc-option, -falign-functions=0, -malign-functions=0) @@ -131,7 +138,7 @@ endef # Looks in the PATH if the argument contains no slash, else only considers one # specific directory. Returns an # empty string if the program doesn't exist # there. 
-find-in-path = $(if $(find-string /, $1), \ +find-in-path = $(if $(findstring /, $1), \ $(wildcard $1), \ $(wildcard $(patsubst %, %/$1, $(subst :, ,$(PATH))))) @@ -172,7 +179,7 @@ config-%.h: config-%.h-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ config-%.h-timestamp: config-%.mak $(SRC_PATH)/scripts/create_config - $(call quiet-command, sh $(SRC_PATH)/scripts/create_config < $< > $@, " GEN $(TARGET_DIR)config-$*.h") + $(call quiet-command, sh $(SRC_PATH)/scripts/create_config < $< > $@,"GEN","$(TARGET_DIR)config-$*.h") .PHONY: clean-timestamp clean-timestamp: diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index b0096a4460..f084542934 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1320,6 +1320,16 @@ sub process { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; ERROR("DOS line endings\n" . $herevet); + } elsif ($realfile =~ /^docs\/.+\.txt/ || + $realfile =~ /^docs\/.+\.md/) { + if ($rawline =~ /^\+\s+$/ && $rawline !~ /^\+ {4}$/) { + # TODO: properly check we're in a code block + # (surrounding text is 4-column aligned) + my $herevet = "$here\n" . cat_vet($rawline) . "\n"; + ERROR("code blocks in documentation should have " . + "empty lines with exactly 4 columns of " . + "whitespace\n" . $herevet); + } } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; ERROR("trailing whitespace\n" . $herevet); @@ -1744,7 +1754,7 @@ sub process { # Ignore those directives where spaces _are_ permitted. if ($name =~ /^(?: if|for|while|switch|return|case| - volatile|__volatile__| + volatile|__volatile__|coroutine_fn| __attribute__|format|__extension__| asm|__asm__)$/x) { @@ -2397,7 +2407,7 @@ sub process { # we have e.g. CONFIG_LINUX and CONFIG_WIN32 for common cases # where they might be necessary. if ($line =~ m@^.\s*\#\s*if.*\b__@) { - ERROR("architecture specific defines should be avoided\n" . $herecurr); + WARN("architecture specific defines should be avoided\n" . $herecurr); } # Check that the storage class is at the beginning of a declaration @@ -2488,8 +2498,8 @@ sub process { VMStateDescription| VMStateInfo}x; if ($line !~ /\bconst\b/ && - $line =~ /\b($struct_ops)\b/) { - ERROR("struct $1 should normally be const\n" . + $line =~ /\b($struct_ops)\b.*=/) { + ERROR("initializer for struct $1 should normally be const\n" . $herecurr); } diff --git a/scripts/clean-includes b/scripts/clean-includes index 4412a5590a..dd938daa3e 100755 --- a/scripts/clean-includes +++ b/scripts/clean-includes @@ -14,15 +14,18 @@ # the top-level directory. # Usage: -# clean-includes [--git subjectprefix] file ... +# clean-includes [--git subjectprefix] [--check-dup-head] file ... # or -# clean-includes [--git subjectprefix] --all +# clean-includes [--git subjectprefix] [--check-dup-head] --all # # If the --git subjectprefix option is given, then after making # the changes to the files this script will create a git commit # with the subject line "subjectprefix: Clean up includes" # and a boilerplate commit message. # +# If --check-dup-head is specified, additionally check for duplicate +# header includes. +# # Using --all will cause clean-includes to run on the whole source # tree (excluding certain directories which are known not to need # handling). 
@@ -45,23 +48,40 @@ GIT=no +DUPHEAD=no # Extended regular expression defining files to ignore when using --all XDIRREGEX='^(tests/tcg|tests/multiboot|pc-bios|disas/libvixl)' -if [ $# -ne 0 ] && [ "$1" = "--git" ]; then - if [ $# -eq 1 ]; then - echo "--git option requires an argument" - exit 1 - fi - GITSUBJ="$2" - GIT=yes - shift - shift -fi +while true +do + case $1 in + "--git") + if [ $# -eq 1 ]; then + echo "--git option requires an argument" + exit 1 + fi + GITSUBJ="$2" + GIT=yes + shift + shift + ;; + "--check-dup-head") + DUPHEAD=yes + shift + ;; + "--") + shift + break + ;; + *) + break + ;; + esac +done if [ $# -eq 0 ]; then - echo "Usage: clean-includes [--git subjectprefix] [--all | foo.c ...]" + echo "Usage: clean-includes [--git subjectprefix] [--check-dup-head] [--all | foo.c ...]" echo "(modifies the files in place)" exit 1 fi @@ -91,7 +111,6 @@ cat >"$COCCIFILE" < 1) print $0}' + if [ $? -eq 0 ]; then + echo "Found duplicate header file includes. Please check the above files manually." + exit 1 + fi +fi + if [ "$GIT" = "yes" ]; then git add -- "$@" git commit --signoff -F - <&2 + printf "line %d: syntax error: expected ETEXI, found '%s'\n" "$line" "$str" >&2 exit 1 fi flag=1 ;; ETEXI*) if test $flag -ne 1 ; then - echo "line $line: syntax error: expected STEXI, found $str" >&2 + printf "line %d: syntax error: expected STEXI, found '%s'\n" "$line" "$str" >&2 exit 1 fi flag=0 ;; SQMP*|EQMP*) if test $flag -eq 1 ; then - echo "line $line: syntax error: expected ETEXI, found $str" >&2 + printf "line %d: syntax error: expected ETEXI, found '%s'\n" "$line" "$str" >&2 exit 1 fi ;; DEFHEADING*) - echo "$(expr "$str" : "DEFHEADING(\(.*\))")" + printf '%s\n' "$(expr "$str" : "DEFHEADING(\(.*\))")" ;; ARCHHEADING*) - echo "$(expr "$str" : "ARCHHEADING(\(.*\),.*)")" + printf '%s\n' "$(expr "$str" : "ARCHHEADING(\(.*\),.*)")" ;; *) - test $flag -eq 1 && echo "$str" + test $flag -eq 1 && printf '%s\n' "$str" ;; esac line=$((line+1)) @@ -69,26 +69,26 @@ hxtoqmp() ;; SQMP*) if test $flag -eq 1 ; then - echo "line $line: syntax error: expected EQMP, found $str" >&2 + printf "line %d: syntax error: expected EQMP, found '%s'\n" "$line" "$str" >&2 exit 1 fi flag=1 ;; EQMP*) if test $flag -ne 1 ; then - echo "line $line: syntax error: expected SQMP, found $str" >&2 + printf "line %d: syntax error: expected SQMP, found '%s'\n" "$line" "$str" >&2 exit 1 fi flag=0 ;; STEXI*|ETEXI*) if test $flag -eq 1 ; then - echo "line $line: syntax error: expected EQMP, found $str" >&2 + printf "line %d: syntax error: expected EQMP, found '%s'\n" "$line" "$str" >&2 exit 1 fi ;; *) - test $flag -eq 1 && echo "$str" + test $flag -eq 1 && printf '%s\n' "$str" ;; esac line=$((line+1)) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 8f0371f498..222025525b 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -79,6 +79,7 @@ class Misc(object): def show(self): print self.name value = msr().read(self.msr, 0) + print ' Hex: 0x%x' % (value) def first_bit(key): if type(key) is tuple: return key[0] @@ -172,6 +173,7 @@ controls = [ 16: 'RDSEED exiting', 18: 'EPT-violation #VE', 20: 'Enable XSAVES/XRSTORS', + 25: 'TSC scaling', }, cap_msr = MSR_IA32_VMX_PROCBASED_CTLS2, ), diff --git a/scripts/modules/module_block.py b/scripts/modules/module_block.py new file mode 100644 index 0000000000..3f73007640 --- /dev/null +++ b/scripts/modules/module_block.py @@ -0,0 +1,101 @@ +#!/usr/bin/python +# +# Module information generator +# +# Copyright Red Hat, Inc. 
2015 - 2016 +# +# Authors: +# Marc Mari +# +# This work is licensed under the terms of the GNU GPL, version 2. +# See the COPYING file in the top-level directory. + +from __future__ import print_function +import sys +import os + +def get_string_struct(line): + data = line.split() + + # data[0] -> struct element name + # data[1] -> = + # data[2] -> value + + return data[2].replace('"', '')[:-1] + +def add_module(fheader, library, format_name, protocol_name): + lines = [] + lines.append('.library_name = "' + library + '",') + if format_name != "": + lines.append('.format_name = "' + format_name + '",') + if protocol_name != "": + lines.append('.protocol_name = "' + protocol_name + '",') + + text = '\n '.join(lines) + fheader.write('\n {\n ' + text + '\n },') + +def process_file(fheader, filename): + # This parser assumes the coding style rules are being followed + with open(filename, "r") as cfile: + found_start = False + library, _ = os.path.splitext(os.path.basename(filename)) + for line in cfile: + if found_start: + line = line.replace('\n', '') + if line.find(".format_name") != -1: + format_name = get_string_struct(line) + elif line.find(".protocol_name") != -1: + protocol_name = get_string_struct(line) + elif line == "};": + add_module(fheader, library, format_name, protocol_name) + found_start = False + elif line.find("static BlockDriver") != -1: + found_start = True + format_name = "" + protocol_name = "" + +def print_top(fheader): + fheader.write('''/* AUTOMATICALLY GENERATED, DO NOT MODIFY */ +/* + * QEMU Block Module Infrastructure + * + * Authors: + * Marc Mari + */ + +''') + + fheader.write('''#ifndef QEMU_MODULE_BLOCK_H +#define QEMU_MODULE_BLOCK_H + +#include "qemu-common.h" + +static const struct { + const char *format_name; + const char *protocol_name; + const char *library_name; +} block_driver_modules[] = {''') + +def print_bottom(fheader): + fheader.write(''' +}; + +#endif +''') + +# First argument: output file +# All other arguments: modules source files (.c) +output_file = sys.argv[1] +with open(output_file, 'w') as fheader: + print_top(fheader) + + for filename in sys.argv[2:]: + if os.path.isfile(filename): + process_file(fheader, filename) + else: + print("File " + filename + " does not exist.", file=sys.stderr) + sys.exit(1) + + print_bottom(fheader) + +sys.exit(0) diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py index a06a2c4f9b..09e8467d90 100644 --- a/scripts/qapi-commands.py +++ b/scripts/qapi-commands.py @@ -68,7 +68,7 @@ def gen_marshal_output(ret_type): Error *err = NULL; Visitor *v; - v = qmp_output_visitor_new(ret_out); + v = qobject_output_visitor_new(ret_out); visit_type_%(c_name)s(v, "unused", &ret_in, &err); if (!err) { visit_complete(v, ret_out); @@ -84,10 +84,7 @@ def gen_marshal_output(ret_type): def gen_marshal_proto(name): - ret = 'void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)' % c_name(name) - if not middle_mode: - ret = 'static ' + ret - return ret + return 'void qmp_marshal_%s(QDict *args, QObject **ret, Error **errp)' % c_name(name) def gen_marshal_decl(name): @@ -98,6 +95,8 @@ def gen_marshal_decl(name): def gen_marshal(name, arg_type, boxed, ret_type): + have_args = arg_type and not arg_type.is_empty() + ret = mcgen(''' %(proto)s @@ -112,17 +111,31 @@ def gen_marshal(name, arg_type, boxed, ret_type): ''', c_type=ret_type.c_type()) - if arg_type and not arg_type.is_empty(): + if have_args: + visit_members = ('visit_type_%s_members(v, &arg, &err);' + % arg_type.c_name()) ret += mcgen(''' Visitor *v; %(c_name)s arg 
= {0}; - v = qmp_input_visitor_new(QOBJECT(args), true); +''', + c_name=arg_type.c_name()) + else: + visit_members = '' + ret += mcgen(''' + Visitor *v = NULL; + + if (args) { +''') + push_indent() + + ret += mcgen(''' + v = qobject_input_visitor_new(QOBJECT(args), true); visit_start_struct(v, NULL, NULL, 0, &err); if (err) { goto out; } - visit_type_%(c_name)s_members(v, &arg, &err); + %(visit_members)s if (!err) { visit_check_struct(v, &err); } @@ -131,35 +144,47 @@ def gen_marshal(name, arg_type, boxed, ret_type): goto out; } ''', - c_name=arg_type.c_name()) + visit_members=visit_members) - else: + if not have_args: + pop_indent() ret += mcgen(''' - - (void)args; + } ''') ret += gen_call(name, arg_type, boxed, ret_type) - # 'goto out' produced above for arg_type, and by gen_call() for ret_type - if (arg_type and not arg_type.is_empty()) or ret_type: - ret += mcgen(''' + ret += mcgen(''' out: -''') - ret += mcgen(''' error_propagate(errp, err); + visit_free(v); ''') - if arg_type and not arg_type.is_empty(): + + if have_args: + visit_members = ('visit_type_%s_members(v, &arg, NULL);' + % arg_type.c_name()) + else: + visit_members = '' ret += mcgen(''' - visit_free(v); + if (args) { +''') + push_indent() + + ret += mcgen(''' v = qapi_dealloc_visitor_new(); visit_start_struct(v, NULL, NULL, 0, NULL); - visit_type_%(c_name)s_members(v, &arg, NULL); + %(visit_members)s visit_end_struct(v, NULL); visit_free(v); ''', - c_name=arg_type.c_name()) + visit_members=visit_members) + + if not have_args: + pop_indent() + ret += mcgen(''' + } +''') ret += mcgen(''' } @@ -209,8 +234,7 @@ def visit_begin(self, schema): self._visited_ret_types = set() def visit_end(self): - if not middle_mode: - self.defn += gen_registry(self._regy) + self.defn += gen_registry(self._regy) self._regy = None self._visited_ret_types = None @@ -222,21 +246,12 @@ def visit_command(self, name, info, arg_type, ret_type, if ret_type and ret_type not in self._visited_ret_types: self._visited_ret_types.add(ret_type) self.defn += gen_marshal_output(ret_type) - if middle_mode: - self.decl += gen_marshal_decl(name) + self.decl += gen_marshal_decl(name) self.defn += gen_marshal(name, arg_type, boxed, ret_type) - if not middle_mode: - self._regy += gen_register_command(name, success_response) - - -middle_mode = False + self._regy += gen_register_command(name, success_response) -(input_file, output_dir, do_c, do_h, prefix, opts) = \ - parse_command_line("m", ["middle"]) -for o, a in opts: - if o in ("-m", "--middle"): - middle_mode = True +(input_file, output_dir, do_c, do_h, prefix, opts) = parse_command_line() c_comment = ''' /* @@ -278,8 +293,8 @@ def visit_command(self, name, info, arg_type, ret_type, #include "qapi/qmp/types.h" #include "qapi/qmp/dispatch.h" #include "qapi/visitor.h" -#include "qapi/qmp-output-visitor.h" -#include "qapi/qmp-input-visitor.h" +#include "qapi/qobject-output-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "qapi/dealloc-visitor.h" #include "%(prefix)sqapi-types.h" #include "%(prefix)sqapi-visit.h" diff --git a/scripts/qapi-event.py b/scripts/qapi-event.py index 38d82111ad..f4eb7f85b1 100644 --- a/scripts/qapi-event.py +++ b/scripts/qapi-event.py @@ -98,7 +98,7 @@ def gen_event_send(name, arg_type, boxed): if arg_type and not arg_type.is_empty(): ret += mcgen(''' - v = qmp_output_visitor_new(&obj); + v = qobject_output_visitor_new(&obj); ''') if not arg_type.is_implicit(): ret += mcgen(''' @@ -209,7 +209,7 @@ def visit_event(self, name, info, arg_type, boxed): #include "qemu-common.h" 
#include "%(prefix)sqapi-event.h" #include "%(prefix)sqapi-visit.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/qmp-event.h" ''', diff --git a/scripts/show-fixed-bugs.sh b/scripts/show-fixed-bugs.sh new file mode 100755 index 0000000000..36f306898f --- /dev/null +++ b/scripts/show-fixed-bugs.sh @@ -0,0 +1,91 @@ +#!/bin/sh + +# This script checks the git log for URLs to the QEMU launchpad bugtracker +# and optionally checks whether the corresponding bugs are not closed yet. + +show_help () { + echo "Usage:" + echo " -s : Start searching at this commit" + echo " -e : End searching at this commit" + echo " -c : Check if bugs are still open" + echo " -b : Open bugs in browser" +} + +while getopts "s:e:cbh" opt; do + case "$opt" in + s) start="$OPTARG" ;; + e) end="$OPTARG" ;; + c) check_if_open=1 ;; + b) show_in_browser=1 ;; + h) show_help ; exit 0 ;; + *) echo "Use -h for help." ; exit 1 ;; + esac +done + +if [ "x$start" = "x" ]; then + start=`git tag -l 'v[0-9]*\.[0-9]*\.0' | tail -n 2 | head -n 1` +fi +if [ "x$end" = "x" ]; then + end=`git tag -l 'v[0-9]*\.[0-9]*\.0' | tail -n 1` +fi + +if [ "x$start" = "x" ] || [ "x$end" = "x" ]; then + echo "Could not determine start or end revision ... Please note that this" + echo "script must be run from a checked out git repository of QEMU." + exit 1 +fi + +echo "Searching git log for bugs in the range $start..$end" + +urlstr='https://bugs.launchpad.net/\(bugs\|qemu/+bug\)/' +bug_urls=`git log $start..$end \ + | sed -n '\,'"$urlstr"', s,\(.*\)\('"$urlstr"'\)\([0-9]*\).*,\2\4,p' \ + | sort -u` + +echo Found bug URLs: +for i in $bug_urls ; do echo " $i" ; done + +if [ "x$check_if_open" = "x1" ]; then + echo + echo "Checking which ones are still open..." + for i in $bug_urls ; do + if ! curl -s -L "$i" | grep "value status" | grep -q "Fix Released" ; then + echo " $i" + final_bug_urls="$final_bug_urls $i" + fi + done +else + final_bug_urls=$bug_urls +fi + +if [ "x$final_bug_urls" = "x" ]; then + echo "No open bugs found." +elif [ "x$show_in_browser" = "x1" ]; then + # Try to determine which browser we should use + if [ "x$BROWSER" != "x" ]; then + bugbrowser="$BROWSER" + elif command -v xdg-open >/dev/null 2>&1; then + bugbrowser=xdg-open + elif command -v gnome-open >/dev/null 2>&1; then + bugbrowser=gnome-open + elif [ "`uname`" = "Darwin" ]; then + bugbrowser=open + elif command -v sensible-browser >/dev/null 2>&1; then + bugbrowser=sensible-browser + else + echo "Please set the BROWSER variable to the browser of your choice." 
+ exit 1 + fi + # Now show the bugs in the browser + first=1 + for i in $final_bug_urls; do + "$bugbrowser" "$i" + if [ $first = 1 ]; then + # if it is the first entry, give the browser some time to start + # (to avoid messages like "Firefox is already running, but is + # not responding...") + sleep 4 + first=0 + fi + done +fi diff --git a/scripts/simpletrace.py b/scripts/simpletrace.py index 3916c6d14a..4ca903dc0c 100755 --- a/scripts/simpletrace.py +++ b/scripts/simpletrace.py @@ -12,13 +12,16 @@ import struct import re import inspect -from tracetool import _read_events, Event +from tracetool import read_events, Event from tracetool.backend.simple import is_string header_event_id = 0xffffffffffffffff header_magic = 0xf2b177cb0aa429b4 dropped_event_id = 0xfffffffffffffffe +record_type_mapping = 0 +record_type_event = 1 + log_header_fmt = '=QQQ' rec_header_fmt = '=QQII' @@ -30,14 +33,16 @@ def read_header(fobj, hfmt): return None return struct.unpack(hfmt, hdr) -def get_record(edict, rechdr, fobj): - """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6).""" +def get_record(edict, idtoname, rechdr, fobj): + """Deserialize a trace record from a file into a tuple + (name, timestamp, pid, arg1, ..., arg6).""" if rechdr is None: return None - rec = (rechdr[0], rechdr[1], rechdr[3]) if rechdr[0] != dropped_event_id: event_id = rechdr[0] - event = edict[event_id] + name = idtoname[event_id] + rec = (name, rechdr[1], rechdr[3]) + event = edict[name] for type, name in event.args: if is_string(type): l = fobj.read(4) @@ -48,15 +53,22 @@ def get_record(edict, rechdr, fobj): (value,) = struct.unpack('=Q', fobj.read(8)) rec = rec + (value,) else: + rec = ("dropped", rechdr[1], rechdr[3]) (value,) = struct.unpack('=Q', fobj.read(8)) rec = rec + (value,) return rec +def get_mapping(fobj): + (event_id, ) = struct.unpack('=Q', fobj.read(8)) + (len, ) = struct.unpack('=L', fobj.read(4)) + name = fobj.read(len) + + return (event_id, name) -def read_record(edict, fobj): +def read_record(edict, idtoname, fobj): """Deserialize a trace record from a file into a tuple (event_num, timestamp, pid, arg1, ..., arg6).""" rechdr = read_header(fobj, rec_header_fmt) - return get_record(edict, rechdr, fobj) # return tuple of record elements + return get_record(edict, idtoname, rechdr, fobj) def read_trace_header(fobj): """Read and verify trace file header""" @@ -67,20 +79,30 @@ def read_trace_header(fobj): raise ValueError('Not a valid trace file!') log_version = header[2] - if log_version not in [0, 2, 3]: + if log_version not in [0, 2, 3, 4]: raise ValueError('Unknown version of tracelog format!') - if log_version != 3: + if log_version != 4: raise ValueError('Log format %d not supported with this QEMU release!' % log_version) def read_trace_records(edict, fobj): """Deserialize trace records from a file, yielding record tuples (event_num, timestamp, pid, arg1, ..., arg6).""" + idtoname = { + dropped_event_id: "dropped" + } while True: - rec = read_record(edict, fobj) - if rec is None: + t = fobj.read(8) + if len(t) == 0: break - yield rec + (rectype, ) = struct.unpack('=Q', t) + if rectype == record_type_mapping: + event_id, name = get_mapping(fobj) + idtoname[event_id] = name + else: + rec = read_record(edict, idtoname, fobj) + + yield rec class Analyzer(object): """A trace file analyzer which processes trace records. 
@@ -107,7 +129,7 @@ def end(self): def process(events, log, analyzer, read_header=True): """Invoke an analyzer on each event in a log.""" if isinstance(events, str): - events = _read_events(open(events, 'r')) + events = read_events(open(events, 'r')) if isinstance(log, str): log = open(log, 'rb') @@ -115,10 +137,10 @@ def process(events, log, analyzer, read_header=True): read_trace_header(log) dropped_event = Event.build("Dropped_Event(uint64_t num_events_dropped)") - edict = {dropped_event_id: dropped_event} + edict = {"dropped": dropped_event} - for num, event in enumerate(events): - edict[num] = event + for event in events: + edict[event.name] = event def build_fn(analyzer, event): if isinstance(event, str): @@ -166,7 +188,7 @@ def run(analyzer): '\n' % sys.argv[0]) sys.exit(1) - events = _read_events(open(sys.argv[1], 'r')) + events = read_events(open(sys.argv[1], 'r')) process(events, sys.argv[2], analyzer, read_header=read_header) if __name__ == '__main__': diff --git a/scripts/tracetool.py b/scripts/tracetool.py index 7b82959e84..c9e47371d3 100755 --- a/scripts/tracetool.py +++ b/scripts/tracetool.py @@ -15,6 +15,8 @@ import sys import getopt +import os.path +import re from tracetool import error_write, out import tracetool.backend @@ -60,6 +62,15 @@ def error_opt(msg = None): else: sys.exit(1) +def make_group_name(filename): + dirname = os.path.realpath(os.path.dirname(filename)) + basedir = os.path.join(os.path.dirname(__file__), os.pardir) + basedir = os.path.realpath(os.path.abspath(basedir)) + dirname = dirname[len(basedir) + 1:] + + if dirname == "": + return "common" + return "_" + re.sub(r"[^A-Za-z0-9]", "_", dirname) def main(args): global _SCRIPT @@ -129,8 +140,15 @@ def main(args): if probe_prefix is None: probe_prefix = ".".join(["qemu", target_type, target_name]) + if len(args) != 1: + error_opt("missing trace-events filepath") + with open(args[0], "r") as fh: + events = tracetool.read_events(fh) + + group = make_group_name(args[0]) + try: - tracetool.generate(sys.stdin, arg_format, arg_backends, + tracetool.generate(events, group, arg_format, arg_backends, binary=binary, probe_prefix=probe_prefix) except tracetool.TracetoolError as e: error_opt(str(e)) diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py index be24039c5e..365446fa53 100644 --- a/scripts/tracetool/__init__.py +++ b/scripts/tracetool/__init__.py @@ -265,11 +265,13 @@ def formats(self): QEMU_TRACE = "trace_%(name)s" QEMU_TRACE_TCG = QEMU_TRACE + "_tcg" + QEMU_DSTATE = "_TRACE_%(NAME)s_DSTATE" + QEMU_EVENT = "_TRACE_%(NAME)s_EVENT" def api(self, fmt=None): if fmt is None: fmt = Event.QEMU_TRACE - return fmt % {"name": self.name} + return fmt % {"name": self.name, "NAME": self.name.upper()} def transform(self, *trans): """Return a new Event with transformed Arguments.""" @@ -280,7 +282,17 @@ def transform(self, *trans): self) -def _read_events(fobj): +def read_events(fobj): + """Generate the output for the given (format, backends) pair. + + Parameters + ---------- + fobj : file + Event description file. + + Returns a list of Event objects + """ + events = [] for line in fobj: if not line.strip(): @@ -352,14 +364,16 @@ def try_import(mod_name, attr_name=None, attr_default=None): return False, None -def generate(fevents, format, backends, +def generate(events, group, format, backends, binary=None, probe_prefix=None): """Generate the output for the given (format, backends) pair. Parameters ---------- - fevents : file - Event description file. 
+ events : list + list of Event objects to generate for + group: str + Name of the tracing group format : str Output format name. backends : list @@ -389,6 +403,4 @@ def generate(fevents, format, backends, tracetool.backend.dtrace.BINARY = binary tracetool.backend.dtrace.PROBEPREFIX = probe_prefix - events = _read_events(fevents) - - tracetool.format.generate(events, format, backend) + tracetool.format.generate(events, format, backend, group) diff --git a/scripts/tracetool/backend/__init__.py b/scripts/tracetool/backend/__init__.py index d4b6dab9ca..f735a259c0 100644 --- a/scripts/tracetool/backend/__init__.py +++ b/scripts/tracetool/backend/__init__.py @@ -113,11 +113,11 @@ def _run_function(self, name, *args, **kwargs): if func is not None: func(*args, **kwargs) - def generate_begin(self, events): - self._run_function("generate_%s_begin", events) + def generate_begin(self, events, group): + self._run_function("generate_%s_begin", events, group) - def generate(self, event): - self._run_function("generate_%s", event) + def generate(self, event, group): + self._run_function("generate_%s", event, group) - def generate_end(self, events): - self._run_function("generate_%s_end", events) + def generate_end(self, events, group): + self._run_function("generate_%s_end", events, group) diff --git a/scripts/tracetool/backend/dtrace.py b/scripts/tracetool/backend/dtrace.py index ab9ecfab30..79505c6b1a 100644 --- a/scripts/tracetool/backend/dtrace.py +++ b/scripts/tracetool/backend/dtrace.py @@ -35,12 +35,12 @@ def binary(): return BINARY -def generate_h_begin(events): +def generate_h_begin(events, group): out('#include "trace/generated-tracers-dtrace.h"', '') -def generate_h(event): +def generate_h(event, group): out(' QEMU_%(uppername)s(%(argnames)s);', uppername=event.name.upper(), argnames=", ".join(event.args.names())) diff --git a/scripts/tracetool/backend/ftrace.py b/scripts/tracetool/backend/ftrace.py index 80dcf30478..db9fe7ad57 100644 --- a/scripts/tracetool/backend/ftrace.py +++ b/scripts/tracetool/backend/ftrace.py @@ -19,13 +19,12 @@ PUBLIC = True -def generate_h_begin(events): +def generate_h_begin(events, group): out('#include "trace/ftrace.h"', - '#include "trace/control.h"', '') -def generate_h(event): +def generate_h(event, group): argnames = ", ".join(event.args.names()) if len(event.args) > 0: argnames = ", " + argnames diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py index b3ff064011..4f4a4d38b1 100644 --- a/scripts/tracetool/backend/log.py +++ b/scripts/tracetool/backend/log.py @@ -19,13 +19,12 @@ PUBLIC = True -def generate_h_begin(events): - out('#include "trace/control.h"', - '#include "qemu/log.h"', +def generate_h_begin(events, group): + out('#include "qemu/log.h"', '') -def generate_h(event): +def generate_h(event, group): argnames = ", ".join(event.args.names()) if len(event.args) > 0: argnames = ", " + argnames diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index 1bccada63d..85f61028e2 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -21,13 +21,14 @@ def is_string(arg): strtype = ('const char*', 'char*', 'const char *', 'char *') - if arg.lstrip().startswith(strtype): + arg_strip = arg.lstrip() + if arg_strip.startswith(strtype) and arg_strip.count('*') == 1: return True else: return False -def generate_h_begin(events): +def generate_h_begin(events, group): for event in events: out('void _simple_%(api)s(%(args)s);', api=event.api(), @@ -35,13 +36,13 @@ def 
generate_h_begin(events): out('') -def generate_h(event): +def generate_h(event, group): out(' _simple_%(api)s(%(args)s);', api=event.api(), args=", ".join(event.args.names())) -def generate_c_begin(events): +def generate_c_begin(events, group): out('#include "qemu/osdep.h"', '#include "trace.h"', '#include "trace/control.h"', @@ -49,7 +50,7 @@ def generate_c_begin(events): '') -def generate_c(event): +def generate_c(event, group): out('void _simple_%(api)s(%(args)s)', '{', ' TraceBufferRecord rec;', @@ -80,11 +81,11 @@ def generate_c(event): ' return;', ' }', '', - ' if (trace_record_start(&rec, %(event_id)s, %(size_str)s)) {', + ' if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {', ' return; /* Trace Buffer Full, Event Dropped ! */', ' }', cond=cond, - event_id=event_id, + event_obj=event.api(event.QEMU_EVENT), size_str=sizestr) if len(event.args) > 0: diff --git a/scripts/tracetool/backend/syslog.py b/scripts/tracetool/backend/syslog.py new file mode 100644 index 0000000000..b8ff2790c4 --- /dev/null +++ b/scripts/tracetool/backend/syslog.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Syslog built-in backend. +""" + +__author__ = "Paul Durrant " +__copyright__ = "Copyright 2016, Citrix Systems Inc." +__license__ = "GPL version 2 or (at your option) any later version" + +__maintainer__ = "Stefan Hajnoczi" +__email__ = "stefanha@redhat.com" + + +from tracetool import out + + +PUBLIC = True + + +def generate_h_begin(events, group): + out('#include ', + '') + + +def generate_h(event, group): + argnames = ", ".join(event.args.names()) + if len(event.args) > 0: + argnames = ", " + argnames + + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper()) + + out(' if (%(cond)s) {', + ' syslog(LOG_INFO, "%(name)s " %(fmt)s %(argnames)s);', + ' }', + cond=cond, + name=event.name, + fmt=event.fmt.rstrip("\n"), + argnames=argnames) diff --git a/scripts/tracetool/backend/ust.py b/scripts/tracetool/backend/ust.py index ed4c227f69..4594db6128 100644 --- a/scripts/tracetool/backend/ust.py +++ b/scripts/tracetool/backend/ust.py @@ -19,13 +19,13 @@ PUBLIC = True -def generate_h_begin(events): +def generate_h_begin(events, group): out('#include ', '#include "trace/generated-ust-provider.h"', '') -def generate_h(event): +def generate_h(event, group): argnames = ", ".join(event.args.names()) if len(event.args) > 0: argnames = ", " + argnames diff --git a/scripts/tracetool/format/__init__.py b/scripts/tracetool/format/__init__.py index 812570ff6f..cf6e0e2da5 100644 --- a/scripts/tracetool/format/__init__.py +++ b/scripts/tracetool/format/__init__.py @@ -74,7 +74,7 @@ def exists(name): return tracetool.try_import("tracetool.format." 
+ name)[1] -def generate(events, format, backend): +def generate(events, format, backend, group): if not exists(format): raise ValueError("unknown format: %s" % format) format = format.replace("-", "_") @@ -82,4 +82,4 @@ def generate(events, format, backend): "generate")[1] if func is None: raise AttributeError("format has no 'generate': %s" % format) - func(events, backend) + func(events, backend, group) diff --git a/scripts/tracetool/format/c.py b/scripts/tracetool/format/c.py index 699598fb02..47115ed8af 100644 --- a/scripts/tracetool/format/c.py +++ b/scripts/tracetool/format/c.py @@ -16,13 +16,55 @@ from tracetool import out -def generate(events, backend): - events = [e for e in events - if "disable" not in e.properties] +def generate(events, backend, group): + active_events = [e for e in events + if "disable" not in e.properties] out('/* This file is autogenerated by tracetool, do not edit. */', + '', + '#include "qemu/osdep.h"', + '#include "trace.h"', '') - backend.generate_begin(events) - for event in events: - backend.generate(event) - backend.generate_end(events) + + for e in events: + out('uint16_t %s;' % e.api(e.QEMU_DSTATE)) + + for e in events: + if "vcpu" in e.properties: + vcpu_id = 0 + else: + vcpu_id = "TRACE_VCPU_EVENT_NONE" + out('TraceEvent %(event)s = {', + ' .id = 0,', + ' .vcpu_id = %(vcpu_id)s,', + ' .name = \"%(name)s\",', + ' .sstate = %(sstate)s,', + ' .dstate = &%(dstate)s ', + '};', + event = e.api(e.QEMU_EVENT), + vcpu_id = vcpu_id, + name = e.name, + sstate = "TRACE_%s_ENABLED" % e.name.upper(), + dstate = e.api(e.QEMU_DSTATE)) + + out('TraceEvent *%(group)s_trace_events[] = {', + group = group.lower()) + + for e in events: + out(' &%(event)s,', event = e.api(e.QEMU_EVENT)) + + out(' NULL,', + '};', + '') + + out('static void trace_%(group)s_register_events(void)', + '{', + ' trace_event_register_group(%(group)s_trace_events);', + '}', + 'trace_init(trace_%(group)s_register_events)', + group = group.lower()) + + backend.generate_begin(active_events, group) + for event in active_events: + backend.generate(event, group) + backend.generate_end(active_events, group) diff --git a/scripts/tracetool/format/d.py b/scripts/tracetool/format/d.py index c77d5b7ab0..78397c24d2 100644 --- a/scripts/tracetool/format/d.py +++ b/scripts/tracetool/format/d.py @@ -29,7 +29,7 @@ ) -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disable" not in e.properties] diff --git a/scripts/tracetool/format/events_c.py b/scripts/tracetool/format/events_c.py deleted file mode 100644 index 4012063283..0000000000 --- a/scripts/tracetool/format/events_c.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -trace/generated-events.c -""" - -__author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " -__license__ = "GPL version 2 or (at your option) any later version" - -__maintainer__ = "Stefan Hajnoczi" -__email__ = "stefanha@linux.vnet.ibm.com" - - -from tracetool import out - - -def generate(events, backend): - out('/* This file is autogenerated by tracetool, do not edit. 
*/', - '', - '#include "qemu/osdep.h"', - '#include "trace.h"', - '#include "trace/generated-events.h"', - '#include "trace/control.h"', - '') - - out('TraceEvent trace_events[TRACE_EVENT_COUNT] = {') - - for e in events: - if "vcpu" in e.properties: - vcpu_id = "TRACE_VCPU_" + e.name.upper() - else: - vcpu_id = "TRACE_VCPU_EVENT_COUNT" - out(' { .id = %(id)s, .vcpu_id = %(vcpu_id)s,' - ' .name = \"%(name)s\",' - ' .sstate = %(sstate)s },', - id = "TRACE_" + e.name.upper(), - vcpu_id = vcpu_id, - name = e.name, - sstate = "TRACE_%s_ENABLED" % e.name.upper()) - - out('};', - '') diff --git a/scripts/tracetool/format/events_h.py b/scripts/tracetool/format/events_h.py deleted file mode 100644 index a9da60b7fa..0000000000 --- a/scripts/tracetool/format/events_h.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -trace/generated-events.h -""" - -__author__ = "Lluís Vilanova " -__copyright__ = "Copyright 2012-2016, Lluís Vilanova " -__license__ = "GPL version 2 or (at your option) any later version" - -__maintainer__ = "Stefan Hajnoczi" -__email__ = "stefanha@linux.vnet.ibm.com" - - -from tracetool import out - - -def generate(events, backend): - out('/* This file is autogenerated by tracetool, do not edit. */', - '', - '#ifndef TRACE__GENERATED_EVENTS_H', - '#define TRACE__GENERATED_EVENTS_H', - '') - - # event identifiers - out('typedef enum {') - - for e in events: - out(' TRACE_%s,' % e.name.upper()) - - out(' TRACE_EVENT_COUNT', - '} TraceEventID;') - - # per-vCPU event identifiers - out('typedef enum {') - - for e in events: - if "vcpu" in e.properties: - out(' TRACE_VCPU_%s,' % e.name.upper()) - - out(' TRACE_VCPU_EVENT_COUNT', - '} TraceEventVCPUID;') - - # static state - for e in events: - if 'disable' in e.properties: - enabled = 0 - else: - enabled = 1 - if "tcg-exec" in e.properties: - # a single define for the two "sub-events" - out('#define TRACE_%(name)s_ENABLED %(enabled)d', - name=e.original.name.upper(), - enabled=enabled) - out('#define TRACE_%s_ENABLED %d' % (e.name.upper(), enabled)) - - out('#include "trace/event-internal.h"', - '', - '#endif /* TRACE__GENERATED_EVENTS_H */') diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py index 3763e9aecb..3682f4e6a8 100644 --- a/scripts/tracetool/format/h.py +++ b/scripts/tracetool/format/h.py @@ -16,24 +16,43 @@ from tracetool import out -def generate(events, backend): +def generate(events, backend, group): out('/* This file is autogenerated by tracetool, do not edit. 
*/', '', - '#ifndef TRACE__GENERATED_TRACERS_H', - '#define TRACE__GENERATED_TRACERS_H', + '#ifndef TRACE_%s_GENERATED_TRACERS_H' % group.upper(), + '#define TRACE_%s_GENERATED_TRACERS_H' % group.upper(), '', '#include "qemu-common.h"', '#include "trace/control.h"', '') - backend.generate_begin(events) + for e in events: + out('extern TraceEvent %(event)s;', + event = e.api(e.QEMU_EVENT)) + + for e in events: + out('extern uint16_t %s;' % e.api(e.QEMU_DSTATE)) + + # static state + for e in events: + if 'disable' in e.properties: + enabled = 0 + else: + enabled = 1 + if "tcg-exec" in e.properties: + # a single define for the two "sub-events" + out('#define TRACE_%(name)s_ENABLED %(enabled)d', + name=e.original.name.upper(), + enabled=enabled) + out('#define TRACE_%s_ENABLED %d' % (e.name.upper(), enabled)) + + backend.generate_begin(events, group) for e in events: if "vcpu" in e.properties: trace_cpu = next(iter(e.args))[1] cond = "trace_event_get_vcpu_state(%(cpu)s,"\ - " TRACE_%(id)s,"\ - " TRACE_VCPU_%(id)s)"\ + " TRACE_%(id)s)"\ % dict( cpu=trace_cpu, id=e.name.upper()) @@ -49,11 +68,11 @@ def generate(events, backend): cond=cond) if "disable" not in e.properties: - backend.generate(e) + backend.generate(e, group) out(' }', '}') - backend.generate_end(events) + backend.generate_end(events, group) - out('#endif /* TRACE__GENERATED_TRACERS_H */') + out('#endif /* TRACE_%s_GENERATED_TRACERS_H */' % group.upper()) diff --git a/scripts/tracetool/format/simpletrace_stap.py b/scripts/tracetool/format/simpletrace_stap.py index 7e44bc1811..c35e662e00 100644 --- a/scripts/tracetool/format/simpletrace_stap.py +++ b/scripts/tracetool/format/simpletrace_stap.py @@ -19,8 +19,27 @@ from tracetool.format.stap import stap_escape -def generate(events, backend): +def generate(events, backend, group): out('/* This file is autogenerated by tracetool, do not edit. 
*/', + '', + 'global event_name_to_id_map', + 'global event_next_id', + 'function simple_trace_map_event(name)', + '', + '{', + ' if (!([name] in event_name_to_id_map)) {', + ' event_name_to_id_map[name] = event_next_id', + ' name_len = strlen(name)', + ' printf("%%8b%%8b%%4b%%.*s", 0, ', + ' event_next_id, name_len, name_len, name)', + ' event_next_id = event_next_id + 1', + ' }', + ' return event_name_to_id_map[name]', + '}', + 'probe begin', + '{', + ' printf("%%8b%%8b%%8b", 0xffffffffffffffff, 0xf2b177cb0aa429b4, 4)', + '}', '') for event_id, e in enumerate(events): @@ -29,6 +48,7 @@ def generate(events, backend): out('probe %(probeprefix)s.simpletrace.%(name)s = %(probeprefix)s.%(name)s ?', '{', + ' id = simple_trace_map_event("%(name)s")', probeprefix=probeprefix(), name=e.name) @@ -48,7 +68,7 @@ def generate(events, backend): sizestr = ' + '.join(sizes) # Generate format string and value pairs for record header and arguments - fields = [('8b', str(event_id)), + fields = [('8b', 'id'), ('8b', 'gettimeofday_ns()'), ('4b', sizestr), ('4b', 'pid()')] @@ -63,7 +83,7 @@ def generate(events, backend): # Emit the entire record in a single SystemTap printf() fmt_str = '%'.join(fmt for fmt, _ in fields) arg_str = ', '.join(arg for _, arg in fields) - out(' printf("%%%(fmt_str)s", %(arg_str)s)', + out(' printf("%%8b%%%(fmt_str)s", 1, %(arg_str)s)', fmt_str=fmt_str, arg_str=arg_str) out('}') diff --git a/scripts/tracetool/format/stap.py b/scripts/tracetool/format/stap.py index 9e780f1b06..e8ef3e762d 100644 --- a/scripts/tracetool/format/stap.py +++ b/scripts/tracetool/format/stap.py @@ -34,7 +34,7 @@ def stap_escape(identifier): return identifier -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disable" not in e.properties] diff --git a/scripts/tracetool/format/tcg_h.py b/scripts/tracetool/format/tcg_h.py index e2331f251d..5f213f6cba 100644 --- a/scripts/tracetool/format/tcg_h.py +++ b/scripts/tracetool/format/tcg_h.py @@ -27,12 +27,12 @@ def vcpu_transform_args(args): ]) -def generate(events, backend): +def generate(events, backend, group): out('/* This file is autogenerated by tracetool, do not edit. 
*/', '/* You must include this file after the inclusion of helper.h */', '', - '#ifndef TRACE__GENERATED_TCG_TRACERS_H', - '#define TRACE__GENERATED_TCG_TRACERS_H', + '#ifndef TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(), + '#define TRACE_%s_GENERATED_TCG_TRACERS_H' % group.upper(), '', '#include "trace.h"', '#include "exec/helper-proto.h"', @@ -63,4 +63,4 @@ def generate(events, backend): out('}') out('', - '#endif /* TRACE__GENERATED_TCG_TRACERS_H */') + '#endif /* TRACE_%s_GENERATED_TCG_TRACERS_H */' % group.upper()) diff --git a/scripts/tracetool/format/tcg_helper_c.py b/scripts/tracetool/format/tcg_helper_c.py index e3485b7f92..cc26e03008 100644 --- a/scripts/tracetool/format/tcg_helper_c.py +++ b/scripts/tracetool/format/tcg_helper_c.py @@ -40,7 +40,7 @@ def vcpu_transform_args(args, mode): assert False -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disable" not in e.properties] diff --git a/scripts/tracetool/format/tcg_helper_h.py b/scripts/tracetool/format/tcg_helper_h.py index dc76c15ebc..6b184b641b 100644 --- a/scripts/tracetool/format/tcg_helper_h.py +++ b/scripts/tracetool/format/tcg_helper_h.py @@ -18,7 +18,7 @@ import tracetool.vcpu -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disable" not in e.properties] diff --git a/scripts/tracetool/format/tcg_helper_wrapper_h.py b/scripts/tracetool/format/tcg_helper_wrapper_h.py index 020f4422a9..ff53447512 100644 --- a/scripts/tracetool/format/tcg_helper_wrapper_h.py +++ b/scripts/tracetool/format/tcg_helper_wrapper_h.py @@ -18,7 +18,7 @@ import tracetool.vcpu -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disable" not in e.properties] diff --git a/scripts/tracetool/format/ust_events_c.py b/scripts/tracetool/format/ust_events_c.py index 9967c7a82e..cd87d8ab8f 100644 --- a/scripts/tracetool/format/ust_events_c.py +++ b/scripts/tracetool/format/ust_events_c.py @@ -16,7 +16,7 @@ from tracetool import out -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disabled" not in e.properties] diff --git a/scripts/tracetool/format/ust_events_h.py b/scripts/tracetool/format/ust_events_h.py index 3e8a7cdf19..d853155d21 100644 --- a/scripts/tracetool/format/ust_events_h.py +++ b/scripts/tracetool/format/ust_events_h.py @@ -16,7 +16,7 @@ from tracetool import out -def generate(events, backend): +def generate(events, backend, group): events = [e for e in events if "disabled" not in e.properties] @@ -28,8 +28,9 @@ def generate(events, backend): '#undef TRACEPOINT_INCLUDE_FILE', '#define TRACEPOINT_INCLUDE_FILE ./generated-ust-provider.h', '', - '#if !defined (TRACE__GENERATED_UST_H) || defined(TRACEPOINT_HEADER_MULTI_READ)', - '#define TRACE__GENERATED_UST_H', + '#if !defined (TRACE_%s_GENERATED_UST_H) || \\' % group.upper(), + ' defined(TRACEPOINT_HEADER_MULTI_READ)', + '#define TRACE_%s_GENERATED_UST_H' % group.upper(), '', '#include "qemu-common.h"', '#include ', @@ -94,7 +95,7 @@ def generate(events, backend): '', name=e.name) - out('#endif /* TRACE__GENERATED_UST_H */', + out('#endif /* TRACE_%s_GENERATED_UST_H */' % group.upper(), '', '/* This part must be outside ifdef protection */', '#include ') diff --git a/slirp/slirp.c b/slirp/slirp.c index d67eda12f4..6e2b4e5a90 100644 --- a/slirp/slirp.c +++ b/slirp/slirp.c @@ -1072,7 +1072,9 @@ int slirp_add_exec(Slirp *slirp, int do_pty, const void *args, ssize_t slirp_send(struct 
socket *so, const void *buf, size_t len, int flags) { if (so->s == -1 && so->extra) { - qemu_chr_fe_write(so->extra, buf, len); + /* XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks */ + qemu_chr_fe_write_all(so->extra, buf, len); return len; } diff --git a/slirp/socket.c b/slirp/socket.c index 280050a21f..6c18971368 100644 --- a/slirp/socket.c +++ b/slirp/socket.c @@ -66,6 +66,23 @@ void sofree(struct socket *so) { Slirp *slirp = so->slirp; + struct mbuf *ifm; + + for (ifm = (struct mbuf *) slirp->if_fastq.qh_link; + (struct quehead *) ifm != &slirp->if_fastq; + ifm = ifm->ifq_next) { + if (ifm->ifq_so == so) { + ifm->ifq_so = NULL; + } + } + + for (ifm = (struct mbuf *) slirp->if_batchq.qh_link; + (struct quehead *) ifm != &slirp->if_batchq; + ifm = ifm->ifq_next) { + if (ifm->ifq_so == so) { + ifm->ifq_so = NULL; + } + } if (so->so_emu==EMU_RSH && so->extra) { sofree(so->extra); diff --git a/softmmu_template.h b/softmmu_template.h index 284ab2c7b2..4a2b6653f6 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -21,12 +21,6 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ -#include "qemu/timer.h" -#include "exec/address-spaces.h" -#include "exec/memory.h" - -#define DATA_SIZE (1 << SHIFT) - #if DATA_SIZE == 8 #define SUFFIX q #define LSUFFIX q @@ -84,14 +78,6 @@ # define BSWAP(X) (X) #endif -#ifdef TARGET_WORDS_BIGENDIAN -# define TGT_BE(X) (X) -# define TGT_LE(X) BSWAP(X) -#else -# define TGT_BE(X) BSWAP(X) -# define TGT_LE(X) (X) -#endif - #if DATA_SIZE == 1 # define helper_le_ld_name glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX) # define helper_be_ld_name helper_le_ld_name @@ -108,35 +94,14 @@ # define helper_be_st_name glue(glue(helper_be_st, SUFFIX), MMUSUFFIX) #endif -#ifdef TARGET_WORDS_BIGENDIAN -# define helper_te_ld_name helper_be_ld_name -# define helper_te_st_name helper_be_st_name -#else -# define helper_te_ld_name helper_le_ld_name -# define helper_te_st_name helper_le_st_name -#endif - #ifndef SOFTMMU_CODE_ACCESS static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, - CPUIOTLBEntry *iotlbentry, + size_t mmu_idx, size_t index, target_ulong addr, uintptr_t retaddr) { - uint64_t val; - CPUState *cpu = ENV_GET_CPU(env); - hwaddr physaddr = iotlbentry->addr; - MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); - - physaddr = (physaddr & TARGET_PAGE_MASK) + addr; - cpu->mem_io_pc = retaddr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { - cpu_io_recompile(cpu, retaddr); - } - - cpu->mem_io_vaddr = addr; - memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT, - iotlbentry->attrs); - return val; + CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; + return io_readx(env, iotlbentry, addr, retaddr, DATA_SIZE); } #endif @@ -146,14 +111,11 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; - int a_bits = get_alignment_bits(get_memop(oi)); + unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; - /* Adjust the given return address. 
*/ - retaddr -= GETPC_ADJ; - - if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) { + if (addr & ((1 << a_bits) - 1)) { cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr); } @@ -170,15 +132,13 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr); + res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr); res = TGT_LE(res); return res; } @@ -193,10 +153,8 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, do_unaligned_access: addr1 = addr & ~(DATA_SIZE - 1); addr2 = addr1 + DATA_SIZE; - /* Note the adjustment at the beginning of the function. - Undo that for the recursion. */ - res1 = helper_le_ld_name(env, addr1, oi, retaddr + GETPC_ADJ); - res2 = helper_le_ld_name(env, addr2, oi, retaddr + GETPC_ADJ); + res1 = helper_le_ld_name(env, addr1, oi, retaddr); + res2 = helper_le_ld_name(env, addr2, oi, retaddr); shift = (addr & (DATA_SIZE - 1)) * 8; /* Little-endian combine. */ @@ -220,14 +178,11 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; - int a_bits = get_alignment_bits(get_memop(oi)); + unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; DATA_TYPE res; - /* Adjust the given return address. */ - retaddr -= GETPC_ADJ; - - if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) { + if (addr & ((1 << a_bits) - 1)) { cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, mmu_idx, retaddr); } @@ -244,15 +199,13 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ - res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr); + res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr); res = TGT_BE(res); return res; } @@ -267,10 +220,8 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, do_unaligned_access: addr1 = addr & ~(DATA_SIZE - 1); addr2 = addr1 + DATA_SIZE; - /* Note the adjustment at the beginning of the function. - Undo that for the recursion. */ - res1 = helper_be_ld_name(env, addr1, oi, retaddr + GETPC_ADJ); - res2 = helper_be_ld_name(env, addr2, oi, retaddr + GETPC_ADJ); + res1 = helper_be_ld_name(env, addr1, oi, retaddr); + res2 = helper_be_ld_name(env, addr2, oi, retaddr); shift = (addr & (DATA_SIZE - 1)) * 8; /* Big-endian combine. 
*/ @@ -305,24 +256,13 @@ WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr, #endif static inline void glue(io_write, SUFFIX)(CPUArchState *env, - CPUIOTLBEntry *iotlbentry, + size_t mmu_idx, size_t index, DATA_TYPE val, target_ulong addr, uintptr_t retaddr) { - CPUState *cpu = ENV_GET_CPU(env); - hwaddr physaddr = iotlbentry->addr; - MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); - - physaddr = (physaddr & TARGET_PAGE_MASK) + addr; - if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { - cpu_io_recompile(cpu, retaddr); - } - - cpu->mem_io_vaddr = addr; - cpu->mem_io_pc = retaddr; - memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT, - iotlbentry->attrs); + CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index]; + return io_writex(env, iotlbentry, val, addr, retaddr, DATA_SIZE); } void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, @@ -331,13 +271,10 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - int a_bits = get_alignment_bits(get_memop(oi)); + unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; - /* Adjust the given return address. */ - retaddr -= GETPC_ADJ; - - if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) { + if (addr & ((1 << a_bits) - 1)) { cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); } @@ -353,16 +290,14 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ val = TGT_LE(val); - glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr); + glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr); return; } @@ -391,10 +326,8 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, for (i = 0; i < DATA_SIZE; ++i) { /* Little-endian extract. */ uint8_t val8 = val >> (i * 8); - /* Note the adjustment at the beginning of the function. - Undo that for the recursion. */ glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr + GETPC_ADJ); + oi, retaddr); } return; } @@ -414,13 +347,10 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - int a_bits = get_alignment_bits(get_memop(oi)); + unsigned a_bits = get_alignment_bits(get_memop(oi)); uintptr_t haddr; - /* Adjust the given return address. */ - retaddr -= GETPC_ADJ; - - if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) { + if (addr & ((1 << a_bits) - 1)) { cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); } @@ -436,16 +366,14 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, /* Handle an IO access. */ if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { - CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; } - iotlbentry = &env->iotlb[mmu_idx][index]; /* ??? 
Note that the io helpers always read data in the target byte ordering. We should push the LE/BE request down into io. */ val = TGT_BE(val); - glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr); + glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr); return; } @@ -474,10 +402,8 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, for (i = 0; i < DATA_SIZE; ++i) { /* Big-endian extract. */ uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); - /* Note the adjustment at the beginning of the function. - Undo that for the recursion. */ glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, - oi, retaddr + GETPC_ADJ); + oi, retaddr); } return; } @@ -486,33 +412,9 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); } #endif /* DATA_SIZE > 1 */ - -#if DATA_SIZE == 1 -/* Probe for whether the specified guest write access is permitted. - * If it is not permitted then an exception will be taken in the same - * way as if this were a real write access (and we will not return). - * Otherwise the function will return, and there will be a valid - * entry in the TLB for this access. - */ -void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, - uintptr_t retaddr) -{ - int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { - /* TLB entry is for a different page */ - if (!VICTIM_TLB_HIT(addr_write, addr)) { - tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); - } - } -} -#endif #endif /* !defined(SOFTMMU_CODE_ACCESS) */ #undef READ_ACCESS_TYPE -#undef SHIFT #undef DATA_TYPE #undef SUFFIX #undef LSUFFIX @@ -523,15 +425,9 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, #undef USUFFIX #undef SSUFFIX #undef BSWAP -#undef TGT_BE -#undef TGT_LE -#undef CPU_BE -#undef CPU_LE #undef helper_le_ld_name #undef helper_be_ld_name #undef helper_le_lds_name #undef helper_be_lds_name #undef helper_le_st_name #undef helper_be_st_name -#undef helper_te_ld_name -#undef helper_te_st_name diff --git a/spice-qemu-char.c b/spice-qemu-char.c index 351fcaa033..276c4aef68 100644 --- a/spice-qemu-char.c +++ b/spice-qemu-char.c @@ -199,7 +199,7 @@ static int spice_chr_write(CharDriverState *chr, const uint8_t *buf, int len) return read_bytes; } -static void spice_chr_close(struct CharDriverState *chr) +static void spice_chr_free(struct CharDriverState *chr) { SpiceCharDriver *s = chr->opaque; @@ -236,15 +236,6 @@ static void spice_port_set_fe_open(struct CharDriverState *chr, int fe_open) #endif } -static void spice_chr_fe_event(struct CharDriverState *chr, int event) -{ -#if SPICE_SERVER_VERSION >= 0x000c02 - SpiceCharDriver *s = chr->opaque; - - spice_server_port_event(&s->sin, event); -#endif -} - static void print_allowed_subtypes(void) { const char** psubtype; @@ -289,10 +280,8 @@ static CharDriverState *chr_open(const char *subtype, chr->opaque = s; chr->chr_write = spice_chr_write; chr->chr_add_watch = spice_chr_add_watch; - chr->chr_close = spice_chr_close; + chr->chr_free = spice_chr_free; chr->chr_set_fe_open = set_fe_open; - chr->explicit_be_open = true; - chr->chr_fe_event = spice_chr_fe_event; chr->chr_accept_input = spice_chr_accept_input; QLIST_INSERT_HEAD(&spice_chars, s, next); @@ -303,6 +292,7 @@ static CharDriverState *chr_open(const char *subtype, static CharDriverState 
*qemu_chr_open_spice_vmc(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevSpiceChannel *spicevmc = backend->u.spicevmc.data; @@ -321,6 +311,7 @@ static CharDriverState *qemu_chr_open_spice_vmc(const char *id, return NULL; } + *be_opened = false; return chr_open(type, spice_vmc_set_fe_open, common, errp); } @@ -328,6 +319,7 @@ static CharDriverState *qemu_chr_open_spice_vmc(const char *id, static CharDriverState *qemu_chr_open_spice_port(const char *id, ChardevBackend *backend, ChardevReturn *ret, + bool *be_opened, Error **errp) { ChardevSpicePort *spiceport = backend->u.spiceport.data; @@ -345,6 +337,7 @@ static CharDriverState *qemu_chr_open_spice_port(const char *id, if (!chr) { return NULL; } + *be_opened = false; s = chr->opaque; s->sin.portname = g_strdup(name); diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs index 55edd15d71..2b5bb74fce 100644 --- a/stubs/Makefile.objs +++ b/stubs/Makefile.objs @@ -1,4 +1,7 @@ stub-obj-y += arch-query-cpu-def.o +stub-obj-y += arch-query-cpu-model-expansion.o +stub-obj-y += arch-query-cpu-model-comparison.o +stub-obj-y += arch-query-cpu-model-baseline.o stub-obj-y += bdrv-next-monitor-owned.o stub-obj-y += blk-commit-all.o stub-obj-y += blockdev-close-all-bdrv-states.o @@ -6,6 +9,7 @@ stub-obj-y += clock-warp.o stub-obj-y += cpu-get-clock.o stub-obj-y += cpu-get-icount.o stub-obj-y += dump.o +stub-obj-y += error-printf.o stub-obj-y += fdset-add-fd.o stub-obj-y += fdset-find-fd.o stub-obj-y += fdset-get-fd.o @@ -14,12 +18,12 @@ stub-obj-y += gdbstub.o stub-obj-y += get-fd.o stub-obj-y += get-next-serial.o stub-obj-y += get-vm-name.o +stub-obj-y += iothread.o stub-obj-y += iothread-lock.o stub-obj-y += is-daemonized.o stub-obj-y += machine-init-done.o stub-obj-y += migr-blocker.o stub-obj-y += mon-is-qmp.o -stub-obj-y += mon-printf.o stub-obj-y += monitor-init.o stub-obj-y += notify-event.o stub-obj-y += qtest.o @@ -45,3 +49,4 @@ stub-obj-y += iohandler.o stub-obj-y += smbios_type_38.o stub-obj-y += ipmi.o stub-obj-y += pc_madt_cpu_entry.o +stub-obj-y += migration-colo.o diff --git a/stubs/arch-query-cpu-model-baseline.c b/stubs/arch-query-cpu-model-baseline.c new file mode 100644 index 0000000000..094ec13c2c --- /dev/null +++ b/stubs/arch-query-cpu-model-baseline.c @@ -0,0 +1,12 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "sysemu/arch_init.h" +#include "qapi/qmp/qerror.h" + +CpuModelBaselineInfo *arch_query_cpu_model_baseline(CpuModelInfo *modela, + CpuModelInfo *modelb, + Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} diff --git a/stubs/arch-query-cpu-model-comparison.c b/stubs/arch-query-cpu-model-comparison.c new file mode 100644 index 0000000000..d5486ae980 --- /dev/null +++ b/stubs/arch-query-cpu-model-comparison.c @@ -0,0 +1,12 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "sysemu/arch_init.h" +#include "qapi/qmp/qerror.h" + +CpuModelCompareInfo *arch_query_cpu_model_comparison(CpuModelInfo *modela, + CpuModelInfo *modelb, + Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} diff --git a/stubs/arch-query-cpu-model-expansion.c b/stubs/arch-query-cpu-model-expansion.c new file mode 100644 index 0000000000..ae7cf554d1 --- /dev/null +++ b/stubs/arch-query-cpu-model-expansion.c @@ -0,0 +1,12 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "sysemu/arch_init.h" +#include "qapi/qmp/qerror.h" + +CpuModelExpansionInfo *arch_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo 
*mode, + Error **errp) +{ + error_setg(errp, QERR_UNSUPPORTED); + return NULL; +} diff --git a/stubs/error-printf.c b/stubs/error-printf.c new file mode 100644 index 0000000000..ac6b92aa69 --- /dev/null +++ b/stubs/error-printf.c @@ -0,0 +1,19 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/error-report.h" + +void error_vprintf(const char *fmt, va_list ap) +{ + if (g_test_initialized() && !g_test_subprocess()) { + char *msg = g_strdup_vprintf(fmt, ap); + g_test_message("%s", msg); + g_free(msg); + } else { + vfprintf(stderr, fmt, ap); + } +} + +void error_vprintf_unless_qmp(const char *fmt, va_list ap) +{ + error_vprintf(fmt, ap); +} diff --git a/stubs/iothread.c b/stubs/iothread.c new file mode 100644 index 0000000000..8cc9e28c55 --- /dev/null +++ b/stubs/iothread.c @@ -0,0 +1,8 @@ +#include "qemu/osdep.h" +#include "block/aio.h" +#include "qemu/main-loop.h" + +AioContext *qemu_get_current_aio_context(void) +{ + return qemu_get_aio_context(); +} diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c new file mode 100644 index 0000000000..7811764c4b --- /dev/null +++ b/stubs/migration-colo.c @@ -0,0 +1,46 @@ +/* + * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO) + * (a.k.a. Fault Tolerance or Continuous Replication) + * + * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD. + * Copyright (c) 2016 FUJITSU LIMITED + * Copyright (c) 2016 Intel Corporation + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "migration/colo.h" +#include "qmp-commands.h" + +bool colo_supported(void) +{ + return false; +} + +bool migration_in_colo_state(void) +{ + return false; +} + +bool migration_incoming_in_colo_state(void) +{ + return false; +} + +void migrate_start_colo_process(MigrationState *s) +{ +} + +void *colo_process_incoming_thread(void *opaque) +{ + return NULL; +} + +void qmp_x_colo_lost_heartbeat(Error **errp) +{ + error_setg(errp, "COLO is not supported, please rerun configure" + " with --enable-colo option in order to support" + " COLO feature"); +} diff --git a/stubs/mon-printf.c b/stubs/mon-printf.c deleted file mode 100644 index e7c1e0cf74..0000000000 --- a/stubs/mon-printf.c +++ /dev/null @@ -1,11 +0,0 @@ -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "monitor/monitor.h" - -void monitor_printf(Monitor *mon, const char *fmt, ...) -{ -} - -void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) -{ -} diff --git a/stubs/replay.c b/stubs/replay.c index de9fa1ec98..d9a6da99d2 100644 --- a/stubs/replay.c +++ b/stubs/replay.c @@ -67,3 +67,8 @@ void replay_char_read_all_save_buf(uint8_t *buf, int offset) void replay_block_event(QEMUBH *bh, uint64_t id) { } + +uint64_t blkreplay_next_id(void) +{ + return 0; +} diff --git a/stubs/trace-control.c b/stubs/trace-control.c index fe59836fce..7f856e5c24 100644 --- a/stubs/trace-control.c +++ b/stubs/trace-control.c @@ -11,13 +11,30 @@ #include "trace/control.h" +void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state) +{ + trace_event_set_state_dynamic(ev, state); +} + void trace_event_set_state_dynamic(TraceEvent *ev, bool state) { - TraceEventID id; + bool state_pre; assert(trace_event_get_state_static(ev)); - id = trace_event_get_id(ev); - trace_events_enabled_count += state - trace_events_dstate[id]; - trace_events_dstate[id] = state; + + /* + * We ignore the "vcpu" property here, since there's no target code. Then + * dstate can only be 1 or 0. 
+ */ + state_pre = *(ev->dstate); + if (state_pre != state) { + if (state) { + trace_events_enabled_count++; + *(ev->dstate) = 1; + } else { + trace_events_enabled_count--; + *(ev->dstate) = 0; + } + } } void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, @@ -26,3 +43,9 @@ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, /* should never be called on non-target binaries */ abort(); } + +void trace_init_vcpu(CPUState *vcpu) +{ + /* should never be called on non-target binaries */ + abort(); +} diff --git a/stubs/uuid.c b/stubs/uuid.c index 92ad717831..a880de8d61 100644 --- a/stubs/uuid.c +++ b/stubs/uuid.c @@ -1,6 +1,6 @@ #include "qemu/osdep.h" #include "qemu-common.h" -#include "sysemu/sysemu.h" +#include "qemu/uuid.h" #include "qmp-commands.h" UuidInfo *qmp_query_uuid(Error **errp) diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c index 6d01d7f75e..30d77ce71c 100644 --- a/target-alpha/cpu.c +++ b/target-alpha/cpu.c @@ -59,6 +59,13 @@ static void alpha_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); AlphaCPUClass *acc = ALPHA_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } qemu_init_vcpu(cs); @@ -266,7 +273,6 @@ static void alpha_cpu_initfn(Object *obj) CPUAlphaState *env = &cpu->env; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); tlb_flush(cs, 1); alpha_translate_init(); @@ -309,13 +315,6 @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data) cc->disas_set_info = alpha_cpu_disas_set_info; cc->gdb_num_core_regs = 67; - - /* - * Reason: alpha_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo alpha_cpu_type_info = { diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h index ac5e801fb4..b08d1601d1 100644 --- a/target-alpha/cpu.h +++ b/target-alpha/cpu.h @@ -201,7 +201,7 @@ enum { /* MMU modes definitions */ -/* Alpha has 5 MMU modes: PALcode, kernel, executive, supervisor, and user. +/* Alpha has 5 MMU modes: PALcode, Kernel, Executive, Supervisor, and User. The Unix PALcode only exposes the kernel and user modes; presumably executive and supervisor are used by VMS. @@ -209,22 +209,18 @@ enum { there are PALmode instructions that can access data via physical mode or via an os-installed "alternate mode", which is one of the 4 above. - QEMU does not currently properly distinguish between code/data when - looking up addresses. To avoid having to address this issue, our - emulated PALcode will cheat and use the KSEG mapping for its code+data - rather than physical addresses. + That said, we're only emulating Unix PALcode, and not attempting VMS, + so we don't need to implement Executive and Supervisor. QEMU's own + PALcode cheats and usees the KSEG mapping for its code+data rather than + physical addresses. */ - Moreover, we're only emulating Unix PALcode, and not attempting VMS. - - All of which allows us to drop all but kernel and user modes. - Elide the unused MMU modes to save space. 
*/ - -#define NB_MMU_MODES 2 +#define NB_MMU_MODES 3 #define MMU_MODE0_SUFFIX _kernel #define MMU_MODE1_SUFFIX _user #define MMU_KERNEL_IDX 0 #define MMU_USER_IDX 1 +#define MMU_PHYS_IDX 2 typedef struct CPUAlphaState CPUAlphaState; @@ -234,7 +230,6 @@ struct CPUAlphaState { uint64_t pc; uint64_t unique; uint64_t lock_addr; - uint64_t lock_st_addr; uint64_t lock_value; /* The FPCR, and disassembled portions thereof. */ @@ -350,9 +345,6 @@ enum { EXCP_ARITH, EXCP_FEN, EXCP_CALL_PAL, - /* For Usermode emulation. */ - EXCP_STL_C, - EXCP_STQ_C, }; /* Alpha-specific interrupt pending bits. */ @@ -474,7 +466,6 @@ int cpu_alpha_signal_handler(int host_signum, void *pinfo, void *puc); int alpha_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, int rw, int mmu_idx); -void do_restore_state(CPUAlphaState *, uintptr_t retaddr); void QEMU_NORETURN dynamic_excp(CPUAlphaState *, uintptr_t, int, int); void QEMU_NORETURN arith_excp(CPUAlphaState *, uintptr_t, int, uint64_t); diff --git a/target-alpha/helper.c b/target-alpha/helper.c index 85168b7ed1..a5c308859b 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -126,6 +126,14 @@ static int get_physical_address(CPUAlphaState *env, target_ulong addr, int prot = 0; int ret = MM_K_ACV; + /* Handle physical accesses. */ + if (mmu_idx == MMU_PHYS_IDX) { + phys = addr; + prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + ret = -1; + goto exit; + } + /* Ensure that the virtual address is properly sign-extended from the last implemented virtual address bit. */ if (saddr >> TARGET_VIRT_ADDR_SPACE_BITS != saddr >> 63) { @@ -298,15 +306,11 @@ void alpha_cpu_do_interrupt(CPUState *cs) case EXCP_CALL_PAL: name = "call_pal"; break; - case EXCP_STL_C: - name = "stl_c"; - break; - case EXCP_STQ_C: - name = "stq_c"; - break; } - qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n", - ++count, name, env->error_code, env->pc, env->ir[IR_SP]); + qemu_log("INT %6d: %s(%#x) cpu=%d pc=%016" + PRIx64 " sp=%016" PRIx64 "\n", + ++count, name, env->error_code, cs->cpu_index, + env->pc, env->ir[IR_SP]); } cs->exception_index = -1; diff --git a/target-alpha/helper.h b/target-alpha/helper.h index c3d8a3ee49..004221df8c 100644 --- a/target-alpha/helper.h +++ b/target-alpha/helper.h @@ -92,15 +92,6 @@ DEF_HELPER_FLAGS_2(ieee_input_cmp, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_2(ieee_input_s, TCG_CALL_NO_WG, void, env, i64) #if !defined (CONFIG_USER_ONLY) -DEF_HELPER_2(ldl_phys, i64, env, i64) -DEF_HELPER_2(ldq_phys, i64, env, i64) -DEF_HELPER_2(ldl_l_phys, i64, env, i64) -DEF_HELPER_2(ldq_l_phys, i64, env, i64) -DEF_HELPER_3(stl_phys, void, env, i64, i64) -DEF_HELPER_3(stq_phys, void, env, i64, i64) -DEF_HELPER_3(stl_c_phys, i64, env, i64, i64) -DEF_HELPER_3(stq_c_phys, i64, env, i64, i64) - DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_1(tb_flush, TCG_CALL_NO_RWG, void, env) diff --git a/target-alpha/machine.c b/target-alpha/machine.c index 710b7835b4..b99a123a39 100644 --- a/target-alpha/machine.c +++ b/target-alpha/machine.c @@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = { VMSTATE_UINTTL(unique, CPUAlphaState), VMSTATE_UINTTL(lock_addr, CPUAlphaState), VMSTATE_UINTTL(lock_value, CPUAlphaState), - /* Note that lock_st_addr is not saved; it is a temporary - used during the execution of the st[lq]_c insns. 
*/ VMSTATE_UINT8(ps, CPUAlphaState), VMSTATE_UINT8(intr_flag, CPUAlphaState), diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c index 1b2be50be7..78a7d45590 100644 --- a/target-alpha/mem_helper.c +++ b/target-alpha/mem_helper.c @@ -25,79 +25,6 @@ /* Softmmu support */ #ifndef CONFIG_USER_ONLY - -uint64_t helper_ldl_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - return (int32_t)ldl_phys(cs->as, p); -} - -uint64_t helper_ldq_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - return ldq_phys(cs->as, p); -} - -uint64_t helper_ldl_l_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - env->lock_addr = p; - return env->lock_value = (int32_t)ldl_phys(cs->as, p); -} - -uint64_t helper_ldq_l_phys(CPUAlphaState *env, uint64_t p) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - env->lock_addr = p; - return env->lock_value = ldq_phys(cs->as, p); -} - -void helper_stl_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - stl_phys(cs->as, p, v); -} - -void helper_stq_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - stq_phys(cs->as, p, v); -} - -uint64_t helper_stl_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - uint64_t ret = 0; - - if (p == env->lock_addr) { - int32_t old = ldl_phys(cs->as, p); - if (old == (int32_t)env->lock_value) { - stl_phys(cs->as, p, v); - ret = 1; - } - } - env->lock_addr = -1; - - return ret; -} - -uint64_t helper_stq_c_phys(CPUAlphaState *env, uint64_t p, uint64_t v) -{ - CPUState *cs = CPU(alpha_env_get_cpu(env)); - uint64_t ret = 0; - - if (p == env->lock_addr) { - uint64_t old = ldq_phys(cs->as, p); - if (old == env->lock_value) { - stq_phys(cs->as, p, v); - ret = 1; - } - } - env->lock_addr = -1; - - return ret; -} - void alpha_cpu_do_unaligned_access(CPUState *cs, vaddr addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 0ea0e6e146..114927b751 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31]; static TCGv cpu_fir[31]; static TCGv cpu_pc; static TCGv cpu_lock_addr; -static TCGv cpu_lock_st_addr; static TCGv cpu_lock_value; #ifndef CONFIG_USER_ONLY @@ -116,7 +115,6 @@ void alpha_translate_init(void) static const GlobalVar vars[] = { DEF_VAR(pc), DEF_VAR(lock_addr), - DEF_VAR(lock_st_addr), DEF_VAR(lock_value), }; @@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx) return ctx->sink; } +static void free_context_temps(DisasContext *ctx) +{ + if (!TCGV_IS_UNUSED_I64(ctx->sink)) { + tcg_gen_discard_i64(ctx->sink); + tcg_temp_free(ctx->sink); + TCGV_UNUSED_I64(ctx->sink); + } + if (!TCGV_IS_UNUSED_I64(ctx->zero)) { + tcg_temp_free(ctx->zero); + TCGV_UNUSED_I64(ctx->zero); + } + if (!TCGV_IS_UNUSED_I64(ctx->lit)) { + tcg_temp_free(ctx->lit); + TCGV_UNUSED_I64(ctx->lit); + } +} + static TCGv load_gpr(DisasContext *ctx, unsigned reg) { if (likely(reg < 31)) { @@ -392,59 +407,40 @@ static inline void gen_store_mem(DisasContext *ctx, } static ExitStatus gen_store_conditional(DisasContext *ctx, int ra, int rb, - int32_t disp16, int quad) + int32_t disp16, int mem_idx, + TCGMemOp op) { - TCGv addr; - - if (ra == 31) { - /* ??? Don't bother storing anything. The user can't tell - the difference, since the zero register always reads zero. 
*/ - return NO_EXIT; - } - -#if defined(CONFIG_USER_ONLY) - addr = cpu_lock_st_addr; -#else - addr = tcg_temp_local_new(); -#endif + TCGLabel *lab_fail, *lab_done; + TCGv addr, val; + addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, load_gpr(ctx, rb), disp16); + free_context_temps(ctx); -#if defined(CONFIG_USER_ONLY) - /* ??? This is handled via a complicated version of compare-and-swap - in the cpu_loop. Hopefully one day we'll have a real CAS opcode - in TCG so that this isn't necessary. */ - return gen_excp(ctx, quad ? EXCP_STQ_C : EXCP_STL_C, ra); -#else - /* ??? In system mode we are never multi-threaded, so CAS can be - implemented via a non-atomic load-compare-store sequence. */ - { - TCGLabel *lab_fail, *lab_done; - TCGv val; - - lab_fail = gen_new_label(); - lab_done = gen_new_label(); - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); + lab_fail = gen_new_label(); + lab_done = gen_new_label(); + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); + tcg_temp_free_i64(addr); - val = tcg_temp_new(); - tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, quad ? MO_LEQ : MO_LESL); - tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail); + val = tcg_temp_new_i64(); + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, + load_gpr(ctx, ra), mem_idx, op); + free_context_temps(ctx); - tcg_gen_qemu_st_i64(ctx->ir[ra], addr, ctx->mem_idx, - quad ? MO_LEQ : MO_LEUL); - tcg_gen_movi_i64(ctx->ir[ra], 1); - tcg_gen_br(lab_done); + if (ra != 31) { + tcg_gen_setcond_i64(TCG_COND_EQ, ctx->ir[ra], val, cpu_lock_value); + } + tcg_temp_free_i64(val); + tcg_gen_br(lab_done); - gen_set_label(lab_fail); + gen_set_label(lab_fail); + if (ra != 31) { tcg_gen_movi_i64(ctx->ir[ra], 0); - - gen_set_label(lab_done); - tcg_gen_movi_i64(cpu_lock_addr, -1); - - tcg_temp_free(addr); - return NO_EXIT; } -#endif + + gen_set_label(lab_done); + tcg_gen_movi_i64(cpu_lock_addr, -1); + return NO_EXIT; } static bool in_superpage(DisasContext *ctx, int64_t addr) @@ -2338,11 +2334,11 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) break; case 0x4000: /* MB */ - /* No-op */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); break; case 0x4400: /* WMB */ - /* No-op */ + tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); break; case 0x8000: /* FETCH */ @@ -2423,19 +2419,19 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) switch ((insn >> 12) & 0xF) { case 0x0: /* Longword physical access (hw_ldl/p) */ - gen_helper_ldl_phys(va, cpu_env, addr); + tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LESL); break; case 0x1: /* Quadword physical access (hw_ldq/p) */ - gen_helper_ldq_phys(va, cpu_env, addr); + tcg_gen_qemu_ld_i64(va, addr, MMU_PHYS_IDX, MO_LEQ); break; case 0x2: /* Longword physical access with lock (hw_ldl_l/p) */ - gen_helper_ldl_l_phys(va, cpu_env, addr); + gen_qemu_ldl_l(va, addr, MMU_PHYS_IDX); break; case 0x3: /* Quadword physical access with lock (hw_ldq_l/p) */ - gen_helper_ldq_l_phys(va, cpu_env, addr); + gen_qemu_ldq_l(va, addr, MMU_PHYS_IDX); break; case 0x4: /* Longword virtual PTE fetch (hw_ldl/v) */ @@ -2674,27 +2670,34 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) #ifndef CONFIG_USER_ONLY REQUIRE_TB_FLAG(TB_FLAGS_PAL_MODE); { - TCGv addr = tcg_temp_new(); - va = load_gpr(ctx, ra); - vb = load_gpr(ctx, rb); - - tcg_gen_addi_i64(addr, vb, disp12); switch ((insn >> 12) & 0xF) { case 0x0: /* Longword physical access */ - gen_helper_stl_phys(cpu_env, addr, va); + va = load_gpr(ctx, ra); + vb = load_gpr(ctx, rb); + tmp = 
tcg_temp_new(); + tcg_gen_addi_i64(tmp, vb, disp12); + tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LESL); + tcg_temp_free(tmp); break; case 0x1: /* Quadword physical access */ - gen_helper_stq_phys(cpu_env, addr, va); + va = load_gpr(ctx, ra); + vb = load_gpr(ctx, rb); + tmp = tcg_temp_new(); + tcg_gen_addi_i64(tmp, vb, disp12); + tcg_gen_qemu_st_i64(va, tmp, MMU_PHYS_IDX, MO_LEQ); + tcg_temp_free(tmp); break; case 0x2: /* Longword physical access with lock */ - gen_helper_stl_c_phys(dest_gpr(ctx, ra), cpu_env, addr, va); + ret = gen_store_conditional(ctx, ra, rb, disp12, + MMU_PHYS_IDX, MO_LESL); break; case 0x3: /* Quadword physical access with lock */ - gen_helper_stq_c_phys(dest_gpr(ctx, ra), cpu_env, addr, va); + ret = gen_store_conditional(ctx, ra, rb, disp12, + MMU_PHYS_IDX, MO_LEQ); break; case 0x4: /* Longword virtual access */ @@ -2733,7 +2736,6 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) /* Invalid */ goto invalid_opc; } - tcg_temp_free(addr); break; } #else @@ -2797,11 +2799,13 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) break; case 0x2E: /* STL_C */ - ret = gen_store_conditional(ctx, ra, rb, disp16, 0); + ret = gen_store_conditional(ctx, ra, rb, disp16, + ctx->mem_idx, MO_LESL); break; case 0x2F: /* STQ_C */ - ret = gen_store_conditional(ctx, ra, rb, disp16, 1); + ret = gen_store_conditional(ctx, ra, rb, disp16, + ctx->mem_idx, MO_LEQ); break; case 0x30: /* BR */ @@ -2906,6 +2910,10 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) /* Similarly for flush-to-zero. */ ctx.tb_ftz = -1; + TCGV_UNUSED_I64(ctx.zero); + TCGV_UNUSED_I64(ctx.sink); + TCGV_UNUSED_I64(ctx.lit); + num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; if (max_insns == 0) { @@ -2940,23 +2948,9 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) } insn = cpu_ldl_code(env, ctx.pc); - TCGV_UNUSED_I64(ctx.zero); - TCGV_UNUSED_I64(ctx.sink); - TCGV_UNUSED_I64(ctx.lit); - ctx.pc += 4; ret = translate_one(ctxp, insn); - - if (!TCGV_IS_UNUSED_I64(ctx.sink)) { - tcg_gen_discard_i64(ctx.sink); - tcg_temp_free(ctx.sink); - } - if (!TCGV_IS_UNUSED_I64(ctx.zero)) { - tcg_temp_free(ctx.zero); - } - if (!TCGV_IS_UNUSED_I64(ctx.lit)) { - tcg_temp_free(ctx.lit); - } + free_context_temps(ctxp); /* If we reach a page boundary, are single stepping, or exhaust instruction count, stop generation. 
*/ @@ -3000,9 +2994,11 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, ctx.pc - pc_start, 1); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs index f20641163c..847fb52ee0 100644 --- a/target-arm/Makefile.objs +++ b/target-arm/Makefile.objs @@ -9,4 +9,4 @@ obj-y += neon_helper.o iwmmxt_helper.o obj-y += gdbstub.o obj-$(TARGET_AARCH64) += cpu64.o translate-a64.o helper-a64.o gdbstub64.o obj-y += crypto_helper.o -obj-y += arm-powerctl.o +obj-$(CONFIG_SOFTMMU) += arm-powerctl.o diff --git a/target-arm/arm-powerctl.c b/target-arm/arm-powerctl.c index 6519d52cae..fbb7a15daa 100644 --- a/target-arm/arm-powerctl.c +++ b/target-arm/arm-powerctl.c @@ -166,6 +166,8 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, /* Start the new CPU at the requested address */ cpu_set_pc(target_cpu_state, entry); + qemu_cpu_kick(target_cpu_state); + /* We are good to go */ return QEMU_ARM_POWERCTL_RET_SUCCESS; } diff --git a/target-arm/cpu-qom.h b/target-arm/cpu-qom.h index 74d4470681..e9052da7a0 100644 --- a/target-arm/cpu-qom.h +++ b/target-arm/cpu-qom.h @@ -81,9 +81,11 @@ void arm_gt_stimer_cb(void *opaque); #define ARM_AFF2_MASK (0xFFULL << ARM_AFF2_SHIFT) #define ARM_AFF3_SHIFT 32 #define ARM_AFF3_MASK (0xFFULL << ARM_AFF3_SHIFT) +#define ARM_DEFAULT_CPUS_PER_CLUSTER 8 #define ARM32_AFFINITY_MASK (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK) #define ARM64_AFFINITY_MASK \ (ARM_AFF0_MASK|ARM_AFF1_MASK|ARM_AFF2_MASK|ARM_AFF3_MASK) +#define ARM64_AFFINITY_INVALID (~ARM64_AFFINITY_MASK) #endif diff --git a/target-arm/cpu.c b/target-arm/cpu.c index 116856c78e..0189cd1fd8 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -21,6 +21,7 @@ #include "config-host.h" #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qapi/error.h" #include "cpu.h" #include "internals.h" @@ -450,29 +451,16 @@ static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) } } -#define ARM_CPUS_PER_CLUSTER 8 - static void arm_cpu_initfn(Object *obj) { CPUState *cs = CPU(obj); ARMCPU *cpu = ARM_CPU(obj); static bool inited; - uint32_t Aff1, Aff0; cs->env_ptr = &cpu->env; - cpu_exec_init(cs, &error_abort); cpu->cp_regs = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free); - /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it. - * We don't support setting cluster ID ([16..23]) (known as Aff2 - * in later ARM ARM versions), or any of the higher affinity level fields, - * so these bits always RAZ. 
- */ - Aff1 = cs->cpu_index / ARM_CPUS_PER_CLUSTER; - Aff0 = cs->cpu_index % ARM_CPUS_PER_CLUSTER; - cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0; - #ifndef CONFIG_USER_ONLY /* Our inbound IRQ and FIQ lines */ if (kvm_enabled()) { @@ -525,6 +513,10 @@ static Property arm_cpu_rvbar_property = static Property arm_cpu_has_el3_property = DEFINE_PROP_BOOL("has_el3", ARMCPU, has_el3, true); +/* use property name "pmu" to match other archs and virt tools */ +static Property arm_cpu_has_pmu_property = + DEFINE_PROP_BOOL("pmu", ARMCPU, has_pmu, true); + static Property arm_cpu_has_mpu_property = DEFINE_PROP_BOOL("has-mpu", ARMCPU, has_mpu, true); @@ -568,6 +560,11 @@ static void arm_cpu_post_init(Object *obj) #endif } + if (arm_feature(&cpu->env, ARM_FEATURE_PMU)) { + qdev_property_add_static(DEVICE(obj), &arm_cpu_has_pmu_property, + &error_abort); + } + if (arm_feature(&cpu->env, ARM_FEATURE_MPU)) { qdev_property_add_static(DEVICE(obj), &arm_cpu_has_mpu_property, &error_abort); @@ -592,6 +589,14 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) ARMCPU *cpu = ARM_CPU(dev); ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); CPUARMState *env = &cpu->env; + int pagebits; + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } /* Some features automatically imply others: */ if (arm_feature(env, ARM_FEATURE_V8)) { @@ -647,6 +652,40 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) set_feature(env, ARM_FEATURE_THUMB_DSP); } + if (arm_feature(env, ARM_FEATURE_V7) && + !arm_feature(env, ARM_FEATURE_M) && + !arm_feature(env, ARM_FEATURE_MPU)) { + /* v7VMSA drops support for the old ARMv5 tiny pages, so we + * can use 4K pages. + */ + pagebits = 12; + } else { + /* For CPUs which might have tiny 1K pages, or which have an + * MPU and might have small region sizes, stick with 1K pages. + */ + pagebits = 10; + } + if (!set_preferred_target_page_bits(pagebits)) { + /* This can only ever happen for hotplugging a CPU, or if + * the board code incorrectly creates a CPU which it has + * promised via minimum_page_size that it will not. + */ + error_setg(errp, "This CPU requires a smaller page size than the " + "system is using"); + return; + } + + /* This cpu-id-to-MPIDR affinity is used only for TCG; KVM will override it. + * We don't support setting cluster ID ([16..23]) (known as Aff2 + * in later ARM ARM versions), or any of the higher affinity level fields, + * so these bits always RAZ. + */ + if (cpu->mp_affinity == ARM64_AFFINITY_INVALID) { + uint32_t Aff1 = cs->cpu_index / ARM_DEFAULT_CPUS_PER_CLUSTER; + uint32_t Aff0 = cs->cpu_index % ARM_DEFAULT_CPUS_PER_CLUSTER; + cpu->mp_affinity = (Aff1 << ARM_AFF1_SHIFT) | Aff0; + } + if (cpu->reset_hivecs) { cpu->reset_sctlr |= (1 << 13); } @@ -664,6 +703,11 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) cpu->id_aa64pfr0 &= ~0xf000; } + if (!cpu->has_pmu || !kvm_enabled()) { + cpu->has_pmu = false; + unset_feature(env, ARM_FEATURE_PMU); + } + if (!arm_feature(env, ARM_FEATURE_EL2)) { /* Disable the hypervisor feature bits in the processor feature * registers if we don't have EL2. 
These are id_pfr1[15:12] and @@ -1201,6 +1245,51 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { REGINFO_SENTINEL }; +static void cortex_a7_initfn(Object *obj) +{ + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a7"; + set_feature(&cpu->env, ARM_FEATURE_V7); + set_feature(&cpu->env, ARM_FEATURE_VFP4); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); + set_feature(&cpu->env, ARM_FEATURE_ARM_DIV); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); + set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); + set_feature(&cpu->env, ARM_FEATURE_LPAE); + set_feature(&cpu->env, ARM_FEATURE_EL3); + cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A7; + cpu->midr = 0x410fc075; + cpu->reset_fpsid = 0x41023075; + cpu->mvfr0 = 0x10110222; + cpu->mvfr1 = 0x11111111; + cpu->ctr = 0x84448003; + cpu->reset_sctlr = 0x00c50078; + cpu->id_pfr0 = 0x00001131; + cpu->id_pfr1 = 0x00011011; + cpu->id_dfr0 = 0x02010555; + cpu->pmceid0 = 0x00000000; + cpu->pmceid1 = 0x00000000; + cpu->id_afr0 = 0x00000000; + cpu->id_mmfr0 = 0x10101105; + cpu->id_mmfr1 = 0x40000000; + cpu->id_mmfr2 = 0x01240000; + cpu->id_mmfr3 = 0x02102211; + cpu->id_isar0 = 0x01101110; + cpu->id_isar1 = 0x13112111; + cpu->id_isar2 = 0x21232041; + cpu->id_isar3 = 0x11112131; + cpu->id_isar4 = 0x10011142; + cpu->dbgdidr = 0x3515f005; + cpu->clidr = 0x0a200023; + cpu->ccsidr[0] = 0x701fe00a; /* 32K L1 dcache */ + cpu->ccsidr[1] = 0x201fe00a; /* 32K L1 icache */ + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ +} + static void cortex_a15_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); @@ -1475,6 +1564,7 @@ static const ARMCPUInfo arm_cpus[] = { #if defined(CONFIG_GNU_ARM_ECLIPSE) { .name = "cortex-r5", .initfn = cortex_r5_initfn }, + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, { .name = "cortex-a8", .initfn = cortex_a8_initfn }, { .name = "cortex-a9", .initfn = cortex_a9_initfn }, { .name = "cortex-a15", .initfn = cortex_a15_initfn }, @@ -1506,7 +1596,8 @@ static Property arm_cpu_properties[] = { DEFINE_PROP_BOOL("start-powered-off", ARMCPU, start_powered_off, false), DEFINE_PROP_UINT32("psci-conduit", ARMCPU, psci_conduit, 0), DEFINE_PROP_UINT32("midr", ARMCPU, midr, 0), - DEFINE_PROP_UINT64("mp-affinity", ARMCPU, mp_affinity, 0), + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, + mp_affinity, ARM64_AFFINITY_INVALID), DEFINE_PROP_END_OF_LIST() }; @@ -1585,17 +1676,6 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) cc->debug_check_watchpoint = arm_debug_check_watchpoint; cc->disas_set_info = arm_disas_set_info; - - /* - * Reason: arm_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - * - * Once this is fixed, the devices that create ARM CPUs should be - * updated not to set cannot_destroy_with_object_finalize_yet, - * unless they still screw up something else. - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void cpu_register(const ARMCPUInfo *info) diff --git a/target-arm/cpu.h b/target-arm/cpu.h index f4ece787e2..a0130754af 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -46,13 +46,12 @@ #define EXCP_BKPT 7 #define EXCP_EXCEPTION_EXIT 8 /* Return from v7M exception. */ #define EXCP_KERNEL_TRAP 9 /* Jumped to kernel code page. 
*/ -#define EXCP_STREX 10 #define EXCP_HVC 11 /* HyperVisor Call */ #define EXCP_HYP_TRAP 12 #define EXCP_SMC 13 /* Secure Monitor Call */ #define EXCP_VIRQ 14 #define EXCP_VFIQ 15 -#define EXCP_SEMIHOST 16 /* semihosting call (A64 only) */ +#define EXCP_SEMIHOST 16 /* semihosting call */ #define ARMV7M_EXCP_RESET 1 #define ARMV7M_EXCP_NMI 2 @@ -475,10 +474,6 @@ typedef struct CPUARMState { uint64_t exclusive_addr; uint64_t exclusive_val; uint64_t exclusive_high; -#if defined(CONFIG_USER_ONLY) - uint64_t exclusive_test; - uint32_t exclusive_info; -#endif /* iwMMXt coprocessor state. */ struct { @@ -1132,6 +1127,7 @@ enum arm_features { ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */ ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */ ARM_FEATURE_THUMB_DSP, /* DSP insns supported in the Thumb encodings */ + ARM_FEATURE_PMU, /* has PMU support */ }; static inline int arm_feature(CPUARMState *env, int feature) @@ -1769,10 +1765,11 @@ bool write_cpustate_to_list(ARMCPU *cpu); #if defined(CONFIG_USER_ONLY) #define TARGET_PAGE_BITS 12 #else -/* The ARM MMU allows 1k pages. */ -/* ??? Linux doesn't actually use these, and they're deprecated in recent - architecture revisions. Maybe a configure option to disable them. */ -#define TARGET_PAGE_BITS 10 +/* ARMv7 and later CPUs have 4K pages minimum, but ARMv5 and v6 + * have to support 1K tiny pages. + */ +#define TARGET_PAGE_BITS_VARY +#define TARGET_PAGE_BITS_MIN 10 #endif #if defined(TARGET_AARCH64) @@ -2194,7 +2191,11 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) #define ARM_TBFLAG_BE_DATA_SHIFT 20 #define ARM_TBFLAG_BE_DATA_MASK (1 << ARM_TBFLAG_BE_DATA_SHIFT) -/* Bit usage when in AArch64 state: currently we have no A64 specific bits */ +/* Bit usage when in AArch64 state */ +#define ARM_TBFLAG_TBI0_SHIFT 0 /* TBI0 for EL0/1 or TBI for EL2/3 */ +#define ARM_TBFLAG_TBI0_MASK (0x1ull << ARM_TBFLAG_TBI0_SHIFT) +#define ARM_TBFLAG_TBI1_SHIFT 1 /* TBI1 for EL0/1 */ +#define ARM_TBFLAG_TBI1_MASK (0x1ull << ARM_TBFLAG_TBI1_SHIFT) /* some convenience accessor macros */ #define ARM_TBFLAG_AARCH64_STATE(F) \ @@ -2225,6 +2226,10 @@ static inline bool arm_cpu_data_is_big_endian(CPUARMState *env) (((F) & ARM_TBFLAG_NS_MASK) >> ARM_TBFLAG_NS_SHIFT) #define ARM_TBFLAG_BE_DATA(F) \ (((F) & ARM_TBFLAG_BE_DATA_MASK) >> ARM_TBFLAG_BE_DATA_SHIFT) +#define ARM_TBFLAG_TBI0(F) \ + (((F) & ARM_TBFLAG_TBI0_MASK) >> ARM_TBFLAG_TBI0_SHIFT) +#define ARM_TBFLAG_TBI1(F) \ + (((F) & ARM_TBFLAG_TBI1_MASK) >> ARM_TBFLAG_TBI1_SHIFT) static inline bool bswap_code(bool sctlr_b) { @@ -2322,12 +2327,51 @@ static inline bool arm_cpu_bswap_data(CPUARMState *env) } #endif +#ifndef CONFIG_USER_ONLY +/** + * arm_regime_tbi0: + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * + * Extracts the TBI0 value from the appropriate TCR for the current EL + * + * Returns: the TBI0 value. + */ +uint32_t arm_regime_tbi0(CPUARMState *env, ARMMMUIdx mmu_idx); + +/** + * arm_regime_tbi1: + * @env: CPUARMState + * @mmu_idx: MMU index indicating required translation regime + * + * Extracts the TBI1 value from the appropriate TCR for the current EL + * + * Returns: the TBI1 value. 
+ */ +uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx); +#else +/* We can't handle tagged addresses properly in user-only mode */ +static inline uint32_t arm_regime_tbi0(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + return 0; +} + +static inline uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + return 0; +} +#endif + static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, target_ulong *cs_base, uint32_t *flags) { + ARMMMUIdx mmu_idx = cpu_mmu_index(env, false); if (is_a64(env)) { *pc = env->pc; *flags = ARM_TBFLAG_AARCH64_STATE_MASK; + /* Get control bits for tagged addresses */ + *flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT); + *flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT); } else { *pc = env->regs[15]; *flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) @@ -2346,7 +2390,8 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, << ARM_TBFLAG_XSCALE_CPAR_SHIFT); } - *flags |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT); + *flags |= (mmu_idx << ARM_TBFLAG_MMUIDX_SHIFT); + /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine * states defined in the ARM ARM for software singlestep: * SS_ACTIVE PSTATE.SS State diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c index 1635debc1a..549cb1ee93 100644 --- a/target-arm/cpu64.c +++ b/target-arm/cpu64.c @@ -111,6 +111,7 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57; cpu->midr = 0x411fd070; cpu->revidr = 0x00000000; @@ -166,6 +167,7 @@ static void aarch64_a53_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8_PMULL); set_feature(&cpu->env, ARM_FEATURE_CRC); set_feature(&cpu->env, ARM_FEATURE_EL3); + set_feature(&cpu->env, ARM_FEATURE_PMU); cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A53; cpu->midr = 0x410fd034; cpu->revidr = 0x00000000; diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 41e48a41b4..98b97df461 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -27,6 +27,10 @@ #include "qemu/bitops.h" #include "internals.h" #include "qemu/crc32c.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" #include <zlib.h> /* For crc32 */ /* C2.4.7 Multiply and divide */ @@ -444,3 +448,112 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +/* Returns 0 on success; 1 otherwise. */ +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment.
*/ + uint64_t *haddr = g2h(addr); + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); + } +#endif + } + + return !success; +} + +uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + } + + return !success; +} diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h index 1d3d10fffb..dd32000e63 100644 --- a/target-arm/helper-a64.h +++ b/target-arm/helper-a64.h @@ -46,3 +46,5 @@ DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64) diff --git a/target-arm/helper.c b/target-arm/helper.c index 20b05110b0..c94c016b09 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -1,6 +1,7 @@ #include "config-host.h" #include "qemu/osdep.h" +#include "trace.h" #include "cpu.h" #include "internals.h" #include "exec/gdbstub.h" @@ -1567,10 +1568,13 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) /* Note that this must be unsigned 64 bit arithmetic: */ int istatus = count - offset >= gt->cval; uint64_t nexttick; + int irqstate; gt->ctl = deposit32(gt->ctl, 2, 1, istatus); - qemu_set_irq(cpu->gt_timer_outputs[timeridx], - (istatus && !(gt->ctl & 2))); + + irqstate = 
(istatus && !(gt->ctl & 2)); + qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); + if (istatus) { /* Next transition is when count rolls back over to zero */ nexttick = UINT64_MAX; @@ -1587,11 +1591,13 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx) nexttick = INT64_MAX / GTIMER_SCALE; } timer_mod(cpu->gt_timer[timeridx], nexttick); + trace_arm_gt_recalc(timeridx, irqstate, nexttick); } else { /* Timer disabled: ISTATUS and timer output always clear */ gt->ctl &= ~4; qemu_set_irq(cpu->gt_timer_outputs[timeridx], 0); timer_del(cpu->gt_timer[timeridx]); + trace_arm_gt_recalc_disabled(timeridx); } } @@ -1617,6 +1623,7 @@ static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri, int timeridx, uint64_t value) { + trace_arm_gt_cval_write(timeridx, value); env->cp15.c14_timer[timeridx].cval = value; gt_recalc_timer(arm_env_get_cpu(env), timeridx); } @@ -1636,6 +1643,7 @@ static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri, { uint64_t offset = timeridx == GTIMER_VIRT ? env->cp15.cntvoff_el2 : 0; + trace_arm_gt_tval_write(timeridx, value); env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset + sextract64(value, 0, 32); gt_recalc_timer(arm_env_get_cpu(env), timeridx); @@ -1648,6 +1656,7 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, ARMCPU *cpu = arm_env_get_cpu(env); uint32_t oldval = env->cp15.c14_timer[timeridx].ctl; + trace_arm_gt_ctl_write(timeridx, value); env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value); if ((oldval ^ value) & 1) { /* Enable toggled */ @@ -1656,8 +1665,10 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri, /* IMASK toggled: don't need to recalculate, * just set the interrupt line based on ISTATUS */ - qemu_set_irq(cpu->gt_timer_outputs[timeridx], - (oldval & 4) && !(value & 2)); + int irqstate = (oldval & 4) && !(value & 2); + + trace_arm_gt_imask_toggle(timeridx, irqstate); + qemu_set_irq(cpu->gt_timer_outputs[timeridx], irqstate); } } @@ -1722,6 +1733,7 @@ static void gt_cntvoff_write(CPUARMState *env, const ARMCPRegInfo *ri, { ARMCPU *cpu = arm_env_get_cpu(env); + trace_arm_gt_cntvoff_write(value); raw_write(env, ri, value); gt_recalc_timer(cpu, GTIMER_VIRT); } @@ -4067,6 +4079,14 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { .cp = 14, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 0, .access = PL1_RW, .accessfn = access_tda, .type = ARM_CP_NOP }, + /* Dummy MDCCINT_EL1, since we don't implement the Debug Communications + * Channel but Linux may try to access this register. The 32-bit + * alias is DBGDCCINT. + */ + { .name = "MDCCINT_EL1", .state = ARM_CP_STATE_BOTH, + .cp = 14, .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0, + .access = PL1_RW, .accessfn = access_tda, + .type = ARM_CP_NOP }, REGINFO_SENTINEL }; @@ -6504,7 +6524,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs) /* Set new mode endianness */ env->uncached_cpsr &= ~CPSR_E; if (env->cp15.sctlr_el[arm_current_el(env)] & SCTLR_EE) { - env->uncached_cpsr |= ~CPSR_E; + env->uncached_cpsr |= CPSR_E; } env->daif |= mask; /* this is a lie, as the was no c1_sys on V4T/V5, but who cares @@ -6639,12 +6659,19 @@ static inline bool check_for_semihosting(CPUState *cs) /* Only intercept calls from privileged modes, to provide some * semblance of security. 
*/ - if (!semihosting_enabled() || - ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR)) { + if (cs->exception_index != EXCP_SEMIHOST && + (!semihosting_enabled() || + ((env->uncached_cpsr & CPSR_M) == ARM_CPU_MODE_USR))) { return false; } switch (cs->exception_index) { + case EXCP_SEMIHOST: + /* This is always a semihosting call; the "is this usermode" + * and "is semihosting enabled" checks have been done at + * translate time. + */ + break; case EXCP_SWI: /* Check for semihosting interrupt. */ if (env->thumb) { @@ -6806,6 +6833,52 @@ static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; } +/* Returns TBI0 value for current regime el */ +uint32_t arm_regime_tbi0(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + TCR *tcr; + uint32_t el; + + /* For EL0 and EL1, TBI is controlled by stage 1's TCR, so convert + * a stage 1+2 mmu index into the appropriate stage 1 mmu index. + */ + if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { + mmu_idx += ARMMMUIdx_S1NSE0; + } + + tcr = regime_tcr(env, mmu_idx); + el = regime_el(env, mmu_idx); + + if (el > 1) { + return extract64(tcr->raw_tcr, 20, 1); + } else { + return extract64(tcr->raw_tcr, 37, 1); + } +} + +/* Returns TBI1 value for current regime el */ +uint32_t arm_regime_tbi1(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + TCR *tcr; + uint32_t el; + + /* For EL0 and EL1, TBI is controlled by stage 1's TCR, so convert + * a stage 1+2 mmu index into the appropriate stage 1 mmu index. + */ + if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) { + mmu_idx += ARMMMUIdx_S1NSE0; + } + + tcr = regime_tcr(env, mmu_idx); + el = regime_el(env, mmu_idx); + + if (el > 1) { + return 0; + } else { + return extract64(tcr->raw_tcr, 38, 1); + } +} + /* Return the TTBR associated with this translation regime */ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, int ttbrn) @@ -7584,7 +7657,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, * is unpredictable. Flag this as a guest error. */ if (sign != sext) { qemu_log_mask(LOG_GUEST_ERROR, - "AArch32: VTCR.S / VTCR.T0SZ[3] missmatch\n"); + "AArch32: VTCR.S / VTCR.T0SZ[3] mismatch\n"); } } t1sz = extract32(tcr->raw_tcr, 16, 6); @@ -8424,12 +8497,12 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) * this purpose use the actual register value passed to us * so that we get the fault address right. 
*/ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETRA()); + helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); /* Now we can populate the other TLB entries, if any */ for (i = 0; i < maxidx; i++) { uint64_t va = vaddr + TARGET_PAGE_SIZE * i; if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETRA()); + helper_ret_stb_mmu(env, va, 0, oi, GETPC()); } } } @@ -8446,7 +8519,7 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) * bounce buffer was in use */ for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETRA()); + helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); } } #else diff --git a/target-arm/internals.h b/target-arm/internals.h index cd574017c6..3edccd2529 100644 --- a/target-arm/internals.h +++ b/target-arm/internals.h @@ -46,8 +46,7 @@ static inline bool excp_is_internal(int excp) || excp == EXCP_HALTED || excp == EXCP_EXCEPTION_EXIT || excp == EXCP_KERNEL_TRAP - || excp == EXCP_SEMIHOST - || excp == EXCP_STREX; + || excp == EXCP_SEMIHOST; } /* Exception names for debug logging; note that not all of these @@ -63,7 +62,6 @@ static const char * const excnames[] = { [EXCP_BKPT] = "Breakpoint", [EXCP_EXCEPTION_EXIT] = "QEMU v7M exception exit", [EXCP_KERNEL_TRAP] = "QEMU intercept of kernel commpage", - [EXCP_STREX] = "QEMU intercept of STREX", [EXCP_HVC] = "Hypervisor Call", [EXCP_HYP_TRAP] = "Hypervisor Trap", [EXCP_SMC] = "Secure Monitor Call", diff --git a/target-arm/kvm.c b/target-arm/kvm.c index dbe393c109..c00b94e42a 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -23,6 +23,7 @@ #include "internals.h" #include "hw/arm/arm.h" #include "exec/memattrs.h" +#include "exec/address-spaces.h" #include "hw/boards.h" #include "qemu/log.h" @@ -283,7 +284,7 @@ void kvm_arm_register_device(MemoryRegion *mr, uint64_t devid, uint64_t group, } if (QSLIST_EMPTY(&kvm_devices_head)) { - memory_listener_register(&devlistener, NULL); + memory_listener_register(&devlistener, &address_space_memory); qemu_add_machine_init_done_notifier(¬ify); } kd = g_new0(KVMDevice, 1); diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index 5faa76c57e..61111091ad 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -428,6 +428,11 @@ static inline void set_feature(uint64_t *features, int feature) *features |= 1ULL << feature; } +static inline void unset_feature(uint64_t *features, int feature) +{ + *features &= ~(1ULL << feature); +} + bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc) { /* Identify the feature bits corresponding to the host CPU, and @@ -469,6 +474,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc) set_feature(&features, ARM_FEATURE_VFP4); set_feature(&features, ARM_FEATURE_NEON); set_feature(&features, ARM_FEATURE_AARCH64); + set_feature(&features, ARM_FEATURE_PMU); ahcc->features = features; @@ -482,6 +488,7 @@ int kvm_arch_init_vcpu(CPUState *cs) int ret; uint64_t mpidr; ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE || !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) { @@ -501,10 +508,14 @@ int kvm_arch_init_vcpu(CPUState *cs) if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) { cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT; } - if (kvm_irqchip_in_kernel() && - kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) { - cpu->has_pmu = true; + if (!kvm_irqchip_in_kernel() || + !kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) { + cpu->has_pmu = false; + } + if (cpu->has_pmu) { cpu->kvm_init_features[0] |= 1 << 
KVM_ARM_VCPU_PMU_V3; + } else { + unset_feature(&env->features, ARM_FEATURE_PMU); } /* Do KVM_ARM_VCPU_INIT ioctl */ diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h index a4193684a8..633d08828a 100644 --- a/target-arm/kvm_arm.h +++ b/target-arm/kvm_arm.h @@ -13,6 +13,7 @@ #include "sysemu/kvm.h" #include "exec/memory.h" +#include "qemu/error-report.h" /** * kvm_arm_vcpu_init: @@ -223,7 +224,20 @@ static inline const char *gic_class_name(void) * * Returns: class name to use */ -const char *gicv3_class_name(void); +static inline const char *gicv3_class_name(void) +{ + if (kvm_irqchip_in_kernel()) { +#ifdef TARGET_AARCH64 + return "kvm-arm-gicv3"; +#else + error_report("KVM GICv3 acceleration is not supported on this " + "platform"); + exit(1); +#endif + } else { + return "arm-gicv3"; + } +} /** * kvm_arm_handle_debug: @@ -255,4 +269,23 @@ struct kvm_guest_debug_arch; void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr); +/** + * its_class_name + * + * Return the ITS class name to use depending on whether KVM acceleration + * and KVM CAP_SIGNAL_MSI are supported + * + * Returns: class name to use or NULL + */ +static inline const char *its_class_name(void) +{ + if (kvm_irqchip_in_kernel()) { + /* KVM implementation requires this capability */ + return kvm_direct_msi_enabled() ? "arm-its-kvm" : NULL; + } else { + /* Software emulation is not implemented yet */ + return NULL; + } +} + #endif diff --git a/target-arm/machine.c b/target-arm/machine.c index 7a6ca31a8e..d90943b6db 100644 --- a/target-arm/machine.c +++ b/target-arm/machine.c @@ -331,18 +331,3 @@ const VMStateDescription vmstate_arm_cpu = { NULL } }; - -const char *gicv3_class_name(void) -{ - if (kvm_irqchip_in_kernel()) { -#ifdef TARGET_AARCH64 - return "kvm-arm-gicv3"; -#else - error_report("KVM GICv3 acceleration is not supported on this " - "platform"); - exit(1); -#endif - } else { - return "arm-gicv3"; - } -} diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 3e8588ee6a..cd94216591 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -194,7 +194,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, * the LPAE long descriptor format, or the short descriptor format */ if (arm_s1_regime_using_lpae_format(env, cpu_mmu_index(env, false))) { - env->exception.fsr = 0x21; + env->exception.fsr = (1 << 9) | 0x21; } else { env->exception.fsr = 0x1; } @@ -479,6 +479,13 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) { cpsr_write(env, val, CPSR_ERET_MASK, CPSRWriteExceptionReturn); + /* Generated code has already stored the new PC value, but + * without masking out its low bits, because which bits need + * masking depends on whether we're returning to Thumb or ARM + * state. Do the masking now. + */ + env->regs[15] &= (env->thumb ? ~1 : ~3); + arm_call_el_change_hook(arm_env_get_cpu(env)); } diff --git a/target-arm/trace-events b/target-arm/trace-events new file mode 100644 index 0000000000..9f726bdae3 --- /dev/null +++ b/target-arm/trace-events @@ -0,0 +1,10 @@ +# See docs/tracing.txt for syntax documentation. 
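Each entry in the new trace-events file below names a trace point, gives it a C-style argument list and a printf-style format string; the build's tracetool step turns every entry into a trace_<name>() helper, which is what the instrumented code calls (gt_cval_write() in the helper.c hunk above, for example, now calls trace_arm_gt_cval_write(timeridx, value)). As a rough, self-contained sketch of what such a generated helper boils down to with a simple stderr backend (the enable flag and exact output format here are assumptions, not the generated code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Assumed per-event enable flag; the real generated code keeps similar state. */
static bool trace_event_arm_gt_cval_write_enabled = true;

/* Rough shape of the helper tracetool would generate for:
 *   arm_gt_cval_write(int timer, uint64_t value) "gt_cval_write: timer %d value %" PRIx64
 */
static inline void trace_arm_gt_cval_write(int timer, uint64_t value)
{
    if (trace_event_arm_gt_cval_write_enabled) {
        fprintf(stderr, "arm_gt_cval_write gt_cval_write: timer %d value %" PRIx64 "\n",
                timer, value);
    }
}

int main(void)
{
    /* Mirrors the call this patch adds to gt_cval_write() in helper.c. */
    trace_arm_gt_cval_write(0, 0x123456789aULL);
    return 0;
}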
+ + # target-arm/helper.c +arm_gt_recalc(int timer, int irqstate, uint64_t nexttick) "gt recalc: timer %d irqstate %d next tick %" PRIx64 +arm_gt_recalc_disabled(int timer) "gt recalc: timer %d irqstate 0 timer disabled" +arm_gt_cval_write(int timer, uint64_t value) "gt_cval_write: timer %d value %" PRIx64 +arm_gt_tval_write(int timer, uint64_t value) "gt_tval_write: timer %d value %" PRIx64 +arm_gt_ctl_write(int timer, uint64_t value) "gt_ctl_write: timer %d value %" PRIx64 +arm_gt_imask_toggle(int timer, int irqstate) "gt_ctl_write: timer %d IMASK toggle, new irqstate %d" +arm_gt_cntvoff_write(uint64_t value) "gt_cntvoff_write: value %" PRIx64 diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index f5e29d20a1..6dc27a6115 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -41,6 +41,7 @@ static TCGv_i64 cpu_pc; /* Load/store exclusive handling */ static TCGv_i64 cpu_exclusive_high; +static TCGv_i64 cpu_reg(DisasContext *s, int reg); static const char *regnames[] = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", @@ -176,6 +177,76 @@ void gen_a64_set_pc_im(uint64_t val) tcg_gen_movi_i64(cpu_pc, val); } +/* Load the PC from a generic TCG variable. + * + * If address tagging is enabled via the TCR TBI bits, then loading + * an address into the PC will clear out any tag in it: + * + for EL2 and EL3 there is only one TBI bit, and if it is set + * then the address is zero-extended, clearing bits [63:56] + * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 + * and TBI1 controls addresses with bit 55 == 1. + * If the appropriate TBI bit is set for the address then + * the address is sign-extended from bit 55 into bits [63:56] + * + * We can avoid doing this for relative-branches, because the + * PC + offset can never overflow into the tag bits (assuming + * that virtual addresses are less than 56 bits wide, as they + * are currently), but we must handle it for branch-to-register. + */ +static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) +{ + + if (s->current_el <= 1) { + /* Test if NEITHER or BOTH TBI values are set. If so, no need to + * examine bit 55 of address, can just generate code.
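The comment above fixes the tag-handling rule for branch-to-register. A compact, illustrative C model of the same decision for EL0/EL1 follows; this helper is not QEMU code, since gen_a64_set_pc() emits equivalent TCG ops rather than computing the value directly, and tbi0/tbi1 stand in for the new DisasContext flags:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

/* Illustrative model of the EL0/EL1 rule; not QEMU code. */
static uint64_t canonical_branch_target(uint64_t addr, bool tbi0, bool tbi1)
{
    bool bit55 = (addr >> 55) & 1;
    bool tbi = bit55 ? tbi1 : tbi0;   /* TBI1 governs addresses with bit 55 set */

    if (!tbi) {
        return addr;                  /* tag byte is significant */
    }
    /* Sign-extend from bit 55: clears [63:56] when bit 55 is 0, sets them when 1. */
    return bit55 ? (addr | 0xff00000000000000ull)
                 : (addr & 0x00ffffffffffffffull);
}

int main(void)
{
    printf("0x%016" PRIx64 "\n",
           canonical_branch_target(0x12ff00000040100cull, true, true));
    return 0;
}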
+ * If mixed, then test via generated code + */ + if (s->tbi0 && s->tbi1) { + TCGv_i64 tmp_reg = tcg_temp_new_i64(); + /* Both bits set, sign extension from bit 55 into [63:56] will + * cover both cases + */ + tcg_gen_shli_i64(tmp_reg, src, 8); + tcg_gen_sari_i64(cpu_pc, tmp_reg, 8); + tcg_temp_free_i64(tmp_reg); + } else if (!s->tbi0 && !s->tbi1) { + /* Neither bit set, just load it as-is */ + tcg_gen_mov_i64(cpu_pc, src); + } else { + TCGv_i64 tcg_tmpval = tcg_temp_new_i64(); + TCGv_i64 tcg_bit55 = tcg_temp_new_i64(); + TCGv_i64 tcg_zero = tcg_const_i64(0); + + tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55)); + + if (s->tbi0) { + /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */ + tcg_gen_andi_i64(tcg_tmpval, src, + 0x00FFFFFFFFFFFFFFull); + tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero, + tcg_tmpval, src); + } else { + /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */ + tcg_gen_ori_i64(tcg_tmpval, src, + 0xFF00000000000000ull); + tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero, + tcg_tmpval, src); + } + tcg_temp_free_i64(tcg_zero); + tcg_temp_free_i64(tcg_bit55); + tcg_temp_free_i64(tcg_tmpval); + } + } else { /* EL > 1 */ + if (s->tbi0) { + /* Force tag byte to all zero */ + tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull); + } else { + /* Load unmodified address */ + tcg_gen_mov_i64(cpu_pc, src); + } + } +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -1294,6 +1365,8 @@ static void gen_clrex(DisasContext *s, uint32_t insn) static void handle_sync(DisasContext *s, uint32_t insn, unsigned int op1, unsigned int op2, unsigned int crm) { + TCGBar bar; + if (op1 != 3) { unallocated_encoding(s); return; @@ -1305,7 +1378,18 @@ static void handle_sync(DisasContext *s, uint32_t insn, return; case 4: /* DSB */ case 5: /* DMB */ - /* We don't emulate caches so barriers are no-ops */ + switch (crm & 3) { + case 1: /* MBReqTypes_Reads */ + bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; + break; + case 2: /* MBReqTypes_Writes */ + bar = TCG_BAR_SC | TCG_MO_ST_ST; + break; + default: /* MBReqTypes_All */ + bar = TCG_BAR_SC | TCG_MO_ALL; + break; + } + tcg_gen_mb(bar); return; case 6: /* ISB */ /* We need to break the TB after this insn to execute @@ -1583,12 +1667,12 @@ static void disas_exc(DisasContext *s, uint32_t insn) * instruction works properly. 
*/ switch (op2_ll) { - case 1: + case 1: /* SVC */ gen_ss_advance(s); gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16), default_exception_el(s)); break; - case 2: + case 2: /* HVC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -1601,7 +1685,7 @@ static void disas_exc(DisasContext *s, uint32_t insn) gen_ss_advance(s); gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2); break; - case 3: + case 3: /* SMC */ if (s->current_el == 0) { unallocated_encoding(s); break; @@ -1691,12 +1775,13 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) switch (opc) { case 0: /* BR */ - case 2: /* RET */ - tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn)); - break; case 1: /* BLR */ - tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn)); - tcg_gen_movi_i64(cpu_reg(s, 30), s->pc); + case 2: /* RET */ + gen_a64_set_pc(s, cpu_reg(s, rn)); + /* BLR also needs to load return address */ + if (opc == 1) { + tcg_gen_movi_i64(cpu_reg(s, 30), s->pc); + } break; case 4: /* ERET */ if (s->current_el == 0) { @@ -1761,9 +1846,9 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) * mandated semantics, but it works for typical guest code sequences * and avoids having to monitor regular stores. * - * In system emulation mode only one CPU will be running at once, so - * this sequence is effectively atomic. In user emulation mode we - * throw an exception and handle the atomic operation elsewhere. + * The store exclusive uses the atomic cmpxchg primitives to avoid + * races in multi-threaded linux-user and when MTTCG softmmu is + * enabled. */ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, int size, bool is_pair) @@ -1794,16 +1879,6 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_mov_i64(cpu_exclusive_addr, addr); } -#ifdef CONFIG_USER_ONLY -static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i64 addr, int size, int is_pair) -{ - tcg_gen_mov_i64(cpu_exclusive_test, addr); - tcg_gen_movi_i32(cpu_exclusive_info, - size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14)); - gen_exception_internal_insn(s, 4, EXCP_STREX); -} -#else static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i64 inaddr, int size, int is_pair) { @@ -1831,46 +1906,42 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); tmp = tcg_temp_new_i64(); - tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size); - tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label); - tcg_temp_free_i64(tmp); - - if (is_pair) { - TCGv_i64 addrhi = tcg_temp_new_i64(); - TCGv_i64 tmphi = tcg_temp_new_i64(); - - tcg_gen_addi_i64(addrhi, addr, 1 << size); - tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), - s->be_data + size); - tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label); - - tcg_temp_free_i64(tmphi); - tcg_temp_free_i64(addrhi); - } - - /* We seem to still have the exclusive monitor, so do the store */ - tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), - s->be_data + size); if (is_pair) { - TCGv_i64 addrhi = tcg_temp_new_i64(); - - tcg_gen_addi_i64(addrhi, addr, 1 << size); - tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi, - get_mem_index(s), s->be_data + size); - tcg_temp_free_i64(addrhi); + if (size == 2) { + TCGv_i64 val = tcg_temp_new_i64(); + tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat32_i64(val, cpu_exclusive_val, cpu_exclusive_high); + 
tcg_gen_atomic_cmpxchg_i64(tmp, addr, val, tmp, + get_mem_index(s), + size | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, val); + tcg_temp_free_i64(val); + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tmp, cpu_env, addr, cpu_reg(s, rt), + cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tmp, cpu_env, addr, cpu_reg(s, rt), + cpu_reg(s, rt2)); + } + } else { + TCGv_i64 val = cpu_reg(s, rt); + tcg_gen_atomic_cmpxchg_i64(tmp, addr, cpu_exclusive_val, val, + get_mem_index(s), + size | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); } tcg_temp_free_i64(addr); - tcg_gen_movi_i64(cpu_reg(s, rd), 0); + tcg_gen_mov_i64(cpu_reg(s, rd), tmp); + tcg_temp_free_i64(tmp); tcg_gen_br(done_label); + gen_set_label(fail_label); tcg_gen_movi_i64(cpu_reg(s, rd), 1); gen_set_label(done_label); tcg_gen_movi_i64(cpu_exclusive_addr, -1); - } -#endif /* Update the Sixty-Four bit (SF) registersize. This logic is derived * from the ARMv8 specs for LDR (Shared decode for all encodings). @@ -1934,7 +2005,13 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (!is_store) { s->is_ldex = true; gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair); + if (is_lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + } } else { + if (is_lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + } gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair); } } else { @@ -1943,11 +2020,17 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) /* Generate ISS for non-exclusive accesses including LASR. */ if (is_store) { + if (is_lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); + } do_gpr_st(s, tcg_rt, tcg_addr, size, true, rt, iss_sf, is_lasr); } else { do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false, true, rt, iss_sf, is_lasr); + if (is_lasr) { + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); + } } } } @@ -2000,7 +2083,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) do_fp_ld(s, rt, tcg_addr, size); } else { /* Only unsigned 32bit loads target 32bit registers. */ - bool iss_sf = opc == 0 ? 32 : 64; + bool iss_sf = opc != 0; do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false, true, rt, iss_sf, false); @@ -11150,6 +11233,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) dc->condexec_mask = 0; dc->condexec_cond = 0; dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags); + dc->tbi0 = ARM_TBFLAG_TBI0(tb->flags); + dc->tbi1 = ARM_TBFLAG_TBI1(tb->flags); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); @@ -11331,11 +11416,13 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, 4 | (bswap_code(dc->sctlr_b) ? 
2 : 0)); qemu_log("\n"); + qemu_log_unlock(); } #endif tb->size = dc->pc - pc_start; diff --git a/target-arm/translate.c b/target-arm/translate.c index bd5d5cb576..0ad9070b45 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -28,6 +28,7 @@ #include "qemu/log.h" #include "qemu/bitops.h" #include "arm_ldst.h" +#include "exec/semihost.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" @@ -64,10 +65,6 @@ static TCGv_i32 cpu_R[16]; TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF; TCGv_i64 cpu_exclusive_addr; TCGv_i64 cpu_exclusive_val; -#ifdef CONFIG_USER_ONLY -TCGv_i64 cpu_exclusive_test; -TCGv_i32 cpu_exclusive_info; -#endif /* FIXME: These should be removed. */ static TCGv_i32 cpu_F0s, cpu_F1s; @@ -101,12 +98,6 @@ void arm_translate_init(void) offsetof(CPUARMState, exclusive_addr), "exclusive_addr"); cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env, offsetof(CPUARMState, exclusive_val), "exclusive_val"); -#ifdef CONFIG_USER_ONLY - cpu_exclusive_test = tcg_global_mem_new_i64(cpu_env, - offsetof(CPUARMState, exclusive_test), "exclusive_test"); - cpu_exclusive_info = tcg_global_mem_new_i32(cpu_env, - offsetof(CPUARMState, exclusive_info), "exclusive_info"); -#endif a64_translate_init(); } @@ -180,7 +171,12 @@ static inline TCGv_i32 load_reg(DisasContext *s, int reg) static void store_reg(DisasContext *s, int reg, TCGv_i32 var) { if (reg == 15) { - tcg_gen_andi_i32(var, var, ~1); + /* In Thumb mode, we must ignore bit 0. + * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0] + * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0]. + * We choose to ignore [1:0] in ARM mode for all architecture versions. + */ + tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3); s->is_jmp = DISAS_JUMP; } tcg_gen_mov_i32(cpu_R[reg], var); @@ -926,145 +922,103 @@ static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var) * These functions work like tcg_gen_qemu_{ld,st}* except * that the address argument is TCGv_i32 rather than TCGv. */ -#if TARGET_LONG_BITS == 32 - -#define DO_GEN_LD(SUFF, OPC, BE32_XOR) \ -static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ -{ \ - TCGMemOp opc = (OPC) | s->be_data; \ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - TCGv addr_be = tcg_temp_new(); \ - tcg_gen_xori_i32(addr_be, addr, BE32_XOR); \ - tcg_gen_qemu_ld_i32(val, addr_be, index, opc); \ - tcg_temp_free(addr_be); \ - return; \ - } \ - tcg_gen_qemu_ld_i32(val, addr, index, opc); \ -} - -#define DO_GEN_ST(SUFF, OPC, BE32_XOR) \ -static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ -{ \ - TCGMemOp opc = (OPC) | s->be_data; \ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - TCGv addr_be = tcg_temp_new(); \ - tcg_gen_xori_i32(addr_be, addr, BE32_XOR); \ - tcg_gen_qemu_st_i32(val, addr_be, index, opc); \ - tcg_temp_free(addr_be); \ - return; \ - } \ - tcg_gen_qemu_st_i32(val, addr, index, opc); \ -} -static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op) { - TCGMemOp opc = MO_Q | s->be_data; - tcg_gen_qemu_ld_i64(val, addr, index, opc); + TCGv addr = tcg_temp_new(); + tcg_gen_extu_i32_tl(addr, a32); + /* Not needed for user-mode BE32, where we use MO_BE instead. 
*/ - if (!IS_USER_ONLY && s->sctlr_b) { - tcg_gen_rotri_i64(val, val, 32); + if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) { + tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE))); } + return addr; } -static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, + int index, TCGMemOp opc) { - TCGMemOp opc = MO_Q | s->be_data; - /* Not needed for user-mode BE32, where we use MO_BE instead. */ - if (!IS_USER_ONLY && s->sctlr_b) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_rotri_i64(tmp, val, 32); - tcg_gen_qemu_st_i64(tmp, addr, index, opc); - tcg_temp_free_i64(tmp); - return; - } - tcg_gen_qemu_st_i64(val, addr, index, opc); + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_ld_i32(val, addr, index, opc); + tcg_temp_free(addr); } -#else +static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32, + int index, TCGMemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_st_i32(val, addr, index, opc); + tcg_temp_free(addr); +} -#define DO_GEN_LD(SUFF, OPC, BE32_XOR) \ +#define DO_GEN_LD(SUFF, OPC) \ static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ + TCGv_i32 a32, int index) \ { \ - TCGMemOp opc = (OPC) | s->be_data; \ - TCGv addr64 = tcg_temp_new(); \ - tcg_gen_extu_i32_i64(addr64, addr); \ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - tcg_gen_xori_i64(addr64, addr64, BE32_XOR); \ - } \ - tcg_gen_qemu_ld_i32(val, addr64, index, opc); \ - tcg_temp_free(addr64); \ -} - -#define DO_GEN_ST(SUFF, OPC, BE32_XOR) \ + gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \ +} + +#define DO_GEN_ST(SUFF, OPC) \ static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \ - TCGv_i32 addr, int index) \ + TCGv_i32 a32, int index) \ { \ - TCGMemOp opc = (OPC) | s->be_data; \ - TCGv addr64 = tcg_temp_new(); \ - tcg_gen_extu_i32_i64(addr64, addr); \ - /* Not needed for user-mode BE32, where we use MO_BE instead. */ \ - if (!IS_USER_ONLY && s->sctlr_b && BE32_XOR) { \ - tcg_gen_xori_i64(addr64, addr64, BE32_XOR); \ - } \ - tcg_gen_qemu_st_i32(val, addr64, index, opc); \ - tcg_temp_free(addr64); \ + gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \ } -static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val) { - TCGMemOp opc = MO_Q | s->be_data; - TCGv addr64 = tcg_temp_new(); - tcg_gen_extu_i32_i64(addr64, addr); - tcg_gen_qemu_ld_i64(val, addr64, index, opc); - /* Not needed for user-mode BE32, where we use MO_BE instead. 
*/ if (!IS_USER_ONLY && s->sctlr_b) { tcg_gen_rotri_i64(val, val, 32); } - tcg_temp_free(addr64); } -static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, - TCGv_i32 addr, int index) +static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, TCGMemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); + tcg_gen_qemu_ld_i64(val, addr, index, opc); + gen_aa32_frob64(s, val); + tcg_temp_free(addr); +} + +static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index) { - TCGMemOp opc = MO_Q | s->be_data; - TCGv addr64 = tcg_temp_new(); - tcg_gen_extu_i32_i64(addr64, addr); + gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data); +} + +static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32, + int index, TCGMemOp opc) +{ + TCGv addr = gen_aa32_addr(s, a32, opc); /* Not needed for user-mode BE32, where we use MO_BE instead. */ if (!IS_USER_ONLY && s->sctlr_b) { - TCGv tmp = tcg_temp_new(); + TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_rotri_i64(tmp, val, 32); - tcg_gen_qemu_st_i64(tmp, addr64, index, opc); - tcg_temp_free(tmp); + tcg_gen_qemu_st_i64(tmp, addr, index, opc); + tcg_temp_free_i64(tmp); } else { - tcg_gen_qemu_st_i64(val, addr64, index, opc); + tcg_gen_qemu_st_i64(val, addr, index, opc); } - tcg_temp_free(addr64); + tcg_temp_free(addr); } -#endif +static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val, + TCGv_i32 a32, int index) +{ + gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data); +} -DO_GEN_LD(8s, MO_SB, 3) -DO_GEN_LD(8u, MO_UB, 3) -DO_GEN_LD(16s, MO_SW, 2) -DO_GEN_LD(16u, MO_UW, 2) -DO_GEN_LD(32u, MO_UL, 0) -/* 'a' variants include an alignment check */ -DO_GEN_LD(16ua, MO_UW | MO_ALIGN, 2) -DO_GEN_LD(32ua, MO_UL | MO_ALIGN, 0) -DO_GEN_ST(8, MO_UB, 3) -DO_GEN_ST(16, MO_UW, 2) -DO_GEN_ST(32, MO_UL, 0) +DO_GEN_LD(8s, MO_SB) +DO_GEN_LD(8u, MO_UB) +DO_GEN_LD(16s, MO_SW) +DO_GEN_LD(16u, MO_UW) +DO_GEN_LD(32u, MO_UL) +DO_GEN_ST(8, MO_UB) +DO_GEN_ST(16, MO_UW) +DO_GEN_ST(32, MO_UL) static inline void gen_set_pc_im(DisasContext *s, target_ulong val) { @@ -1139,6 +1093,33 @@ static inline void gen_lookup_tb(DisasContext *s) s->is_jmp = DISAS_JUMP; } +static inline void gen_hlt(DisasContext *s, int imm) +{ + /* HLT. This has two purposes. + * Architecturally, it is an external halting debug instruction. + * Since QEMU doesn't implement external debug, we treat this as + * it is required for halting debug disabled: it will UNDEF. + * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction, + * and "HLT 0xF000" is an A32 semihosting syscall. These traps + * must trigger semihosting even for ARMv7 and earlier, where + * HLT was an undefined encoding. + * In system mode, we don't allow userspace access to + * semihosting, to provide some semblance of security + * (and for consistency with our 32-bit semihosting). + */ + if (semihosting_enabled() && +#ifndef CONFIG_USER_ONLY + s->current_el != 0 && +#endif + (imm == (s->thumb ? 0x3c : 0xf000))) { + gen_exception_internal_insn(s, 0, EXCP_SEMIHOST); + return; + } + + gen_exception_insn(s, s->thumb ? 2 : 4, EXCP_UDEF, syn_uncategorized(), + default_exception_el(s)); +} + static inline void gen_add_data_offset(DisasContext *s, unsigned int insn, TCGv_i32 var) { @@ -4358,26 +4339,35 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) s->is_jmp = DISAS_UPDATE; } -/* Generate an old-style exception return. Marks pc as dead. 
*/ -static void gen_exception_return(DisasContext *s, TCGv_i32 pc) +/* Store value to PC as for an exception return (ie don't + * mask bits). The subsequent call to gen_helper_cpsr_write_eret() + * will do the masking based on the new value of the Thumb bit. + */ +static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc) { - TCGv_i32 tmp; - store_reg(s, 15, pc); - tmp = load_cpu_field(spsr); - gen_helper_cpsr_write_eret(cpu_env, tmp); - tcg_temp_free_i32(tmp); - s->is_jmp = DISAS_JUMP; + tcg_gen_mov_i32(cpu_R[15], pc); + tcg_temp_free_i32(pc); } /* Generate a v6 exception return. Marks both values as dead. */ static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr) { + store_pc_exc_ret(s, pc); + /* The cpsr_write_eret helper will mask the low bits of PC + * appropriately depending on the new Thumb bit, so it must + * be called after storing the new PC. + */ gen_helper_cpsr_write_eret(cpu_env, cpsr); tcg_temp_free_i32(cpsr); - store_reg(s, 15, pc); s->is_jmp = DISAS_JUMP; } +/* Generate an old-style exception return. Marks pc as dead. */ +static void gen_exception_return(DisasContext *s, TCGv_i32 pc) +{ + gen_rfe(s, pc, load_cpu_field(spsr)); +} + static void gen_nop_hint(DisasContext *s, int val) { switch (val) { @@ -7717,45 +7707,30 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi) /* Load/Store exclusive instructions are implemented by remembering the value/address loaded, and seeing if these are the same - when the store is performed. This should be sufficient to implement + when the store is performed. This should be sufficient to implement the architecturally mandated semantics, and avoids having to monitor - regular stores. - - In system emulation mode only one CPU will be running at once, so - this sequence is effectively atomic. In user emulation mode we - throw an exception and handle the atomic operation elsewhere. */ + regular stores. The compare vs the remembered value is done during + the cmpxchg operation, but we must compare the addresses manually. 
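In other words, the generated STREX sequence now fails fast on an address mismatch and otherwise lets a single atomic compare-and-swap against the remembered exclusive value decide the outcome. A minimal C model of that sequence for the 32-bit case (illustrative only; the real code emits TCG ops and also resets cpu_exclusive_addr afterwards):

#include <stdatomic.h>
#include <stdint.h>

/* Illustrative C model of the generated STREX sequence; not QEMU code.
 * Returns 0 on success and 1 on failure, like the architectural status. */
static int model_strex32(_Atomic uint32_t *mem, uint64_t addr,
                         uint64_t exclusive_addr, uint32_t exclusive_val,
                         uint32_t newval)
{
    uint32_t expected = exclusive_val;

    if (addr != exclusive_addr) {
        return 1;                     /* address mismatch: branch to fail_label */
    }
    /* tcg_gen_atomic_cmpxchg_i32() plays this role in the generated code. */
    return atomic_compare_exchange_strong(mem, &expected, newval) ? 0 : 1;
}

int main(void)
{
    _Atomic uint32_t word = 42;
    uint64_t a = (uint64_t)(uintptr_t)&word;

    /* Monitor armed at &word with old value 42: the store should succeed. */
    return model_strex32(&word, a, a, 42, 7);
}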
*/ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i32 addr, int size) { TCGv_i32 tmp = tcg_temp_new_i32(); + TCGMemOp opc = size | MO_ALIGN | s->be_data; s->is_ldex = true; - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16ua(s, tmp, addr, get_mem_index(s)); - break; - case 2: - case 3: - gen_aa32_ld32ua(s, tmp, addr, get_mem_index(s)); - break; - default: - abort(); - } - if (size == 3) { TCGv_i32 tmp2 = tcg_temp_new_i32(); - TCGv_i32 tmp3 = tcg_temp_new_i32(); + TCGv_i64 t64 = tcg_temp_new_i64(); - tcg_gen_addi_i32(tmp2, addr, 4); - gen_aa32_ld32u(s, tmp3, tmp2, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3); - store_reg(s, rt2, tmp3); + gen_aa32_ld_i64(s, t64, addr, get_mem_index(s), opc); + tcg_gen_mov_i64(cpu_exclusive_val, t64); + tcg_gen_extr_i64_i32(tmp, tmp2, t64); + tcg_temp_free_i64(t64); + + store_reg(s, rt2, tmp2); } else { + gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc); tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp); } @@ -7768,23 +7743,15 @@ static void gen_clrex(DisasContext *s) tcg_gen_movi_i64(cpu_exclusive_addr, -1); } -#ifdef CONFIG_USER_ONLY static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i32 addr, int size) { - tcg_gen_extu_i32_i64(cpu_exclusive_test, addr); - tcg_gen_movi_i32(cpu_exclusive_info, - size | (rd << 4) | (rt << 8) | (rt2 << 12)); - gen_exception_internal_insn(s, 4, EXCP_STREX); -} -#else -static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i32 addr, int size) -{ - TCGv_i32 tmp; - TCGv_i64 val64, extaddr; + TCGv_i32 t0, t1, t2; + TCGv_i64 extaddr; + TCGv taddr; TCGLabel *done_label; TCGLabel *fail_label; + TCGMemOp opc = size | MO_ALIGN | s->be_data; /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) { [addr] = {Rt}; @@ -7799,69 +7766,45 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label); tcg_temp_free_i64(extaddr); - tmp = tcg_temp_new_i32(); - switch (size) { - case 0: - gen_aa32_ld8u(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_ld16u(s, tmp, addr, get_mem_index(s)); - break; - case 2: - case 3: - gen_aa32_ld32u(s, tmp, addr, get_mem_index(s)); - break; - default: - abort(); - } - - val64 = tcg_temp_new_i64(); + taddr = gen_aa32_addr(s, addr, opc); + t0 = tcg_temp_new_i32(); + t1 = load_reg(s, rt); if (size == 3) { - TCGv_i32 tmp2 = tcg_temp_new_i32(); - TCGv_i32 tmp3 = tcg_temp_new_i32(); - tcg_gen_addi_i32(tmp2, addr, 4); - gen_aa32_ld32u(s, tmp3, tmp2, get_mem_index(s)); - tcg_temp_free_i32(tmp2); - tcg_gen_concat_i32_i64(val64, tmp, tmp3); - tcg_temp_free_i32(tmp3); - } else { - tcg_gen_extu_i32_i64(val64, tmp); - } - tcg_temp_free_i32(tmp); + TCGv_i64 o64 = tcg_temp_new_i64(); + TCGv_i64 n64 = tcg_temp_new_i64(); - tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label); - tcg_temp_free_i64(val64); + t2 = load_reg(s, rt2); + tcg_gen_concat_i32_i64(n64, t1, t2); + tcg_temp_free_i32(t2); + gen_aa32_frob64(s, n64); - tmp = load_reg(s, rt); - switch (size) { - case 0: - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); - break; - case 1: - gen_aa32_st16(s, tmp, addr, get_mem_index(s)); - break; - case 2: - case 3: - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - break; - default: - abort(); - } - tcg_temp_free_i32(tmp); - if (size == 3) { - tcg_gen_addi_i32(addr, addr, 4); - tmp = load_reg(s, rt2); - 
gen_aa32_st32(s, tmp, addr, get_mem_index(s)); - tcg_temp_free_i32(tmp); + tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64, + get_mem_index(s), opc); + tcg_temp_free_i64(n64); + + gen_aa32_frob64(s, o64); + tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val); + tcg_gen_extrl_i64_i32(t0, o64); + + tcg_temp_free_i64(o64); + } else { + t2 = tcg_temp_new_i32(); + tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val); + tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc); + tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2); + tcg_temp_free_i32(t2); } - tcg_gen_movi_i32(cpu_R[rd], 0); + tcg_temp_free_i32(t1); + tcg_temp_free(taddr); + tcg_gen_mov_i32(cpu_R[rd], t0); + tcg_temp_free_i32(t0); tcg_gen_br(done_label); + gen_set_label(fail_label); tcg_gen_movi_i32(cpu_R[rd], 1); gen_set_label(done_label); tcg_gen_movi_i64(cpu_exclusive_addr, -1); } -#endif /* gen_srs: * @env: CPUARMState @@ -8083,7 +8026,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) case 4: /* dsb */ case 5: /* dmb */ ARCH(7); - /* We don't emulate caches so these are a no-op. */ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); return; case 6: /* isb */ /* We need to break the TB after this insn to execute @@ -8381,6 +8324,10 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) { int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4); switch (op1) { + case 0: + /* HLT */ + gen_hlt(s, imm16); + break; case 1: /* bkpt */ ARCH(5); @@ -8405,7 +8352,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) gen_smc(s); break; default: - goto illegal_op; + g_assert_not_reached(); } break; } @@ -8832,25 +8779,27 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } tcg_temp_free_i32(addr); } else { + TCGv taddr; + TCGMemOp opc = s->be_data; + /* SWP instruction */ rm = (insn) & 0xf; - /* ??? This is not really atomic. However we know - we never have multiple CPUs running in parallel, - so it is good enough. */ - addr = load_reg(s, rn); - tmp = load_reg(s, rm); - tmp2 = tcg_temp_new_i32(); if (insn & (1 << 22)) { - gen_aa32_ld8u(s, tmp2, addr, get_mem_index(s)); - gen_aa32_st8(s, tmp, addr, get_mem_index(s)); + opc |= MO_UB; } else { - gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s)); - gen_aa32_st32(s, tmp, addr, get_mem_index(s)); + opc |= MO_UL | MO_ALIGN; } - tcg_temp_free_i32(tmp); + + addr = load_reg(s, rn); + taddr = gen_aa32_addr(s, addr, opc); tcg_temp_free_i32(addr); - store_reg(s, rd, tmp2); + + tmp = load_reg(s, rm); + tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, + get_mem_index(s), opc); + tcg_temp_free(taddr); + store_reg(s, rd, tmp); } } } else { @@ -9361,6 +9310,8 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } else if (i == rn) { loaded_var = tmp; loaded_base = 1; + } else if (rn == 15 && exc_return) { + store_pc_exc_ret(s, tmp); } else { store_reg_from_load(s, i, tmp); } @@ -10432,7 +10383,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw break; case 4: /* dsb */ case 5: /* dmb */ - /* These execute as NOPs. 
*/ + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); break; case 6: /* isb */ /* We need to break the TB after this insn @@ -11435,19 +11386,33 @@ static void disas_thumb_insn(CPUARMState *env, DisasContext *s) break; } - case 0xa: /* rev */ + case 0xa: /* rev, and hlt */ + { + int op1 = extract32(insn, 6, 2); + + if (op1 == 2) { + /* HLT */ + int imm6 = extract32(insn, 0, 6); + + gen_hlt(s, imm6); + break; + } + + /* Otherwise this is rev */ ARCH(6); rn = (insn >> 3) & 0x7; rd = insn & 0x7; tmp = load_reg(s, rn); - switch ((insn >> 6) & 3) { + switch (op1) { case 0: tcg_gen_bswap32_i32(tmp, tmp); break; case 1: gen_rev16(tmp); break; case 3: gen_revsh(tmp); break; - default: goto illegal_op; + default: + g_assert_not_reached(); } store_reg(s, rd, tmp); break; + } case 6: switch ((insn >> 5) & 7) { @@ -11998,11 +11963,13 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, dc->thumb | (dc->sctlr_b << 1)); qemu_log("\n"); + qemu_log_unlock(); } #endif tb->size = dc->pc - pc_start; diff --git a/target-arm/translate.h b/target-arm/translate.h index dbd7ac83d5..285e96f087 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -22,6 +22,8 @@ typedef struct DisasContext { int user; #endif ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ + bool tbi0; /* TBI0 for EL0/1 or TBI for EL2/3 */ + bool tbi1; /* TBI1 for EL0/1, not used for EL2/3 */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ /* Flag indicating that exceptions from secure mode are routed to EL3. 
*/ @@ -77,10 +79,6 @@ extern TCGv_env cpu_env; extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF; extern TCGv_i64 cpu_exclusive_addr; extern TCGv_i64 cpu_exclusive_val; -#ifdef CONFIG_USER_ONLY -extern TCGv_i64 cpu_exclusive_test; -extern TCGv_i32 cpu_exclusive_info; -#endif static inline int arm_dc_feature(DisasContext *dc, int feature) { diff --git a/target-cris/cpu.c b/target-cris/cpu.c index c5a656bb62..2e9ab9700e 100644 --- a/target-cris/cpu.c +++ b/target-cris/cpu.c @@ -142,6 +142,13 @@ static void cris_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); CRISCPUClass *ccc = CRIS_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } cpu_reset(cs); qemu_init_vcpu(cs); @@ -187,7 +194,6 @@ static void cris_cpu_initfn(Object *obj) static bool tcg_initialized; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); env->pregs[PR_VR] = ccc->vr; @@ -246,6 +252,16 @@ static void crisv11_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_read_register = crisv10_cpu_gdb_read_register; } +static void crisv17_cpu_class_init(ObjectClass *oc, void *data) +{ + CPUClass *cc = CPU_CLASS(oc); + CRISCPUClass *ccc = CRIS_CPU_CLASS(oc); + + ccc->vr = 17; + cc->do_interrupt = crisv10_cpu_do_interrupt; + cc->gdb_read_register = crisv10_cpu_gdb_read_register; +} + static void crisv32_cpu_class_init(ObjectClass *oc, void *data) { CRISCPUClass *ccc = CRIS_CPU_CLASS(oc); @@ -272,6 +288,10 @@ static const TypeInfo cris_cpu_model_type_infos[] = { .name = TYPE("crisv11"), .parent = TYPE_CRIS_CPU, .class_init = crisv11_cpu_class_init, + }, { + .name = TYPE("crisv17"), + .parent = TYPE_CRIS_CPU, + .class_init = crisv17_cpu_class_init, }, { .name = TYPE("crisv32"), .parent = TYPE_CRIS_CPU, @@ -312,13 +332,6 @@ static void cris_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_stop_before_watchpoint = true; cc->disas_set_info = cris_disas_set_info; - - /* - * Reason: cris_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo cris_cpu_type_info = { diff --git a/target-cris/cpu.h b/target-cris/cpu.h index 7d7fe6eb1c..43d5f9d1da 100644 --- a/target-cris/cpu.h +++ b/target-cris/cpu.h @@ -223,6 +223,13 @@ int cpu_cris_signal_handler(int host_signum, void *pinfo, void cris_initialize_tcg(void); void cris_initialize_crisv10_tcg(void); +/* Instead of computing the condition codes after each CRIS instruction, + * QEMU just stores one operand (called CC_SRC), the result + * (called CC_DEST) and the type of operation (called CC_OP). When the + * condition codes are needed, the condition codes can be calculated + * using this information. Condition codes are not generated if they + * are only needed for conditional branches. 
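A small stand-alone sketch of the lazy scheme that comment describes: record the operation and its operands, and only materialise the flags when a conditional branch asks for them. The operand and flag conventions below are simplified assumptions for illustration, not the CRIS definitions:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum cc_op { CC_OP_ADD };

struct lazy_cc {
    enum cc_op cc_op;    /* which operation produced the result */
    uint32_t cc_src;     /* one recorded operand */
    uint32_t cc_dest;    /* recorded result */
};

static bool flag_z(const struct lazy_cc *cc) { return cc->cc_dest == 0; }
static bool flag_n(const struct lazy_cc *cc) { return (int32_t)cc->cc_dest < 0; }

/* Carry needs cc_op: for an addition, carry out means the result wrapped
 * below the recorded operand. */
static bool flag_c(const struct lazy_cc *cc)
{
    return cc->cc_op == CC_OP_ADD && cc->cc_dest < cc->cc_src;
}

int main(void)
{
    /* 0xffffffff + 0xffffffff: flags are derived only here, at the "branch". */
    struct lazy_cc cc = { CC_OP_ADD, 0xffffffffu, 0xfffffffeu };
    printf("Z=%d N=%d C=%d\n", flag_z(&cc), flag_n(&cc), flag_c(&cc));
    return 0;
}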
+ */ enum { CC_OP_DYNAMIC, /* Use env->cc_op */ CC_OP_FLAGS, diff --git a/target-cris/crisv10-decode.h b/target-cris/crisv10-decode.h index 587fbdd278..bdb4b6d318 100644 --- a/target-cris/crisv10-decode.h +++ b/target-cris/crisv10-decode.h @@ -92,6 +92,7 @@ #define CRISV10_IND_JUMP_M 4 #define CRISV10_IND_DIP 5 #define CRISV10_IND_JUMP_R 6 +#define CRISV17_IND_ADDC 6 #define CRISV10_IND_BOUND 7 #define CRISV10_IND_BCC_M 7 #define CRISV10_IND_MOVE_M_SPR 8 diff --git a/target-cris/translate.c b/target-cris/translate.c index f4a8d7d000..b91042743f 100644 --- a/target-cris/translate.c +++ b/target-cris/translate.c @@ -140,14 +140,14 @@ static void gen_BUG(DisasContext *dc, const char *file, int line) cpu_abort(CPU(dc->cpu), "%s:%d\n", file, line); } -static const char *regnames[] = +static const char *regnames_v32[] = { "$r0", "$r1", "$r2", "$r3", "$r4", "$r5", "$r6", "$r7", "$r8", "$r9", "$r10", "$r11", "$r12", "$r13", "$sp", "$acr", }; -static const char *pregnames[] = +static const char *pregnames_v32[] = { "$bz", "$vr", "$pid", "$srs", "$wz", "$exs", "$eda", "$mof", @@ -3135,29 +3135,6 @@ void gen_intermediate_code(CPUCRISState *env, struct TranslationBlock *tb) dc->cpustate_changed = 0; - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { - qemu_log( - "pc=%x %x flg=%" PRIx64 " bt=%x ds=%u ccs=%x\n" - "pid=%x usp=%x\n" - "%x.%x.%x.%x\n" - "%x.%x.%x.%x\n" - "%x.%x.%x.%x\n" - "%x.%x.%x.%x\n", - dc->pc, dc->ppc, - (uint64_t)tb->flags, - env->btarget, (unsigned)tb->flags & 7, - env->pregs[PR_CCS], - env->pregs[PR_PID], env->pregs[PR_USP], - env->regs[0], env->regs[1], env->regs[2], env->regs[3], - env->regs[4], env->regs[5], env->regs[6], env->regs[7], - env->regs[8], env->regs[9], - env->regs[10], env->regs[11], - env->regs[12], env->regs[13], - env->regs[14], env->regs[15]); - qemu_log("--------------\n"); - qemu_log("IN: %s\n", lookup_symbol(pc_start)); - } - next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; @@ -3313,10 +3290,14 @@ void gen_intermediate_code(CPUCRISState *env, struct TranslationBlock *tb) #if !DISAS_CRIS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); + qemu_log("--------------\n"); + qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, env->pregs[PR_VR]); qemu_log("\nisize=%d osize=%d\n", dc->pc - pc_start, tcg_op_buf_count()); + qemu_log_unlock(); } #endif #endif @@ -3327,12 +3308,20 @@ void cris_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, { CRISCPU *cpu = CRIS_CPU(cs); CPUCRISState *env = &cpu->env; + const char **regnames; + const char **pregnames; int i; - uint32_t srs; if (!env || !f) { return; } + if (env->pregs[PR_VR] < 32) { + pregnames = pregnames_v10; + regnames = regnames_v10; + } else { + pregnames = pregnames_v32; + regnames = regnames_v32; + } cpu_fprintf(f, "PC=%x CCS=%x btaken=%d btarget=%x\n" "cc_op=%d cc_src=%d cc_dest=%d cc_result=%x cc_mask=%x\n", @@ -3354,14 +3343,16 @@ void cris_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, "\n"); } } - srs = env->pregs[PR_SRS]; - cpu_fprintf(f, "\nsupport function regs bank %x:\n", srs); - if (srs < ARRAY_SIZE(env->sregs)) { - for (i = 0; i < 16; i++) { - cpu_fprintf(f, "s%2.2d=%8.8x ", - i, env->sregs[srs][i]); - if ((i + 1) % 4 == 0) { - cpu_fprintf(f, "\n"); + if (env->pregs[PR_VR] >= 32) { + uint32_t srs = env->pregs[PR_SRS]; + cpu_fprintf(f, "\nsupport function regs bank %x:\n", srs); + if 
(srs < ARRAY_SIZE(env->sregs)) { + for (i = 0; i < 16; i++) { + cpu_fprintf(f, "s%2.2d=%8.8x ", + i, env->sregs[srs][i]); + if ((i + 1) % 4 == 0) { + cpu_fprintf(f, "\n"); + } } } } @@ -3406,12 +3397,12 @@ void cris_initialize_tcg(void) for (i = 0; i < 16; i++) { cpu_R[i] = tcg_global_mem_new(cpu_env, offsetof(CPUCRISState, regs[i]), - regnames[i]); + regnames_v32[i]); } for (i = 0; i < 16; i++) { cpu_PR[i] = tcg_global_mem_new(cpu_env, offsetof(CPUCRISState, pregs[i]), - pregnames[i]); + pregnames_v32[i]); } } diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c index 4707a18e77..4a0b485d8e 100644 --- a/target-cris/translate_v10.c +++ b/target-cris/translate_v10.c @@ -1094,6 +1094,29 @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc) insn_len = dec10_bdap_m(env, dc, size); break; default: + /* + * ADDC for v17: + * + * Instruction format: ADDC [Rs],Rd + * + * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+-+ + * |Destination(Rd)| 1 0 0 1 1 0 1 0 | Source(Rs)| + * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+--+--+ + * + * Instruction format: ADDC [Rs+],Rd + * + * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+-+ + * |Destination(Rd)| 1 1 0 1 1 0 1 0 | Source(Rs)| + * +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+-+ + */ + if (dc->opcode == CRISV17_IND_ADDC && dc->size == 2 && + env->pregs[PR_VR] == 17) { + LOG_DIS("addc op=%d %d\n", dc->src, dc->dst); + cris_cc_mask(dc, CC_MASK_NZVC); + insn_len += dec10_ind_alu(env, dc, CC_OP_ADDC, size); + break; + } + LOG_DIS("pc=%x var-ind.%d %d r%d r%d\n", dc->pc, size, dc->opcode, dc->src, dc->dst); cpu_abort(CPU(dc->cpu), "Unhandled opcode"); diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h index 5dde658ac4..7c9a07ae65 100644 --- a/target-i386/cpu-qom.h +++ b/target-i386/cpu-qom.h @@ -63,7 +63,12 @@ typedef struct X86CPUClass { bool kvm_required; + /* Optional description of CPU model. + * If unavailable, cpu_def->model_id is used */ + const char *model_description; + DeviceRealize parent_realize; + DeviceUnrealize parent_unrealize; void (*parent_reset)(CPUState *cpu); } X86CPUClass; diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 6a1afab595..de1f30eeda 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -57,6 +57,7 @@ #define CPUID_2_L1D_32KB_8WAY_64B 0x2c #define CPUID_2_L1I_32KB_8WAY_64B 0x30 #define CPUID_2_L2_2MB_8WAY_64B 0x7d +#define CPUID_2_L3_16MB_16WAY_64B 0x4d /* CPUID Leaf 4 constants: */ @@ -131,11 +132,18 @@ #define L2_LINES_PER_TAG 1 #define L2_SIZE_KB_AMD 512 -/* No L3 cache: */ +/* Level 3 unified cache: */ #define L3_SIZE_KB 0 /* disabled */ #define L3_ASSOCIATIVITY 0 /* disabled */ #define L3_LINES_PER_TAG 0 /* disabled */ #define L3_LINE_SIZE 0 /* disabled */ +#define L3_N_LINE_SIZE 64 +#define L3_N_ASSOCIATIVITY 16 +#define L3_N_SETS 16384 +#define L3_N_PARTITIONS 1 +#define L3_N_DESCRIPTOR CPUID_2_L3_16MB_16WAY_64B +#define L3_N_LINES_PER_TAG 1 +#define L3_N_SIZE_KB_AMD 16384 /* TLB definitions: */ @@ -173,181 +181,6 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, dst[CPUID_VENDOR_SZ] = '\0'; } -/* feature flags taken from "Intel Processor Identification and the CPUID - * Instruction" and AMD's "CPUID Specification". In cases of disagreement - * between feature naming conventions, aliases may be added. 
- */ -static const char *feature_name[] = { - "fpu", "vme", "de", "pse", - "tsc", "msr", "pae", "mce", - "cx8", "apic", NULL, "sep", - "mtrr", "pge", "mca", "cmov", - "pat", "pse36", "pn" /* Intel psn */, "clflush" /* Intel clfsh */, - NULL, "ds" /* Intel dts */, "acpi", "mmx", - "fxsr", "sse", "sse2", "ss", - "ht" /* Intel htt */, "tm", "ia64", "pbe", -}; -static const char *ext_feature_name[] = { - "pni|sse3" /* Intel,AMD sse3 */, "pclmulqdq|pclmuldq", "dtes64", "monitor", - "ds_cpl", "vmx", "smx", "est", - "tm2", "ssse3", "cid", NULL, - "fma", "cx16", "xtpr", "pdcm", - NULL, "pcid", "dca", "sse4.1|sse4_1", - "sse4.2|sse4_2", "x2apic", "movbe", "popcnt", - "tsc-deadline", "aes", "xsave", "osxsave", - "avx", "f16c", "rdrand", "hypervisor", -}; -/* Feature names that are already defined on feature_name[] but are set on - * CPUID[8000_0001].EDX on AMD CPUs don't have their names on - * ext2_feature_name[]. They are copied automatically to cpuid_ext2_features - * if and only if CPU vendor is AMD. - */ -static const char *ext2_feature_name[] = { - NULL /* fpu */, NULL /* vme */, NULL /* de */, NULL /* pse */, - NULL /* tsc */, NULL /* msr */, NULL /* pae */, NULL /* mce */, - NULL /* cx8 */ /* AMD CMPXCHG8B */, NULL /* apic */, NULL, "syscall", - NULL /* mtrr */, NULL /* pge */, NULL /* mca */, NULL /* cmov */, - NULL /* pat */, NULL /* pse36 */, NULL, NULL /* Linux mp */, - "nx|xd", NULL, "mmxext", NULL /* mmx */, - NULL /* fxsr */, "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp", - NULL, "lm|i64", "3dnowext", "3dnow", -}; -static const char *ext3_feature_name[] = { - "lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD ExtApicSpace */, - "cr8legacy" /* AMD AltMovCr8 */, "abm", "sse4a", "misalignsse", - "3dnowprefetch", "osvw", "ibs", "xop", - "skinit", "wdt", NULL, "lwp", - "fma4", "tce", NULL, "nodeid_msr", - NULL, "tbm", "topoext", "perfctr_core", - "perfctr_nb", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *ext4_feature_name[] = { - NULL, NULL, "xstore", "xstore-en", - NULL, NULL, "xcrypt", "xcrypt-en", - "ace2", "ace2-en", "phe", "phe-en", - "pmm", "pmm-en", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *kvm_feature_name[] = { - "kvmclock", "kvm_nopiodelay", "kvm_mmu", "kvmclock", - "kvm_asyncpf", "kvm_steal_time", "kvm_pv_eoi", "kvm_pv_unhalt", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - "kvmclock-stable-bit", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *hyperv_priv_feature_name[] = { - NULL /* hv_msr_vp_runtime_access */, NULL /* hv_msr_time_refcount_access */, - NULL /* hv_msr_synic_access */, NULL /* hv_msr_stimer_access */, - NULL /* hv_msr_apic_access */, NULL /* hv_msr_hypercall_access */, - NULL /* hv_vpindex_access */, NULL /* hv_msr_reset_access */, - NULL /* hv_msr_stats_access */, NULL /* hv_reftsc_access */, - NULL /* hv_msr_idle_access */, NULL /* hv_msr_frequency_access */, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *hyperv_ident_feature_name[] = { - NULL /* hv_create_partitions */, NULL /* hv_access_partition_id */, - NULL /* hv_access_memory_pool */, NULL /* hv_adjust_message_buffers */, - NULL /* hv_post_messages */, NULL /* hv_signal_events */, - NULL /* hv_create_port */, NULL /* hv_connect_port */, - NULL /* hv_access_stats */, 
NULL, NULL, NULL /* hv_debugging */, - NULL /* hv_cpu_power_management */, NULL /* hv_configure_profiler */, - NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *hyperv_misc_feature_name[] = { - NULL /* hv_mwait */, NULL /* hv_guest_debugging */, - NULL /* hv_perf_monitor */, NULL /* hv_cpu_dynamic_part */, - NULL /* hv_hypercall_params_xmm */, NULL /* hv_guest_idle_state */, - NULL, NULL, - NULL, NULL, NULL /* hv_guest_crash_msr */, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *svm_feature_name[] = { - "npt", "lbrv", "svm_lock", "nrip_save", - "tsc_scale", "vmcb_clean", "flushbyasid", "decodeassists", - NULL, NULL, "pause_filter", NULL, - "pfthreshold", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *cpuid_7_0_ebx_feature_name[] = { - "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", - "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, - "avx512f", NULL, "rdseed", "adx", "smap", NULL, "pcommit", "clflushopt", - "clwb", NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL, -}; - -static const char *cpuid_7_0_ecx_feature_name[] = { - NULL, NULL, "umip", "pku", - "ospke", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, "rdpid", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *cpuid_apm_edx_feature_name[] = { - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - "invtsc", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *cpuid_xsave_feature_name[] = { - "xsaveopt", "xsavec", "xgetbv1", "xsaves", - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - -static const char *cpuid_6_feature_name[] = { - NULL, NULL, "arat", NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -}; - #define I486_FEATURES (CPUID_FP87 | CPUID_VME | CPUID_PSE) #define PENTIUM_FEATURES (I486_FEATURES | CPUID_DE | CPUID_TSC | \ CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_MMX | CPUID_APIC) @@ -406,6 +239,7 @@ static const char *cpuid_6_feature_name[] = { CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, CPUID_7_0_EBX_RDSEED */ #define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE) +#define TCG_7_0_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) @@ -413,96 +247,282 @@ static const char *cpuid_6_feature_name[] = { CPUID_XSAVE_XSAVEC, CPUID_XSAVE_XSAVES */ typedef struct FeatureWordInfo { - const char **feat_names; + /* feature flags names are taken from "Intel Processor Identification and + * the CPUID Instruction" and AMD's "CPUID Specification". + * In cases of disagreement between feature naming conventions, + * aliases may be added. 
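Because every feat_names[] entry sits at the bit position it names, resolving a flag string to a (feature word, bit) pair is a plain table scan; a minimal self-contained sketch of that lookup (simplified types and hypothetical demo_* names, not the QEMU code):

    #include <stdbool.h>
    #include <string.h>

    typedef struct DemoFeatureWordInfo {
        const char *feat_names[32];     /* index == bit number */
    } DemoFeatureWordInfo;

    static bool demo_lookup_feature(const DemoFeatureWordInfo *words,
                                    int n_words, const char *name,
                                    int *word, int *bit)
    {
        for (int w = 0; w < n_words; w++) {
            for (int b = 0; b < 32; b++) {
                const char *fn = words[w].feat_names[b];

                if (fn && !strcmp(fn, name)) {
                    *word = w;
                    *bit = b;
                    return true;
                }
            }
        }
        return false;
    }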
+ */ + const char *feat_names[32]; uint32_t cpuid_eax; /* Input EAX for CPUID */ bool cpuid_needs_ecx; /* CPUID instruction uses ECX as input */ uint32_t cpuid_ecx; /* Input ECX value for CPUID */ int cpuid_reg; /* output register (R_* constant) */ uint32_t tcg_features; /* Feature flags supported by TCG */ uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ + uint32_t migratable_flags; /* Feature flags known to be migratable */ } FeatureWordInfo; static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_1_EDX] = { - .feat_names = feature_name, + .feat_names = { + "fpu", "vme", "de", "pse", + "tsc", "msr", "pae", "mce", + "cx8", "apic", NULL, "sep", + "mtrr", "pge", "mca", "cmov", + "pat", "pse36", "pn" /* Intel psn */, "clflush" /* Intel clfsh */, + NULL, "ds" /* Intel dts */, "acpi", "mmx", + "fxsr", "sse", "sse2", "ss", + "ht" /* Intel htt */, "tm", "ia64", "pbe", + }, .cpuid_eax = 1, .cpuid_reg = R_EDX, .tcg_features = TCG_FEATURES, }, [FEAT_1_ECX] = { - .feat_names = ext_feature_name, + .feat_names = { + "pni" /* Intel,AMD sse3 */, "pclmulqdq", "dtes64", "monitor", + "ds-cpl", "vmx", "smx", "est", + "tm2", "ssse3", "cid", NULL, + "fma", "cx16", "xtpr", "pdcm", + NULL, "pcid", "dca", "sse4.1", + "sse4.2", "x2apic", "movbe", "popcnt", + "tsc-deadline", "aes", "xsave", "osxsave", + "avx", "f16c", "rdrand", "hypervisor", + }, .cpuid_eax = 1, .cpuid_reg = R_ECX, .tcg_features = TCG_EXT_FEATURES, }, + /* Feature names that are already defined on feature_name[] but + * are set on CPUID[8000_0001].EDX on AMD CPUs don't have their + * names on feat_names below. They are copied automatically + * to features[FEAT_8000_0001_EDX] if and only if CPU vendor is AMD. + */ [FEAT_8000_0001_EDX] = { - .feat_names = ext2_feature_name, + .feat_names = { + NULL /* fpu */, NULL /* vme */, NULL /* de */, NULL /* pse */, + NULL /* tsc */, NULL /* msr */, NULL /* pae */, NULL /* mce */, + NULL /* cx8 */, NULL /* apic */, NULL, "syscall", + NULL /* mtrr */, NULL /* pge */, NULL /* mca */, NULL /* cmov */, + NULL /* pat */, NULL /* pse36 */, NULL, NULL /* Linux mp */, + "nx", NULL, "mmxext", NULL /* mmx */, + NULL /* fxsr */, "fxsr-opt", "pdpe1gb", "rdtscp", + NULL, "lm", "3dnowext", "3dnow", + }, .cpuid_eax = 0x80000001, .cpuid_reg = R_EDX, .tcg_features = TCG_EXT2_FEATURES, }, [FEAT_8000_0001_ECX] = { - .feat_names = ext3_feature_name, + .feat_names = { + "lahf-lm", "cmp-legacy", "svm", "extapic", + "cr8legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "xop", + "skinit", "wdt", NULL, "lwp", + "fma4", "tce", NULL, "nodeid-msr", + NULL, "tbm", "topoext", "perfctr-core", + "perfctr-nb", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0x80000001, .cpuid_reg = R_ECX, .tcg_features = TCG_EXT3_FEATURES, }, [FEAT_C000_0001_EDX] = { - .feat_names = ext4_feature_name, + .feat_names = { + NULL, NULL, "xstore", "xstore-en", + NULL, NULL, "xcrypt", "xcrypt-en", + "ace2", "ace2-en", "phe", "phe-en", + "pmm", "pmm-en", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0xC0000001, .cpuid_reg = R_EDX, .tcg_features = TCG_EXT4_FEATURES, }, [FEAT_KVM] = { - .feat_names = kvm_feature_name, + .feat_names = { + "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock", + "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "kvmclock-stable-bit", NULL, NULL, NULL, + NULL, 
NULL, NULL, NULL, + }, .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EAX, .tcg_features = TCG_KVM_FEATURES, }, [FEAT_HYPERV_EAX] = { - .feat_names = hyperv_priv_feature_name, + .feat_names = { + NULL /* hv_msr_vp_runtime_access */, NULL /* hv_msr_time_refcount_access */, + NULL /* hv_msr_synic_access */, NULL /* hv_msr_stimer_access */, + NULL /* hv_msr_apic_access */, NULL /* hv_msr_hypercall_access */, + NULL /* hv_vpindex_access */, NULL /* hv_msr_reset_access */, + NULL /* hv_msr_stats_access */, NULL /* hv_reftsc_access */, + NULL /* hv_msr_idle_access */, NULL /* hv_msr_frequency_access */, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0x40000003, .cpuid_reg = R_EAX, }, [FEAT_HYPERV_EBX] = { - .feat_names = hyperv_ident_feature_name, + .feat_names = { + NULL /* hv_create_partitions */, NULL /* hv_access_partition_id */, + NULL /* hv_access_memory_pool */, NULL /* hv_adjust_message_buffers */, + NULL /* hv_post_messages */, NULL /* hv_signal_events */, + NULL /* hv_create_port */, NULL /* hv_connect_port */, + NULL /* hv_access_stats */, NULL, NULL, NULL /* hv_debugging */, + NULL /* hv_cpu_power_management */, NULL /* hv_configure_profiler */, + NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0x40000003, .cpuid_reg = R_EBX, }, [FEAT_HYPERV_EDX] = { - .feat_names = hyperv_misc_feature_name, + .feat_names = { + NULL /* hv_mwait */, NULL /* hv_guest_debugging */, + NULL /* hv_perf_monitor */, NULL /* hv_cpu_dynamic_part */, + NULL /* hv_hypercall_params_xmm */, NULL /* hv_guest_idle_state */, + NULL, NULL, + NULL, NULL, NULL /* hv_guest_crash_msr */, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0x40000003, .cpuid_reg = R_EDX, }, [FEAT_SVM] = { - .feat_names = svm_feature_name, + .feat_names = { + "npt", "lbrv", "svm-lock", "nrip-save", + "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", + NULL, NULL, "pause-filter", NULL, + "pfthreshold", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0x8000000A, .cpuid_reg = R_EDX, .tcg_features = TCG_SVM_FEATURES, }, [FEAT_7_0_EBX] = { - .feat_names = cpuid_7_0_ebx_feature_name, + .feat_names = { + "fsgsbase", "tsc-adjust", NULL, "bmi1", + "hle", "avx2", NULL, "smep", + "bmi2", "erms", "invpcid", "rtm", + NULL, NULL, "mpx", NULL, + "avx512f", "avx512dq", "rdseed", "adx", + "smap", "avx512ifma", "pcommit", "clflushopt", + "clwb", NULL, "avx512pf", "avx512er", + "avx512cd", NULL, "avx512bw", "avx512vl", + }, .cpuid_eax = 7, .cpuid_needs_ecx = true, .cpuid_ecx = 0, .cpuid_reg = R_EBX, .tcg_features = TCG_7_0_EBX_FEATURES, }, [FEAT_7_0_ECX] = { - .feat_names = cpuid_7_0_ecx_feature_name, + .feat_names = { + NULL, "avx512vbmi", "umip", "pku", + "ospke", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, "rdpid", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 7, .cpuid_needs_ecx = true, .cpuid_ecx = 0, .cpuid_reg = R_ECX, .tcg_features = TCG_7_0_ECX_FEATURES, }, + [FEAT_7_0_EDX] = { + .feat_names = { + NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + 
NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid_eax = 7, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_EDX, + .tcg_features = TCG_7_0_EDX_FEATURES, + }, [FEAT_8000_0007_EDX] = { - .feat_names = cpuid_apm_edx_feature_name, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "invtsc", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0x80000007, .cpuid_reg = R_EDX, .tcg_features = TCG_APM_FEATURES, .unmigratable_flags = CPUID_APM_INVTSC, }, [FEAT_XSAVE] = { - .feat_names = cpuid_xsave_feature_name, + .feat_names = { + "xsaveopt", "xsavec", "xgetbv1", "xsaves", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 0xd, .cpuid_needs_ecx = true, .cpuid_ecx = 1, .cpuid_reg = R_EAX, .tcg_features = TCG_XSAVE_FEATURES, }, [FEAT_6_EAX] = { - .feat_names = cpuid_6_feature_name, + .feat_names = { + NULL, NULL, "arat", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, .cpuid_eax = 6, .cpuid_reg = R_EAX, .tcg_features = TCG_6_EAX_FEATURES, }, + [FEAT_XSAVE_COMP_LO] = { + .cpuid_eax = 0xD, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_EAX, + .tcg_features = ~0U, + .migratable_flags = XSTATE_FP_MASK | XSTATE_SSE_MASK | + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | + XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | + XSTATE_PKRU_MASK, + }, + [FEAT_XSAVE_COMP_HI] = { + .cpuid_eax = 0xD, + .cpuid_needs_ecx = true, .cpuid_ecx = 0, + .cpuid_reg = R_EDX, + .tcg_features = ~0U, + }, }; typedef struct X86RegisterInfo32 { @@ -526,7 +546,26 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -const ExtSaveArea x86_ext_save_areas[] = { +typedef struct ExtSaveArea { + uint32_t feature, bits; + uint32_t offset, size; +} ExtSaveArea; + +static const ExtSaveArea x86_ext_save_areas[] = { + [XSTATE_FP_BIT] = { + /* x87 FP state component is always enabled if XSAVE is supported */ + .feature = FEAT_1_ECX, .bits = CPUID_EXT_XSAVE, + /* x87 state is in the legacy region of the XSAVE area */ + .offset = 0, + .size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader), + }, + [XSTATE_SSE_BIT] = { + /* SSE state component is always enabled if XSAVE is supported */ + .feature = FEAT_1_ECX, .bits = CPUID_EXT_XSAVE, + /* SSE state is in the legacy region of the XSAVE area */ + .offset = 0, + .size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader), + }, [XSTATE_YMM_BIT] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX, .offset = offsetof(X86XSaveArea, avx_state), @@ -557,6 +596,26 @@ const ExtSaveArea x86_ext_save_areas[] = { .size = sizeof(XSavePKRU) }, }; +static uint32_t xsave_area_size(uint64_t mask) +{ + int i; + uint64_t ret = 0; + + for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { + const ExtSaveArea *esa = &x86_ext_save_areas[i]; + if ((mask >> i) & 1) { + ret = MAX(ret, esa->offset + esa->size); + } + } + return ret; +} + +static inline uint64_t x86_cpu_xsave_components(X86CPU *cpu) +{ + return ((uint64_t)cpu->env.features[FEAT_XSAVE_COMP_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_COMP_LO]; +} + const char *get_register_name_32(unsigned int reg) { if (reg >= CPU_NB_REGS32) { @@ 
-577,15 +636,13 @@ static uint32_t x86_cpu_get_migratable_flags(FeatureWord w) for (i = 0; i < 32; i++) { uint32_t f = 1U << i; - /* If the feature name is unknown, it is not supported by QEMU yet */ - if (!wi->feat_names[i]) { - continue; - } - /* Skip features known to QEMU, but explicitly marked as unmigratable */ - if (wi->unmigratable_flags & f) { - continue; + + /* If the feature name is known, it is implicitly considered migratable, + * unless it is explicitly set in unmigratable_flags */ + if ((wi->migratable_flags & f) || + (wi->feat_names[i] && !(wi->unmigratable_flags & f))) { + r |= f; } - r |= f; } return r; } @@ -624,86 +681,6 @@ void host_cpuid(uint32_t function, uint32_t count, *edx = vec[3]; } -#define iswhite(c) ((c) && ((c) <= ' ' || '~' < (c))) - -/* general substring compare of *[s1..e1) and *[s2..e2). sx is start of - * a substring. ex if !NULL points to the first char after a substring, - * otherwise the string is assumed to sized by a terminating nul. - * Return lexical ordering of *s1:*s2. - */ -static int sstrcmp(const char *s1, const char *e1, - const char *s2, const char *e2) -{ - for (;;) { - if (!*s1 || !*s2 || *s1 != *s2) - return (*s1 - *s2); - ++s1, ++s2; - if (s1 == e1 && s2 == e2) - return (0); - else if (s1 == e1) - return (*s2); - else if (s2 == e2) - return (*s1); - } -} - -/* compare *[s..e) to *altstr. *altstr may be a simple string or multiple - * '|' delimited (possibly empty) strings in which case search for a match - * within the alternatives proceeds left to right. Return 0 for success, - * non-zero otherwise. - */ -static int altcmp(const char *s, const char *e, const char *altstr) -{ - const char *p, *q; - - for (q = p = altstr; ; ) { - while (*p && *p != '|') - ++p; - if ((q == p && !*s) || (q != p && !sstrcmp(s, e, q, p))) - return (0); - if (!*p) - return (1); - else - q = ++p; - } -} - -/* search featureset for flag *[s..e), if found set corresponding bit in - * *pval and return true, otherwise return false - */ -static bool lookup_feature(uint32_t *pval, const char *s, const char *e, - const char **featureset) -{ - uint32_t mask; - const char **ppc; - bool found = false; - - for (mask = 1, ppc = featureset; mask; mask <<= 1, ++ppc) { - if (*ppc && !altcmp(s, e, *ppc)) { - *pval |= mask; - found = true; - } - } - return found; -} - -static void add_flagname_to_bitmaps(const char *flagname, - FeatureWordArray words, - Error **errp) -{ - FeatureWord w; - for (w = 0; w < FEATURE_WORDS; w++) { - FeatureWordInfo *wi = &feature_word_info[w]; - if (wi->feat_names && - lookup_feature(&words[w], flagname, NULL, wi->feat_names)) { - break; - } - } - if (w == FEATURE_WORDS) { - error_setg(errp, "CPU feature %s not found", flagname); - } -} - /* CPU class name definitions: */ #define X86_CPU_TYPE_SUFFIX "-" TYPE_X86_CPU @@ -744,7 +721,6 @@ struct X86CPUDefinition { const char *name; uint32_t level; uint32_t xlevel; - uint32_t xlevel2; /* vendor is zero-terminated, 12 character ASCII string */ char vendor[CPUID_VENDOR_SZ + 1]; int family; @@ -1404,9 +1380,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .name = "Opteron_G3", .level = 5, .vendor = CPUID_VENDOR_AMD, - .family = 15, - .model = 6, - .stepping = 1, + .family = 16, + .model = 2, + .stepping = 3, .features[FEAT_1_EDX] = CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | @@ -1526,6 +1502,14 @@ static PropValue kvm_default_props[] = { { NULL, NULL }, }; +/* TCG-specific defaults that override all CPU models when 
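The tcg_default_props table introduced below follows the same NULL-terminated {property, value} convention as kvm_default_props; a minimal, self-contained sketch of how such a table is typically walked (stand-in demo_* names rather than the QOM property API):

    #include <stdio.h>

    typedef struct DemoPropValue {
        const char *prop, *value;
    } DemoPropValue;

    /* Hypothetical stand-in for the real property setter */
    static void demo_set_prop(const char *name, const char *value)
    {
        printf("set %s=%s\n", name, value);
    }

    /* Walk a NULL-terminated table of defaults and apply each entry */
    static void demo_apply_props(const DemoPropValue *props)
    {
        for (const DemoPropValue *pv = props; pv->prop; pv++) {
            demo_set_prop(pv->prop, pv->value);
        }
    }

    static const DemoPropValue demo_tcg_defaults[] = {
        { "vme", "off" },
        { NULL, NULL },
    };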
using TCG + */ +static PropValue tcg_default_props[] = { + { "vme", "off" }, + { NULL, NULL }, +}; + + void x86_cpu_change_kvm_default(const char *prop, const char *value) { PropValue *pv; @@ -1604,6 +1588,9 @@ static void host_x86_cpu_class_init(ObjectClass *oc, void *data) cpu_x86_fill_model_id(host_cpudef.model_id); xcc->cpu_def = &host_cpudef; + xcc->model_description = + "KVM processor with all supported host features " + "(only available in KVM mode)"; /* level, xlevel, xlevel2, and the feature words are initialized on * instance_init, because they require KVM to be initialized. @@ -1627,9 +1614,12 @@ static void host_x86_cpu_initfn(Object *obj) /* If KVM is disabled, x86_cpu_realizefn() will report an error later */ if (kvm_enabled()) { - env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); - env->cpuid_xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); - env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); + env->cpuid_min_level = + kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); + env->cpuid_min_xlevel = + kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); + env->cpuid_min_xlevel2 = + kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); if (lmce_supported()) { object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort); @@ -1972,13 +1962,38 @@ static inline void feat2prop(char *s) } } +/* Return the feature property name for a feature flag bit */ +static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) +{ + /* XSAVE components are automatically enabled by other features, + * so return the original feature name instead + */ + if (w == FEAT_XSAVE_COMP_LO || w == FEAT_XSAVE_COMP_HI) { + int comp = (w == FEAT_XSAVE_COMP_HI) ? bitnr + 32 : bitnr; + + if (comp < ARRAY_SIZE(x86_ext_save_areas) && + x86_ext_save_areas[comp].bits) { + w = x86_ext_save_areas[comp].feature; + bitnr = ctz32(x86_ext_save_areas[comp].bits); + } + } + + assert(bitnr < 32); + assert(w < FEATURE_WORDS); + return feature_word_info[w].feat_names[bitnr]; +} + /* Compatibily hack to maintain legacy +-feat semantic, * where +-feat overwrites any feature set by * feat=on|feat even if the later is parsed after +-feat * (i.e. 
"-x2apic,x2apic=on" will result in x2apic disabled) */ -static FeatureWordArray plus_features = { 0 }; -static FeatureWordArray minus_features = { 0 }; +static GList *plus_features, *minus_features; + +static gint compare_string(gconstpointer a, gconstpointer b) +{ + return g_strcmp0(a, b); +} /* Parse "+feature,-feature,feature=foo" CPU feature string */ @@ -1986,8 +2001,8 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, Error **errp) { char *featurestr; /* Single 'key=value" string being parsed */ - Error *local_err = NULL; static bool cpu_globals_initialized; + bool ambiguous = false; if (cpu_globals_initialized) { return; @@ -1999,7 +2014,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } for (featurestr = strtok(features, ","); - featurestr && !local_err; + featurestr; featurestr = strtok(NULL, ",")) { const char *name; const char *val = NULL; @@ -2009,10 +2024,12 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, /* Compatibility syntax: */ if (featurestr[0] == '+') { - add_flagname_to_bitmaps(featurestr + 1, plus_features, &local_err); + plus_features = g_list_append(plus_features, + g_strdup(featurestr + 1)); continue; } else if (featurestr[0] == '-') { - add_flagname_to_bitmaps(featurestr + 1, minus_features, &local_err); + minus_features = g_list_append(minus_features, + g_strdup(featurestr + 1)); continue; } @@ -2027,6 +2044,19 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, feat2prop(featurestr); name = featurestr; + if (g_list_find_custom(plus_features, name, compare_string)) { + error_report("warning: Ambiguous CPU model string. " + "Don't mix both \"+%s\" and \"%s=%s\"", + name, name, val); + ambiguous = true; + } + if (g_list_find_custom(minus_features, name, compare_string)) { + error_report("warning: Ambiguous CPU model string. " + "Don't mix both \"-%s\" and \"%s=%s\"", + name, name, val); + ambiguous = true; + } + /* Special case: */ if (!strcmp(name, "tsc-freq")) { int64_t tsc_freq; @@ -2051,11 +2081,65 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, qdev_prop_register_global(prop); } - if (local_err) { - error_propagate(errp, local_err); + if (ambiguous) { + error_report("warning: Compatibility of ambiguous CPU model " + "strings won't be kept on future QEMU versions"); } } +static void x86_cpu_load_features(X86CPU *cpu, Error **errp); +static int x86_cpu_filter_features(X86CPU *cpu); + +/* Check for missing features that may prevent the CPU class from + * running using the current machine and accelerator. + */ +static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, + strList **missing_feats) +{ + X86CPU *xc; + FeatureWord w; + Error *err = NULL; + strList **next = missing_feats; + + if (xcc->kvm_required && !kvm_enabled()) { + strList *new = g_new0(strList, 1); + new->value = g_strdup("kvm");; + *missing_feats = new; + return; + } + + xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); + + x86_cpu_load_features(xc, &err); + if (err) { + /* Errors at x86_cpu_load_features should never happen, + * but in case it does, just report the model as not + * runnable at all using the "type" property. 
+ */ + strList *new = g_new0(strList, 1); + new->value = g_strdup("type"); + *next = new; + next = &new->next; + } + + x86_cpu_filter_features(xc); + + for (w = 0; w < FEATURE_WORDS; w++) { + uint32_t filtered = xc->filtered_features[w]; + int i; + for (i = 0; i < 32; i++) { + if (filtered & (1UL << i)) { + strList *new = g_new0(strList, 1); + new->value = g_strdup(x86_cpu_feature_name(w, i)); + *next = new; + next = &new->next; + } + } + } + + object_unref(OBJECT(xc)); +} + /* Print all cpuid feature names in featureset */ static void listflags(FILE *f, fprintf_function print, const char **featureset) @@ -2071,23 +2155,62 @@ static void listflags(FILE *f, fprintf_function print, const char **featureset) } } -/* generate CPU information. */ +/* Sort alphabetically by type name, listing kvm_required models last. */ +static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) +{ + ObjectClass *class_a = (ObjectClass *)a; + ObjectClass *class_b = (ObjectClass *)b; + X86CPUClass *cc_a = X86_CPU_CLASS(class_a); + X86CPUClass *cc_b = X86_CPU_CLASS(class_b); + const char *name_a, *name_b; + + if (cc_a->kvm_required != cc_b->kvm_required) { + /* kvm_required items go last */ + return cc_a->kvm_required ? 1 : -1; + } else { + name_a = object_class_get_name(class_a); + name_b = object_class_get_name(class_b); + return strcmp(name_a, name_b); + } +} + +static GSList *get_sorted_cpu_model_list(void) +{ + GSList *list = object_class_get_list(TYPE_X86_CPU, false); + list = g_slist_sort(list, x86_cpu_list_compare); + return list; +} + +static void x86_cpu_list_entry(gpointer data, gpointer user_data) +{ + ObjectClass *oc = data; + X86CPUClass *cc = X86_CPU_CLASS(oc); + CPUListState *s = user_data; + char *name = x86_cpu_class_get_model_name(cc); + const char *desc = cc->model_description; + if (!desc) { + desc = cc->cpu_def->model_id; + } + + (*s->cpu_fprintf)(s->file, "x86 %16s %-48s\n", + name, desc); + g_free(name); +} + +/* list available CPU models and flags */ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf) { - X86CPUDefinition *def; - char buf[256]; int i; + CPUListState s = { + .file = f, + .cpu_fprintf = cpu_fprintf, + }; + GSList *list; - for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { - def = &builtin_x86_defs[i]; - snprintf(buf, sizeof(buf), "%s", def->name); - (*cpu_fprintf)(f, "x86 %16s %-48s\n", buf, def->model_id); - } -#ifdef CONFIG_KVM - (*cpu_fprintf)(f, "x86 %16s %-48s\n", "host", - "KVM processor with all supported host features " - "(only available in KVM mode)"); -#endif + (*cpu_fprintf)(f, "Available CPUs:\n"); + list = get_sorted_cpu_model_list(); + g_slist_foreach(list, x86_cpu_list_entry, &s); + g_slist_free(list); (*cpu_fprintf)(f, "\nRecognized CPUID flags:\n"); for (i = 0; i < ARRAY_SIZE(feature_word_info); i++) { @@ -2099,26 +2222,31 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf) } } -CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) +static void x86_cpu_definition_entry(gpointer data, gpointer user_data) { - CpuDefinitionInfoList *cpu_list = NULL; - X86CPUDefinition *def; - int i; + ObjectClass *oc = data; + X86CPUClass *cc = X86_CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfoList *entry; + CpuDefinitionInfo *info; - for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { - CpuDefinitionInfoList *entry; - CpuDefinitionInfo *info; - - def = &builtin_x86_defs[i]; - info = g_malloc0(sizeof(*info)); - info->name = g_strdup(def->name); + info = g_malloc0(sizeof(*info)); + info->name = 
x86_cpu_class_get_model_name(cc); + x86_cpu_class_check_missing_features(cc, &info->unavailable_features); + info->has_unavailable_features = true; - entry = g_malloc0(sizeof(*entry)); - entry->value = info; - entry->next = cpu_list; - cpu_list = entry; - } + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + entry->next = *cpu_list; + *cpu_list = entry; +} +CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) +{ + CpuDefinitionInfoList *cpu_list = NULL; + GSList *list = get_sorted_cpu_model_list(); + g_slist_foreach(list, x86_cpu_definition_entry, &cpu_list); + g_slist_free(list); return cpu_list; } @@ -2156,14 +2284,11 @@ static int x86_cpu_filter_features(X86CPU *cpu) for (w = 0; w < FEATURE_WORDS; w++) { uint32_t host_feat = - x86_cpu_get_supported_feature_word(w, cpu->migratable); + x86_cpu_get_supported_feature_word(w, false); uint32_t requested_features = env->features[w]; env->features[w] &= host_feat; cpu->filtered_features[w] = requested_features & ~env->features[w]; if (cpu->filtered_features[w]) { - if (cpu->check_cpuid || cpu->enforce_cpuid) { - report_unavailable_features(w, cpu->filtered_features[w]); - } rv = 1; } } @@ -2171,6 +2296,15 @@ static int x86_cpu_filter_features(X86CPU *cpu) return rv; } +static void x86_cpu_report_filtered_features(X86CPU *cpu) +{ + FeatureWord w; + + for (w = 0; w < FEATURE_WORDS; w++) { + report_unavailable_features(w, cpu->filtered_features[w]); + } +} + static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) { PropValue *pv; @@ -2192,12 +2326,13 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) char host_vendor[CPUID_VENDOR_SZ + 1]; FeatureWord w; - object_property_set_int(OBJECT(cpu), def->level, "level", errp); + /* CPU models only set _minimum_ values for level/xlevel: */ + object_property_set_int(OBJECT(cpu), def->level, "min-level", errp); + object_property_set_int(OBJECT(cpu), def->xlevel, "min-xlevel", errp); + object_property_set_int(OBJECT(cpu), def->family, "family", errp); object_property_set_int(OBJECT(cpu), def->model, "model", errp); object_property_set_int(OBJECT(cpu), def->stepping, "stepping", errp); - object_property_set_int(OBJECT(cpu), def->xlevel, "xlevel", errp); - object_property_set_int(OBJECT(cpu), def->xlevel2, "xlevel2", errp); object_property_set_str(OBJECT(cpu), def->model_id, "model-id", errp); for (w = 0; w < FEATURE_WORDS; w++) { env->features[w] = def->features[w]; @@ -2210,6 +2345,8 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) } x86_cpu_apply_props(cpu, kvm_default_props); + } else if (tcg_enabled()) { + x86_cpu_apply_props(cpu, tcg_default_props); } env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; @@ -2275,6 +2412,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, { X86CPU *cpu = x86_env_get_cpu(env); CPUState *cs = CPU(cpu); + uint32_t pkg_offset; /* test if maximum index reached */ if (index & 0x80000000) { @@ -2328,7 +2466,11 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } *eax = 1; /* Number of CPUID[EAX=2] calls required */ *ebx = 0; - *ecx = 0; + if (!cpu->enable_l3_cache) { + *ecx = 0; + } else { + *ecx = L3_N_DESCRIPTOR; + } *edx = (L1D_DESCRIPTOR << 16) | \ (L1I_DESCRIPTOR << 8) | \ (L2_DESCRIPTOR); @@ -2374,6 +2516,25 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = L2_SETS - 1; *edx = CPUID_4_NO_INVD_SHARING; break; + case 3: /* L3 cache info */ + if (!cpu->enable_l3_cache) { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; 
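The enabled branch of this leaf-4 case advertises the virtual L3 using the L3_N_* geometry defined earlier in the patch; as a quick sanity check of that arithmetic (plain C11, not part of the patch), ways x partitions x line size x sets gives the 16 MiB implied by CPUID_2_L3_16MB_16WAY_64B:

    #include <assert.h>

    /* CPUID leaf 4 encodes (associativity-1), (partitions-1), (line size-1)
     * in EBX and (sets-1) in ECX; total size = ways * parts * line * sets. */
    static_assert(16 * 1 * 64 * 16384 == 16 * 1024 * 1024,
                  "16-way, 64-byte lines, 16384 sets is a 16 MiB cache");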
+ break; + } + *eax |= CPUID_4_TYPE_UNIFIED | \ + CPUID_4_LEVEL(3) | \ + CPUID_4_SELF_INIT_LEVEL; + pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); + *eax |= ((1 << pkg_offset) - 1) << 14; + *ebx = (L3_N_LINE_SIZE - 1) | \ + ((L3_N_PARTITIONS - 1) << 12) | \ + ((L3_N_ASSOCIATIVITY - 1) << 22); + *ecx = L3_N_SETS - 1; + *edx = CPUID_4_INCLUSIVE | CPUID_4_COMPLEX_IDX; + break; default: /* end of info */ *eax = 0; *ebx = 0; @@ -2411,7 +2572,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { *ecx |= CPUID_7_0_ECX_OSPKE; } - *edx = 0; /* Reserved */ + *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else { *eax = 0; *ebx = 0; @@ -2454,13 +2615,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: - *eax = apicid_core_offset(smp_cores, smp_threads); - *ebx = smp_threads; + *eax = apicid_core_offset(cs->nr_cores, cs->nr_threads); + *ebx = cs->nr_threads; *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; break; case 1: - *eax = apicid_pkg_offset(smp_cores, smp_threads); - *ebx = smp_cores * smp_threads; + *eax = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); + *ebx = cs->nr_cores * cs->nr_threads; *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; break; default: @@ -2473,10 +2634,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx &= 0xffff; /* The count doesn't need to be reliable. */ break; case 0xD: { - KVMState *s = cs->kvm_state; - uint64_t ena_mask; - int i; - /* Processor Extended State */ *eax = 0; *ebx = 0; @@ -2485,36 +2642,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { break; } - if (kvm_enabled()) { - ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX); - ena_mask <<= 32; - ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); - } else { - ena_mask = -1; - } if (count == 0) { - *ecx = 0x240; - for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { - const ExtSaveArea *esa = &x86_ext_save_areas[i]; - if ((env->features[esa->feature] & esa->bits) == esa->bits - && ((ena_mask >> i) & 1) != 0) { - if (i < 32) { - *eax |= 1u << i; - } else { - *edx |= 1u << (i - 32); - } - *ecx = MAX(*ecx, esa->offset + esa->size); - } - } - *eax |= ena_mask & (XSTATE_FP_MASK | XSTATE_SSE_MASK); + *ecx = xsave_area_size(x86_cpu_xsave_components(cpu)); + *eax = env->features[FEAT_XSAVE_COMP_LO]; + *edx = env->features[FEAT_XSAVE_COMP_HI]; *ebx = *ecx; } else if (count == 1) { *eax = env->features[FEAT_XSAVE]; } else if (count < ARRAY_SIZE(x86_ext_save_areas)) { - const ExtSaveArea *esa = &x86_ext_save_areas[count]; - if ((env->features[esa->feature] & esa->bits) == esa->bits - && ((ena_mask >> count) & 1) != 0) { + if ((x86_cpu_xsave_components(cpu) >> count) & 1) { + const ExtSaveArea *esa = &x86_ext_save_areas[count]; *eax = esa->size; *ebx = esa->offset; } @@ -2585,9 +2723,15 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx = (L2_SIZE_KB_AMD << 16) | \ (AMD_ENC_ASSOC(L2_ASSOCIATIVITY) << 12) | \ (L2_LINES_PER_TAG << 8) | (L2_LINE_SIZE); - *edx = ((L3_SIZE_KB/512) << 18) | \ - (AMD_ENC_ASSOC(L3_ASSOCIATIVITY) << 12) | \ - (L3_LINES_PER_TAG << 8) | (L3_LINE_SIZE); + if (!cpu->enable_l3_cache) { + *edx = ((L3_SIZE_KB / 512) << 18) | \ + (AMD_ENC_ASSOC(L3_ASSOCIATIVITY) << 12) | \ + (L3_LINES_PER_TAG << 8) | (L3_LINE_SIZE); + } else { + *edx = ((L3_N_SIZE_KB_AMD / 512) << 18) | \ + (AMD_ENC_ASSOC(L3_N_ASSOCIATIVITY) << 12) | \ + 
(L3_N_LINES_PER_TAG << 8) | (L3_N_LINE_SIZE); + } break; case 0x80000007: *eax = 0; @@ -2669,7 +2813,7 @@ static void x86_cpu_reset(CPUState *s) xcc->parent_reset(s); - memset(env, 0, offsetof(CPUX86State, cpuid_level)); + memset(env, 0, offsetof(CPUX86State, end_reset_fields)); tlb_flush(s, 1); @@ -2743,7 +2887,7 @@ static void x86_cpu_reset(CPUState *s) } for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; - if ((env->features[esa->feature] & esa->bits) == esa->bits) { + if (env->features[esa->feature] & esa->bits) { xcr0 |= 1ull << i; } } @@ -2813,9 +2957,8 @@ static void mce_init(X86CPU *cpu) } #ifndef CONFIG_USER_ONLY -static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) +APICCommonClass *apic_get_class(void) { - APICCommonState *apic; const char *apic_type = "apic"; if (kvm_apic_in_kernel()) { @@ -2824,13 +2967,21 @@ static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) apic_type = "xen-apic"; } - cpu->apic_state = DEVICE(object_new(apic_type)); + return APIC_COMMON_CLASS(object_class_by_name(apic_type)); +} + +static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) +{ + APICCommonState *apic; + ObjectClass *apic_class = OBJECT_CLASS(apic_get_class()); + + cpu->apic_state = DEVICE(object_new(object_class_get_name(apic_class))); object_property_add_child(OBJECT(cpu), "lapic", OBJECT(cpu->apic_state), &error_abort); object_unref(OBJECT(cpu->apic_state)); - qdev_prop_set_uint8(cpu->apic_state, "id", cpu->apic_id); + qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); /* TODO: convert to link<> */ apic = APIC_COMMON(cpu->apic_state); apic->cpu = cpu; @@ -2906,6 +3057,139 @@ static uint32_t x86_host_phys_bits(void) return host_phys_bits; } +static void x86_cpu_adjust_level(X86CPU *cpu, uint32_t *min, uint32_t value) +{ + if (*min < value) { + *min = value; + } +} + +/* Increase cpuid_min_{level,xlevel,xlevel2} automatically, if appropriate */ +static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *fi = &feature_word_info[w]; + uint32_t eax = fi->cpuid_eax; + uint32_t region = eax & 0xF0000000; + + if (!env->features[w]) { + return; + } + + switch (region) { + case 0x00000000: + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, eax); + break; + case 0x80000000: + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, eax); + break; + case 0xC0000000: + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel2, eax); + break; + } +} + +/* Calculate XSAVE components based on the configured CPU feature flags */ +static void x86_cpu_enable_xsave_components(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int i; + uint64_t mask; + + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + return; + } + + mask = 0; + for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { + const ExtSaveArea *esa = &x86_ext_save_areas[i]; + if (env->features[esa->feature] & esa->bits) { + mask |= (1ULL << i); + } + } + + env->features[FEAT_XSAVE_COMP_LO] = mask; + env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; +} + +/* Load CPUID data based on configured features */ +static void x86_cpu_load_features(X86CPU *cpu, Error **errp) +{ + CPUX86State *env = &cpu->env; + FeatureWord w; + GList *l; + Error *local_err = NULL; + + /*TODO: cpu->host_features incorrectly overwrites features + * set using "feat=on|off". Once we fix this, we can convert + * plus_features & minus_features to global properties + * inside x86_cpu_parse_featurestr() too. 
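x86_cpu_adjust_level() and x86_cpu_adjust_feat_level() above only ever raise the minimum level, so an explicit user-supplied "level"/"xlevel" can still override it later; a self-contained sketch of that raise-only behaviour (hypothetical demo_* names):

    #include <assert.h>
    #include <stdint.h>

    /* Same "raise the minimum if needed" semantics as x86_cpu_adjust_level() */
    static void demo_adjust_level(uint32_t *min, uint32_t value)
    {
        if (*min < value) {
            *min = value;
        }
    }

    int main(void)
    {
        uint32_t min_xlevel = 0x80000001;

        /* e.g. enabling invtsc needs CPUID leaf 0x80000007 to be visible */
        demo_adjust_level(&min_xlevel, 0x80000007);
        assert(min_xlevel == 0x80000007);

        /* a later, lower requirement must never lower it again */
        demo_adjust_level(&min_xlevel, 0x80000001);
        assert(min_xlevel == 0x80000007);
        return 0;
    }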
+ */ + if (cpu->host_features) { + for (w = 0; w < FEATURE_WORDS; w++) { + env->features[w] = + x86_cpu_get_supported_feature_word(w, cpu->migratable); + } + } + + for (l = plus_features; l; l = l->next) { + const char *prop = l->data; + object_property_set_bool(OBJECT(cpu), true, prop, &local_err); + if (local_err) { + goto out; + } + } + + for (l = minus_features; l; l = l->next) { + const char *prop = l->data; + object_property_set_bool(OBJECT(cpu), false, prop, &local_err); + if (local_err) { + goto out; + } + } + + if (!kvm_enabled() || !cpu->expose_kvm) { + env->features[FEAT_KVM] = 0; + } + + x86_cpu_enable_xsave_components(cpu); + + /* CPUID[EAX=7,ECX=0].EBX always increased level automatically: */ + x86_cpu_adjust_feat_level(cpu, FEAT_7_0_EBX); + if (cpu->full_cpuid_auto_level) { + x86_cpu_adjust_feat_level(cpu, FEAT_1_EDX); + x86_cpu_adjust_feat_level(cpu, FEAT_1_ECX); + x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); + x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); + x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_ECX); + x86_cpu_adjust_feat_level(cpu, FEAT_8000_0007_EDX); + x86_cpu_adjust_feat_level(cpu, FEAT_C000_0001_EDX); + x86_cpu_adjust_feat_level(cpu, FEAT_SVM); + x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); + /* SVM requires CPUID[0x8000000A] */ + if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A); + } + } + + /* Set cpuid_*level* based on cpuid_min_*level, if not explicitly set */ + if (env->cpuid_level == UINT32_MAX) { + env->cpuid_level = env->cpuid_min_level; + } + if (env->cpuid_xlevel == UINT32_MAX) { + env->cpuid_xlevel = env->cpuid_min_xlevel; + } + if (env->cpuid_xlevel2 == UINT32_MAX) { + env->cpuid_xlevel2 = env->cpuid_min_xlevel2; + } + +out: + if (local_err != NULL) { + error_propagate(errp, local_err); + } +} + #define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3) @@ -2920,7 +3204,6 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) CPUX86State *env = &cpu->env; Error *local_err = NULL; static bool ht_warned; - FeatureWord w; if (xcc->kvm_required && !kvm_enabled()) { char *name = x86_cpu_class_get_model_name(xcc); @@ -2934,33 +3217,21 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) return; } - /*TODO: cpu->host_features incorrectly overwrites features - * set using "feat=on|off". Once we fix this, we can convert - * plus_features & minus_features to global properties - * inside x86_cpu_parse_featurestr() too. - */ - if (cpu->host_features) { - for (w = 0; w < FEATURE_WORDS; w++) { - env->features[w] = - x86_cpu_get_supported_feature_word(w, cpu->migratable); - } - } - - for (w = 0; w < FEATURE_WORDS; w++) { - cpu->env.features[w] |= plus_features[w]; - cpu->env.features[w] &= ~minus_features[w]; - } - - if (env->features[FEAT_7_0_EBX] && env->cpuid_level < 7) { - env->cpuid_level = 7; + x86_cpu_load_features(cpu, &local_err); + if (local_err) { + goto out; } - if (x86_cpu_filter_features(cpu) && cpu->enforce_cpuid) { - error_setg(&local_err, - kvm_enabled() ? - "Host doesn't support requested features" : - "TCG doesn't support requested features"); - goto out; + if (x86_cpu_filter_features(cpu) && + (cpu->check_cpuid || cpu->enforce_cpuid)) { + x86_cpu_report_filtered_features(cpu); + if (cpu->enforce_cpuid) { + error_setg(&local_err, + kvm_enabled() ? 
+ "Host doesn't support requested features" : + "TCG doesn't support requested features"); + goto out; + } } /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on @@ -3036,7 +3307,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) cpu->phys_bits = 32; } } - cpu_exec_init(cs, &error_abort); + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } if (tcg_enabled()) { tcg_x86_init(); @@ -3117,6 +3392,8 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) { X86CPU *cpu = X86_CPU(dev); + X86CPUClass *xcc = X86_CPU_GET_CLASS(dev); + Error *local_err = NULL; #ifndef CONFIG_USER_ONLY cpu_remove_sync(CPU(dev)); @@ -3127,6 +3404,12 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) object_unparent(OBJECT(cpu->apic_state)); cpu->apic_state = NULL; } + + xcc->parent_unrealize(dev, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } } typedef struct BitProperty { @@ -3210,30 +3493,22 @@ static void x86_cpu_register_feature_bit_props(X86CPU *cpu, FeatureWord w, int bitnr) { - Object *obj = OBJECT(cpu); - int i; - char **names; FeatureWordInfo *fi = &feature_word_info[w]; + const char *name = fi->feat_names[bitnr]; - if (!fi->feat_names) { + if (!name) { return; } - if (!fi->feat_names[bitnr]) { - return; - } - - names = g_strsplit(fi->feat_names[bitnr], "|", 0); - feat2prop(names[0]); - x86_cpu_register_bit_prop(cpu, names[0], &cpu->env.features[w], bitnr); - - for (i = 1; names[i]; i++) { - feat2prop(names[i]); - object_property_add_alias(obj, names[i], obj, names[0], - &error_abort); - } - - g_strfreev(names); + /* Property names should use "-" instead of "_". 
+ * Old names containing underscores are registered as aliases + * using object_property_add_alias() + */ + assert(!strchr(name, '_')); + /* aliases don't use "|" delimiters anymore, they are registered + * manually using object_property_add_alias() */ + assert(!strchr(name, '|')); + x86_cpu_register_bit_prop(cpu, name, &cpu->env.features[w], bitnr); } static void x86_cpu_initfn(Object *obj) @@ -3281,6 +3556,36 @@ static void x86_cpu_initfn(Object *obj) } } + object_property_add_alias(obj, "sse3", obj, "pni", &error_abort); + object_property_add_alias(obj, "pclmuldq", obj, "pclmulqdq", &error_abort); + object_property_add_alias(obj, "sse4-1", obj, "sse4.1", &error_abort); + object_property_add_alias(obj, "sse4-2", obj, "sse4.2", &error_abort); + object_property_add_alias(obj, "xd", obj, "nx", &error_abort); + object_property_add_alias(obj, "ffxsr", obj, "fxsr-opt", &error_abort); + object_property_add_alias(obj, "i64", obj, "lm", &error_abort); + + object_property_add_alias(obj, "ds_cpl", obj, "ds-cpl", &error_abort); + object_property_add_alias(obj, "tsc_adjust", obj, "tsc-adjust", &error_abort); + object_property_add_alias(obj, "fxsr_opt", obj, "fxsr-opt", &error_abort); + object_property_add_alias(obj, "lahf_lm", obj, "lahf-lm", &error_abort); + object_property_add_alias(obj, "cmp_legacy", obj, "cmp-legacy", &error_abort); + object_property_add_alias(obj, "nodeid_msr", obj, "nodeid-msr", &error_abort); + object_property_add_alias(obj, "perfctr_core", obj, "perfctr-core", &error_abort); + object_property_add_alias(obj, "perfctr_nb", obj, "perfctr-nb", &error_abort); + object_property_add_alias(obj, "kvm_nopiodelay", obj, "kvm-nopiodelay", &error_abort); + object_property_add_alias(obj, "kvm_mmu", obj, "kvm-mmu", &error_abort); + object_property_add_alias(obj, "kvm_asyncpf", obj, "kvm-asyncpf", &error_abort); + object_property_add_alias(obj, "kvm_steal_time", obj, "kvm-steal-time", &error_abort); + object_property_add_alias(obj, "kvm_pv_eoi", obj, "kvm-pv-eoi", &error_abort); + object_property_add_alias(obj, "kvm_pv_unhalt", obj, "kvm-pv-unhalt", &error_abort); + object_property_add_alias(obj, "svm_lock", obj, "svm-lock", &error_abort); + object_property_add_alias(obj, "nrip_save", obj, "nrip-save", &error_abort); + object_property_add_alias(obj, "tsc_scale", obj, "tsc-scale", &error_abort); + object_property_add_alias(obj, "vmcb_clean", obj, "vmcb-clean", &error_abort); + object_property_add_alias(obj, "pause_filter", obj, "pause-filter", &error_abort); + object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort); + object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort); + x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); } @@ -3358,12 +3663,17 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), - DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0), - DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0), - DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, 0), + DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), + DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), + DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, UINT32_MAX), + DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), + DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), + DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), 
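With "level", "xlevel" and "xlevel2" now defaulting to UINT32_MAX, an unset value means "use the minimum computed from the enabled features", while an explicit property still wins, as resolved in x86_cpu_load_features(); a one-line sketch of that rule (hypothetical name):

    #include <stdint.h>

    /* UINT32_MAX acts as "not set by the user": fall back to the minimum
     * derived from the enabled features, otherwise honour the request. */
    static uint32_t demo_resolve_level(uint32_t requested, uint32_t min)
    {
        return requested == UINT32_MAX ? min : requested;
    }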
+ DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), + DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), DEFINE_PROP_END_OF_LIST() }; @@ -3374,6 +3684,7 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); xcc->parent_realize = dc->realize; + xcc->parent_unrealize = dc->unrealize; dc->realize = x86_cpu_realizefn; dc->unrealize = x86_cpu_unrealizefn; dc->props = x86_cpu_properties; @@ -3405,6 +3716,9 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->write_elf32_qemunote = x86_cpu_write_elf32_qemunote; cc->vmsd = &vmstate_x86_cpu; #endif + /* CPU_NB_REGS * 2 = general regs + xmm regs + * 25 = eip, eflags, 6 seg regs, st[0-7], fctrl,...,fop, mxcsr. + */ cc->gdb_num_core_regs = CPU_NB_REGS * 2 + 25; #ifndef CONFIG_USER_ONLY cc->debug_excp_handler = breakpoint_handler; @@ -3413,11 +3727,6 @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data) cc->cpu_exec_exit = x86_cpu_exec_exit; dc->cannot_instantiate_with_device_add_yet = false; - /* - * Reason: x86_cpu_initfn() calls cpu_exec_init(), which saves the - * object in cpus -> dangling pointer after final object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo x86_cpu_type_info = { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 65615c0f3b..c605724022 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -325,6 +325,7 @@ #define MSR_IA32_APICBASE 0x1b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_EXTD (1 << 10) #define MSR_IA32_APICBASE_BASE (0xfffffU<<12) #define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_TSC_ADJUST 0x0000003b @@ -442,6 +443,7 @@ typedef enum FeatureWord { FEAT_1_ECX, /* CPUID[1].ECX */ FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ + FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ @@ -453,6 +455,8 @@ typedef enum FeatureWord { FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ + FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ + FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ FEATURE_WORDS, } FeatureWord; @@ -606,21 +610,29 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_7_0_EBX_RTM (1U << 11) #define CPUID_7_0_EBX_MPX (1U << 14) #define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */ +#define CPUID_7_0_EBX_AVX512DQ (1U << 17) /* AVX-512 Doubleword & Quadword Instrs */ #define CPUID_7_0_EBX_RDSEED (1U << 18) #define CPUID_7_0_EBX_ADX (1U << 19) #define CPUID_7_0_EBX_SMAP (1U << 20) +#define CPUID_7_0_EBX_AVX512IFMA (1U << 21) /* AVX-512 Integer Fused Multiply Add */ #define CPUID_7_0_EBX_PCOMMIT (1U << 22) /* Persistent Commit */ #define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) /* Flush a Cache Line Optimized */ #define CPUID_7_0_EBX_CLWB (1U << 24) /* Cache Line Write Back */ #define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ #define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ #define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ +#define CPUID_7_0_EBX_AVX512BW (1U << 30) /* 
AVX-512 Byte and Word Instructions */ +#define CPUID_7_0_EBX_AVX512VL (1U << 31) /* AVX-512 Vector Length Extensions */ +#define CPUID_7_0_ECX_VBMI (1U << 1) /* AVX-512 Vector Byte Manipulation Instrs */ #define CPUID_7_0_ECX_UMIP (1U << 2) #define CPUID_7_0_ECX_PKU (1U << 3) #define CPUID_7_0_ECX_OSPKE (1U << 4) #define CPUID_7_0_ECX_RDPID (1U << 22) +#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ +#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ + #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) @@ -691,6 +703,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; /* Use a clearer name for this. */ #define CPU_INTERRUPT_INIT CPU_INTERRUPT_RESET +/* Instead of computing the condition codes after each x86 instruction, + * QEMU just stores one operand (called CC_SRC), the result + * (called CC_DST) and the type of operation (called CC_OP). When the + * condition codes are needed, the condition codes can be calculated + * using this information. Condition codes are not generated if they + * are only needed for conditional branches. + */ typedef enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ @@ -872,7 +891,8 @@ typedef union X86LegacyXSaveArea { typedef struct X86XSaveHeader { uint64_t xstate_bv; uint64_t xcomp_bv; - uint8_t reserved[48]; + uint64_t reserve0; + uint8_t reserved[40]; } X86XSaveHeader; /* Ext. save area 2: AVX State */ @@ -1030,6 +1050,9 @@ typedef struct CPUX86State { uint64_t tsc; uint64_t tsc_adjust; uint64_t tsc_deadline; + uint64_t tsc_aux; + + uint64_t xcr0; uint64_t mcg_status; uint64_t msr_ia32_misc_enable; @@ -1046,6 +1069,8 @@ typedef struct CPUX86State { uint64_t pat; uint32_t smbase; + uint32_t pkru; + /* End of state preserved by INIT (dummy marker). */ struct {} end_init_save; @@ -1097,11 +1122,15 @@ typedef struct CPUX86State { CPU_COMMON /* Fields from here on are preserved across CPU reset. */ + struct {} end_reset_fields; /* processor features (e.g. for CPUID insn) */ - uint32_t cpuid_level; - uint32_t cpuid_xlevel; - uint32_t cpuid_xlevel2; + /* Minimum level/xlevel/xlevel2, based on CPU model + features */ + uint32_t cpuid_min_level, cpuid_min_xlevel, cpuid_min_xlevel2; + /* Maximum level/xlevel/xlevel2 value for auto-assignment: */ + uint32_t cpuid_max_level, cpuid_max_xlevel, cpuid_max_xlevel2; + /* Actual level/xlevel/xlevel2 value: */ + uint32_t cpuid_level, cpuid_xlevel, cpuid_xlevel2; uint32_t cpuid_vendor1; uint32_t cpuid_vendor2; uint32_t cpuid_vendor3; @@ -1130,20 +1159,15 @@ typedef struct CPUX86State { uint64_t mcg_ctl; uint64_t mcg_ext_ctl; uint64_t mce_banks[MCE_BANKS_DEF*4]; - - uint64_t tsc_aux; + uint64_t xstate_bv; /* vmstate */ uint16_t fpus_vmstate; uint16_t fptag_vmstate; uint16_t fpregs_format_vmstate; - uint64_t xstate_bv; - uint64_t xcr0; uint64_t xss; - uint32_t pkru; - TPRAccess tpr_access_type; } CPUX86State; @@ -1202,9 +1226,18 @@ struct X86CPU { */ bool enable_lmce; + /* Compatibility bits for old machine types. + * If true present virtual l3 cache for VM, the vcpus in the same virtual + * socket share an virtual l3 cache. 
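The "shared per virtual socket" behaviour described above is what CPUID[4].EAX[25:14] encodes via the APIC-ID package offset (see the pkg_offset code earlier in this patch); a self-contained worked example with illustrative 4-cores-x-2-threads numbers:

    #include <assert.h>

    /* Smallest bit width that can hold `count` distinct IDs
     * (same role as the APIC-ID width helpers in hw/i386/topology.h). */
    static unsigned demo_apicid_width(unsigned count)
    {
        unsigned w = 0;

        while ((1u << w) < count) {
            w++;
        }
        return w;
    }

    int main(void)
    {
        /* illustrative topology: 4 cores x 2 threads per socket */
        unsigned pkg_offset = demo_apicid_width(2) + demo_apicid_width(4);

        /* CPUID[4].EAX[25:14] holds "logical CPUs sharing this cache - 1",
         * so all 8 vcpus in the socket see one shared virtual L3. */
        assert(pkg_offset == 3);
        assert((1u << pkg_offset) - 1 == 7);
        return 0;
    }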
+ */ + bool enable_l3_cache; + /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Enable auto level-increase for all CPUID leaves */ + bool full_cpuid_auto_level; + /* if true fill the top bits of the MTRR_PHYSMASKn variable range */ bool fill_mtrr_mask; @@ -1381,13 +1414,6 @@ int cpu_x86_signal_handler(int host_signum, void *pinfo, void *puc); /* cpu.c */ -typedef struct ExtSaveArea { - uint32_t feature, bits; - uint32_t offset, size; -} ExtSaveArea; - -extern const ExtSaveArea x86_ext_save_areas[]; - void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c index 929489bfc8..2049a8c01d 100644 --- a/target-i386/fpu_helper.c +++ b/target-i386/fpu_helper.c @@ -1110,6 +1110,8 @@ void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) } #endif +#define XO(X) offsetof(X86XSaveArea, X) + static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) { int fpus, fptag, i; @@ -1120,17 +1122,18 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) for (i = 0; i < 8; i++) { fptag |= (env->fptags[i] << i); } - cpu_stw_data_ra(env, ptr, env->fpuc, ra); - cpu_stw_data_ra(env, ptr + 2, fpus, ra); - cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra); + + cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); + cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); + cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); /* In 32-bit mode this is eip, sel, dp, sel. In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. */ - cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ - addr = ptr + 0x20; + addr = ptr + XO(legacy.fpregs); for (i = 0; i < 8; i++) { floatx80 tmp = ST(i); helper_fstt(env, tmp, addr, ra); @@ -1140,8 +1143,8 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) { - cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */ - cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */ + cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); + cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); } static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) @@ -1155,7 +1158,7 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) nb_xmm_regs = 8; } - addr = ptr + 0xa0; + addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); @@ -1163,8 +1166,9 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) } } -static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra) +static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { @@ -1173,15 +1177,17 @@ static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra) } } -static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra) +static void do_xsave_bndcsr(CPUX86State *env, 
target_ulong ptr, uintptr_t ra) { - cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra); - cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra); + cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), + env->bndcs_regs.cfgu, ra); + cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), + env->bndcs_regs.sts, ra); } -static void do_xsave_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra) +static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) { - cpu_stq_data_ra(env, addr, env->pkru, ra); + cpu_stq_data_ra(env, ptr, env->pkru, ra); } void helper_fxsave(CPUX86State *env, target_ulong ptr) @@ -1250,22 +1256,19 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, do_xsave_sse(env, ptr, ra); } if (opt & XSTATE_BNDREGS_MASK) { - target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset; - do_xsave_bndregs(env, ptr + off, ra); + do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); } if (opt & XSTATE_BNDCSR_MASK) { - target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset; - do_xsave_bndcsr(env, ptr + off, ra); + do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); } if (opt & XSTATE_PKRU_MASK) { - target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset; - do_xsave_pkru(env, ptr + off, ra); + do_xsave_pkru(env, ptr + XO(pkru_state), ra); } /* Update the XSTATE_BV field. */ - old_bv = cpu_ldq_data_ra(env, ptr + 512, ra); + old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); new_bv = (old_bv & ~rfbm) | (inuse & rfbm); - cpu_stq_data_ra(env, ptr + 512, new_bv, ra); + cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); } void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) @@ -1281,12 +1284,13 @@ void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) { - int i, fpus, fptag; + int i, fpuc, fpus, fptag; target_ulong addr; - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra)); - fpus = cpu_lduw_data_ra(env, ptr + 2, ra); - fptag = cpu_lduw_data_ra(env, ptr + 4, ra); + fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); + fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); + fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); + cpu_set_fpuc(env, fpuc); env->fpstt = (fpus >> 11) & 7; env->fpus = fpus & ~0x3800; fptag ^= 0xff; @@ -1294,7 +1298,7 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) env->fptags[i] = ((fptag >> i) & 1); } - addr = ptr + 0x20; + addr = ptr + XO(legacy.fpregs); for (i = 0; i < 8; i++) { floatx80 tmp = helper_fldt(env, addr, ra); ST(i) = tmp; @@ -1304,7 +1308,7 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) { - cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra)); + cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); } static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) @@ -1318,7 +1322,7 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) nb_xmm_regs = 8; } - addr = ptr + 0xa0; + addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); @@ -1326,8 +1330,9 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) } } -static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra) 
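The fpu_helper.c hunks above and below swap hard-coded byte offsets (0x08, 0x10, 0x18, 0x1c, 0x20, 0xa0, 512, 520, 528) for offsetof() expressions through the XO() macro, so the XSAVE/XRSTOR helpers follow the X86XSaveArea layout instead of repeating magic numbers. The standalone sketch below illustrates the idea with invented Demo* structures; the field names are only illustrative, and the asserted offsets come from the constants the patch removes rather than from QEMU's headers.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative layout only: field names are invented for this sketch, but
 * the offsets line up with the magic numbers the patch deletes. */
typedef struct DemoLegacyXSave {
    uint16_t fcw;               /* 0x00 */
    uint16_t fsw;               /* 0x02 */
    uint16_t ftw;               /* 0x04 */
    uint16_t fpop;              /* 0x06 */
    uint64_t fpip;              /* 0x08 */
    uint64_t fpdp;              /* 0x10 */
    uint32_t mxcsr;             /* 0x18 */
    uint32_t mxcsr_mask;        /* 0x1c */
    uint8_t  fpregs[8][16];     /* 0x20: eight 16-byte x87 slots */
    uint8_t  xmm_regs[16][16];  /* 0xa0 */
} DemoLegacyXSave;

typedef struct DemoXSaveHeader {       /* starts at 0x200 */
    uint64_t xstate_bv;                /* old code addressed "ptr + 512" */
    uint64_t xcomp_bv;                 /* old code addressed "ptr + 520" */
    uint64_t reserve0;                 /* old code addressed "ptr + 528" */
    uint8_t  reserved[40];
} DemoXSaveHeader;

typedef struct DemoXSaveArea {
    DemoLegacyXSave legacy;
    uint8_t pad[0x200 - sizeof(DemoLegacyXSave)];
    DemoXSaveHeader header;
} DemoXSaveArea;

#define XO(field) offsetof(DemoXSaveArea, field)

int main(void)
{
    /* The point of the rewrite: the symbolic offsets equal the old
     * constants today and keep tracking the struct if it ever changes. */
    assert(XO(legacy.fcw)        == 0x00);
    assert(XO(legacy.ftw)        == 0x04);
    assert(XO(legacy.fpip)       == 0x08);
    assert(XO(legacy.fpdp)       == 0x10);
    assert(XO(legacy.mxcsr)      == 0x18);
    assert(XO(legacy.mxcsr_mask) == 0x1c);
    assert(XO(legacy.fpregs)     == 0x20);
    assert(XO(legacy.xmm_regs)   == 0xa0);
    assert(XO(header.xstate_bv)  == 512);
    assert(XO(header.xcomp_bv)   == 520);
    assert(XO(header.reserve0)   == 528);
    printf("all offsets match the removed constants\n");
    return 0;
}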
+static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) { + target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { @@ -1336,16 +1341,18 @@ static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra) } } -static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra) +static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) { /* FIXME: Extend highest implemented bit of linear address. */ - env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra); - env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra); + env->bndcs_regs.cfgu + = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); + env->bndcs_regs.sts + = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); } -static void do_xrstor_pkru(CPUX86State *env, target_ulong addr, uintptr_t ra) +static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) { - env->pkru = cpu_ldq_data_ra(env, addr, ra); + env->pkru = cpu_ldq_data_ra(env, ptr, ra); } void helper_fxrstor(CPUX86State *env, target_ulong ptr) @@ -1373,7 +1380,7 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr) void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { uintptr_t ra = GETPC(); - uint64_t xstate_bv, xcomp_bv0, xcomp_bv1; + uint64_t xstate_bv, xcomp_bv, reserve0; rfbm &= env->xcr0; @@ -1387,7 +1394,7 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) raise_exception_ra(env, EXCP0D_GPF, ra); } - xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra); + xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); if ((int64_t)xstate_bv < 0) { /* FIXME: Compact form. */ @@ -1396,15 +1403,19 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) /* Standard form. */ - /* The XSTATE field must not set bits not present in XCR0. */ + /* The XSTATE_BV field must not set bits not present in XCR0. */ if (xstate_bv & ~env->xcr0) { raise_exception_ra(env, EXCP0D_GPF, ra); } - /* The XCOMP field must be zero. */ - xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra); - xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra); - if (xcomp_bv0 || xcomp_bv1) { + /* The XCOMP_BV field must be zero. Note that, as of the April 2016 + revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) + describes only XCOMP_BV, but the description of the standard form + of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which + includes the next 64-bit field. 
*/ + xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); + reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); + if (xcomp_bv || reserve0) { raise_exception_ra(env, EXCP0D_GPF, ra); } @@ -1430,8 +1441,7 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) } if (rfbm & XSTATE_BNDREGS_MASK) { if (xstate_bv & XSTATE_BNDREGS_MASK) { - target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS_BIT].offset; - do_xrstor_bndregs(env, ptr + off, ra); + do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); env->hflags |= HF_MPX_IU_MASK; } else { memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); @@ -1440,8 +1450,7 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) } if (rfbm & XSTATE_BNDCSR_MASK) { if (xstate_bv & XSTATE_BNDCSR_MASK) { - target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR_BIT].offset; - do_xrstor_bndcsr(env, ptr + off, ra); + do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); } else { memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); } @@ -1450,8 +1459,7 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) if (rfbm & XSTATE_PKRU_MASK) { uint64_t old_pkru = env->pkru; if (xstate_bv & XSTATE_PKRU_MASK) { - target_ulong off = x86_ext_save_areas[XSTATE_PKRU_BIT].offset; - do_xrstor_pkru(env, ptr + off, ra); + do_xrstor_pkru(env, ptr + XO(pkru_state), ra); } else { env->pkru = 0; } @@ -1462,6 +1470,8 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) } } +#undef XO + uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) { /* The OS must have enabled XSAVE. */ diff --git a/target-i386/helper.c b/target-i386/helper.c index 1c250b8245..4ecc0912a4 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1113,7 +1113,6 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) typedef struct MCEInjectionParams { Monitor *mon; - X86CPU *cpu; int bank; uint64_t status; uint64_t mcg_status; @@ -1122,14 +1121,14 @@ typedef struct MCEInjectionParams { int flags; } MCEInjectionParams; -static void do_inject_x86_mce(void *data) +static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data) { - MCEInjectionParams *params = data; - CPUX86State *cenv = ¶ms->cpu->env; - CPUState *cpu = CPU(params->cpu); + MCEInjectionParams *params = data.host_ptr; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *cenv = &cpu->env; uint64_t *banks = cenv->mce_banks + 4 * params->bank; - cpu_synchronize_state(cpu); + cpu_synchronize_state(cs); /* * If there is an MCE exception being processed, ignore this SRAO MCE @@ -1149,7 +1148,7 @@ static void do_inject_x86_mce(void *data) if ((cenv->mcg_cap & MCG_CTL_P) && cenv->mcg_ctl != ~(uint64_t)0) { monitor_printf(params->mon, "CPU %d: Uncorrected error reporting disabled\n", - cpu->cpu_index); + cs->cpu_index); return; } @@ -1161,7 +1160,7 @@ static void do_inject_x86_mce(void *data) monitor_printf(params->mon, "CPU %d: Uncorrected error reporting disabled for" " bank %d\n", - cpu->cpu_index, params->bank); + cs->cpu_index, params->bank); return; } @@ -1170,7 +1169,7 @@ static void do_inject_x86_mce(void *data) monitor_printf(params->mon, "CPU %d: Previous MCE still in progress, raising" " triple fault\n", - cpu->cpu_index); + cs->cpu_index); qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); qemu_system_reset_request(); return; @@ -1182,7 +1181,7 @@ static void do_inject_x86_mce(void *data) banks[3] = params->misc; cenv->mcg_status = params->mcg_status; banks[1] = params->status; - cpu_interrupt(cpu, CPU_INTERRUPT_MCE); + cpu_interrupt(cs, CPU_INTERRUPT_MCE); } else 
if (!(banks[1] & MCI_STATUS_VAL) || !(banks[1] & MCI_STATUS_UC)) { if (banks[1] & MCI_STATUS_VAL) { @@ -1204,7 +1203,6 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, CPUX86State *cenv = &cpu->env; MCEInjectionParams params = { .mon = mon, - .cpu = cpu, .bank = bank, .status = status, .mcg_status = mcg_status, @@ -1232,7 +1230,7 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, return; } - run_on_cpu(cs, do_inject_x86_mce, ¶ms); + run_on_cpu(cs, do_inject_x86_mce, RUN_ON_CPU_HOST_PTR(¶ms)); if (flags & MCE_INJECT_BROADCAST) { CPUState *other_cs; @@ -1245,8 +1243,7 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, if (other_cs == cs) { continue; } - params.cpu = X86_CPU(other_cs); - run_on_cpu(other_cs, do_inject_x86_mce, ¶ms); + run_on_cpu(other_cs, do_inject_x86_mce, RUN_ON_CPU_HOST_PTR(¶ms)); } } } diff --git a/target-i386/helper.h b/target-i386/helper.h index 1320edc016..4e859eba9d 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -1,8 +1,6 @@ DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int) -DEF_HELPER_0(lock, void) -DEF_HELPER_0(unlock, void) DEF_HELPER_3(write_eflags, void, env, tl, i32) DEF_HELPER_1(read_eflags, tl, env) DEF_HELPER_2(divb_AL, void, env, tl) @@ -74,8 +72,10 @@ DEF_HELPER_3(boundw, void, env, tl, int) DEF_HELPER_3(boundl, void, env, tl, int) DEF_HELPER_1(rsm, void, env) DEF_HELPER_2(into, void, env, int) +DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg8b, void, env, tl) #ifdef TARGET_X86_64 +DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) DEF_HELPER_2(cmpxchg16b, void, env, tl) #endif DEF_HELPER_1(single_step, void, env) diff --git a/target-i386/kvm-stub.c b/target-i386/kvm-stub.c index cdf1506109..bda4dc2f0c 100644 --- a/target-i386/kvm-stub.c +++ b/target-i386/kvm-stub.c @@ -25,6 +25,11 @@ bool kvm_has_smm(void) return 1; } +bool kvm_enable_x2apic(void) +{ + return false; +} + /* This function is only called inside conditionals which we * rely on the compiler to optimize out when CONFIG_KVM is not * defined. 
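In the target-i386/helper.c hunk above, do_inject_x86_mce() is converted to the new run_on_cpu() signature: the target CPU arrives as the first argument and the parameter block travels inside a run_on_cpu_data wrapped by RUN_ON_CPU_HOST_PTR(), which is why MCEInjectionParams loses its cpu field and one block can be reused across the broadcast loop. The sketch below is a simplified, self-contained stand-in for that plumbing; the Demo* names are invented, and the real run_on_cpu() queues the function on the vCPU thread instead of calling it directly.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-ins for QEMU's run_on_cpu() plumbing; they model only the
 * calling convention, not the cross-thread scheduling. */
typedef struct DemoCPUState {
    int cpu_index;
} DemoCPUState;

typedef union {
    void *host_ptr;
    uint64_t host_ulong;
} demo_run_on_cpu_data;

#define DEMO_RUN_ON_CPU_HOST_PTR(p) \
    ((demo_run_on_cpu_data){ .host_ptr = (p) })

typedef void (*demo_run_on_cpu_func)(DemoCPUState *cs,
                                     demo_run_on_cpu_data data);

/* The real run_on_cpu() queues 'fn' onto the target vCPU's thread and
 * waits; a direct call keeps the example runnable. */
static void demo_run_on_cpu(DemoCPUState *cs, demo_run_on_cpu_func fn,
                            demo_run_on_cpu_data data)
{
    fn(cs, data);
}

/* No 'cpu' member needed any more: the target comes in as 'cs'. */
typedef struct DemoMCEParams {
    int bank;
    uint64_t status;
} DemoMCEParams;

static void demo_do_inject(DemoCPUState *cs, demo_run_on_cpu_data data)
{
    DemoMCEParams *params = data.host_ptr;

    printf("CPU %d: bank %d, status 0x%016" PRIx64 "\n",
           cs->cpu_index, params->bank, params->status);
}

int main(void)
{
    DemoCPUState cpus[2] = { { .cpu_index = 0 }, { .cpu_index = 1 } };
    DemoMCEParams params = { .bank = 9, .status = UINT64_C(1) << 63 };

    /* One parameter block shared by every CPU, as in the broadcast loop of
     * cpu_x86_inject_mce() after the patch. */
    for (int i = 0; i < 2; i++) {
        demo_run_on_cpu(&cpus[i], demo_do_inject,
                        DEMO_RUN_ON_CPU_HOST_PTR(&params));
    }
    return 0;
}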
diff --git a/target-i386/kvm.c b/target-i386/kvm.c index d1a25c5465..f62264a7a8 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -83,23 +83,17 @@ static bool has_msr_tsc_aux; static bool has_msr_tsc_adjust; static bool has_msr_tsc_deadline; static bool has_msr_feature_control; -static bool has_msr_async_pf_en; -static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; static bool has_msr_smbase; static bool has_msr_bndcfgs; -static bool has_msr_kvm_steal_time; static int lm_capable_kernel; static bool has_msr_hv_hypercall; -static bool has_msr_hv_vapic; -static bool has_msr_hv_tsc; static bool has_msr_hv_crash; static bool has_msr_hv_reset; static bool has_msr_hv_vpindex; static bool has_msr_hv_runtime; static bool has_msr_hv_synic; static bool has_msr_hv_stimer; -static bool has_msr_mtrr; static bool has_msr_xss; static bool has_msr_architectural_pmu; @@ -128,6 +122,39 @@ bool kvm_allows_irq0_override(void) return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); } +static bool kvm_x2apic_api_set_flags(uint64_t flags) +{ + KVMState *s = KVM_STATE(current_machine->accelerator); + + return !kvm_vm_enable_cap(s, KVM_CAP_X2APIC_API, 0, flags); +} + +#define MEMORIZE(fn, _result) \ + ({ \ + static bool _memorized; \ + \ + if (_memorized) { \ + return _result; \ + } \ + _memorized = true; \ + _result = fn; \ + }) + +static bool has_x2apic_api; + +bool kvm_has_x2apic_api(void) +{ + return has_x2apic_api; +} + +bool kvm_enable_x2apic(void) +{ + return MEMORIZE( + kvm_x2apic_api_set_flags(KVM_X2APIC_API_USE_32BIT_IDS | + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK), + has_x2apic_api); +} + static int kvm_get_tsc(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); @@ -156,10 +183,8 @@ static int kvm_get_tsc(CPUState *cs) return 0; } -static inline void do_kvm_synchronize_tsc(void *arg) +static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg) { - CPUState *cpu = arg; - kvm_get_tsc(cpu); } @@ -169,7 +194,7 @@ void kvm_synchronize_all_tsc(void) if (kvm_enabled()) { CPU_FOREACH(cpu) { - run_on_cpu(cpu, do_kvm_synchronize_tsc, cpu); + run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } } } @@ -604,20 +629,22 @@ static int hyperv_handle_properties(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + if (cpu->hyperv_time && + kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) <= 0) { + cpu->hyperv_time = false; + } + if (cpu->hyperv_relaxed_timing) { env->features[FEAT_HYPERV_EAX] |= HV_X64_MSR_HYPERCALL_AVAILABLE; } if (cpu->hyperv_vapic) { env->features[FEAT_HYPERV_EAX] |= HV_X64_MSR_HYPERCALL_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= HV_X64_MSR_APIC_ACCESS_AVAILABLE; - has_msr_hv_vapic = true; } - if (cpu->hyperv_time && - kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) { + if (cpu->hyperv_time) { env->features[FEAT_HYPERV_EAX] |= HV_X64_MSR_HYPERCALL_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE; env->features[FEAT_HYPERV_EAX] |= 0x200; - has_msr_hv_tsc = true; } if (cpu->hyperv_crash && has_msr_hv_crash) { env->features[FEAT_HYPERV_EDX] |= HV_X64_GUEST_CRASH_MSR_AVAILABLE; @@ -729,7 +756,7 @@ int kvm_arch_init_vcpu(CPUState *cs) if (cpu->hyperv_relaxed_timing) { c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; } - if (has_msr_hv_vapic) { + if (cpu->hyperv_vapic) { c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; } c->ebx = cpu->hyperv_spinlock_attempts; @@ -755,12 +782,6 @@ int kvm_arch_init_vcpu(CPUState *cs) c = &cpuid_data.entries[cpuid_i++]; c->function = KVM_CPUID_FEATURES | kvm_base; c->eax = 
env->features[FEAT_KVM]; - - has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF); - - has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI); - - has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME); } cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); @@ -975,9 +996,6 @@ int kvm_arch_init_vcpu(CPUState *cs) } cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); - if (env->features[FEAT_1_EDX] & CPUID_MTRR) { - has_msr_mtrr = true; - } if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { has_msr_tsc_aux = false; } @@ -1532,6 +1550,22 @@ static void kvm_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value) msrs->nmsrs++; } +static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value) +{ + kvm_msr_buf_reset(cpu); + kvm_msr_entry_add(cpu, index, value); + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); +} + +void kvm_put_apicbase(X86CPU *cpu, uint64_t value) +{ + int ret; + + ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value); + assert(ret == 1); +} + static int kvm_put_tscdeadline_msr(X86CPU *cpu) { CPUX86State *env = &cpu->env; @@ -1541,10 +1575,7 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu) return 0; } - kvm_msr_buf_reset(cpu); - kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline); - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); + ret = kvm_put_one_msr(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline); if (ret < 0) { return ret; } @@ -1567,11 +1598,8 @@ static int kvm_put_msr_feature_control(X86CPU *cpu) return 0; } - kvm_msr_buf_reset(cpu); - kvm_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, - cpu->env.msr_ia32_feature_control); - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); + ret = kvm_put_one_msr(cpu, MSR_IA32_FEATURE_CONTROL, + cpu->env.msr_ia32_feature_control); if (ret < 0) { return ret; } @@ -1633,13 +1661,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc); kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr); - if (has_msr_async_pf_en) { + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); } - if (has_msr_pv_eoi_en) { + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, env->pv_eoi_en_msr); } - if (has_msr_kvm_steal_time) { + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr); } if (has_msr_architectural_pmu) { @@ -1675,11 +1703,11 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, env->msr_hv_hypercall); } - if (has_msr_hv_vapic) { + if (cpu->hyperv_vapic) { kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, env->msr_hv_vapic); } - if (has_msr_hv_tsc) { + if (cpu->hyperv_time) { kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, env->msr_hv_tsc); } if (has_msr_hv_crash) { @@ -1725,7 +1753,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) env->msr_hv_stimer_count[j]); } } - if (has_msr_mtrr) { + if (env->features[FEAT_1_EDX] & CPUID_MTRR) { uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits); kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype); @@ -2042,13 +2070,13 @@ static int kvm_get_msrs(X86CPU *cpu) #endif kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0); kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, 0); - if (has_msr_async_pf_en) { + if (env->features[FEAT_KVM] & (1 << 
KVM_FEATURE_ASYNC_PF)) { kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, 0); } - if (has_msr_pv_eoi_en) { + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, 0); } - if (has_msr_kvm_steal_time) { + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0); } if (has_msr_architectural_pmu) { @@ -2080,10 +2108,10 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 0); kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 0); } - if (has_msr_hv_vapic) { + if (cpu->hyperv_vapic) { kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 0); } - if (has_msr_hv_tsc) { + if (cpu->hyperv_time) { kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0); } if (has_msr_hv_crash) { @@ -2115,7 +2143,7 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, msr, 0); } } - if (has_msr_mtrr) { + if (env->features[FEAT_1_EDX] & CPUID_MTRR) { kvm_msr_entry_add(cpu, MSR_MTRRdefType, 0); kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0); kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0); @@ -2416,19 +2444,6 @@ static int kvm_get_apic(X86CPU *cpu) return 0; } -static int kvm_put_apic(X86CPU *cpu) -{ - DeviceState *apic = cpu->apic_state; - struct kvm_lapic_state kapic; - - if (apic && kvm_irqchip_in_kernel()) { - kvm_put_apic_state(apic, &kapic); - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_LAPIC, &kapic); - } - return 0; -} - static int kvm_put_vcpu_events(X86CPU *cpu, int level) { CPUState *cs = CPU(cpu); @@ -2455,6 +2470,7 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.nmi.pad = 0; events.sipi_vector = env->sipi_vector; + events.flags = 0; if (has_msr_smbase) { events.smi.smm = !!(env->hflags & HF_SMM_MASK); @@ -2474,7 +2490,6 @@ static int kvm_put_vcpu_events(X86CPU *cpu, int level) events.flags |= KVM_VCPUEVENT_VALID_SMM; } - events.flags = 0; if (level >= KVM_PUT_RESET_STATE) { events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; @@ -2670,10 +2685,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) if (ret < 0) { return ret; } - ret = kvm_put_apic(x86_cpu); - if (ret < 0) { - return ret; - } } ret = kvm_put_tscdeadline_msr(x86_cpu); @@ -2844,7 +2855,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) if (run->flags & KVM_RUN_X86_SMM) { env->hflags |= HF_SMM_MASK; } else { - env->hflags &= HF_SMM_MASK; + env->hflags &= ~HF_SMM_MASK; } if (run->if_flag) { env->eflags |= IF_MASK; diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h index 42b00af1b1..76079295b2 100644 --- a/target-i386/kvm_i386.h +++ b/target-i386/kvm_i386.h @@ -41,4 +41,8 @@ int kvm_device_msix_set_vector(KVMState *s, uint32_t dev_id, uint32_t vector, int kvm_device_msix_assign(KVMState *s, uint32_t dev_id); int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id); +void kvm_put_apicbase(X86CPU *cpu, uint64_t value); + +bool kvm_enable_x2apic(void); +bool kvm_has_x2apic_api(void); #endif diff --git a/target-i386/machine.c b/target-i386/machine.c index 71c0e4dc47..760f82b6c7 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -7,10 +7,7 @@ #include "hw/i386/pc.h" #include "hw/isa/isa.h" #include "migration/cpu.h" -#include "exec/exec-all.h" -#include "cpu.h" -#include "exec/exec-all.h" #include "sysemu/kvm.h" #include "qemu/error-report.h" @@ -712,6 +709,10 @@ static bool hyperv_runtime_enable_needed(void *opaque) X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; + if (!cpu->hyperv_runtime) { + return false; + } + return 
env->msr_hv_runtime != 0; } diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c index 5bc0594dfa..70f67668ab 100644 --- a/target-i386/mem_helper.c +++ b/target-i386/mem_helper.c @@ -22,87 +22,146 @@ #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" -/* broken thread support */ - -#if defined(CONFIG_USER_ONLY) -QemuMutex global_cpu_lock; - -void helper_lock(void) +void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) { - qemu_mutex_lock(&global_cpu_lock); -} + uintptr_t ra = GETPC(); + uint64_t oldv, cmpv, newv; + int eflags; -void helper_unlock(void) -{ - qemu_mutex_unlock(&global_cpu_lock); -} + eflags = cpu_cc_compute_all(env, CC_OP); -void helper_lock_init(void) -{ - qemu_mutex_init(&global_cpu_lock); -} -#else -void helper_lock(void) -{ -} + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); -void helper_unlock(void) -{ -} + oldv = cpu_ldq_data_ra(env, a0, ra); + newv = (cmpv == oldv ? newv : oldv); + /* always do the store */ + cpu_stq_data_ra(env, a0, newv, ra); -void helper_lock_init(void) -{ + if (oldv == cmpv) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); + eflags &= ~CC_Z; + } + CC_SRC = eflags; } -#endif void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) { - uint64_t d; +#ifdef CONFIG_ATOMIC64 + uint64_t oldv, cmpv, newv; int eflags; eflags = cpu_cc_compute_all(env, CC_OP); - d = cpu_ldq_data_ra(env, a0, GETPC()); - if (d == (((uint64_t)env->regs[R_EDX] << 32) | (uint32_t)env->regs[R_EAX])) { - cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32) - | (uint32_t)env->regs[R_EBX], GETPC()); + + cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); + newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); + +#ifdef CONFIG_USER_ONLY + { + uint64_t *haddr = g2h(a0); + cmpv = cpu_to_le64(cmpv); + newv = cpu_to_le64(newv); + oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv); + oldv = le64_to_cpu(oldv); + } +#else + { + uintptr_t ra = GETPC(); + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx); + oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); + } +#endif + + if (oldv == cmpv) { eflags |= CC_Z; } else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d, GETPC()); - env->regs[R_EDX] = (uint32_t)(d >> 32); - env->regs[R_EAX] = (uint32_t)d; + env->regs[R_EAX] = (uint32_t)oldv; + env->regs[R_EDX] = (uint32_t)(oldv >> 32); eflags &= ~CC_Z; } CC_SRC = eflags; +#else + cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); +#endif /* CONFIG_ATOMIC64 */ } #ifdef TARGET_X86_64 -void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) +void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0) { - uint64_t d0, d1; + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + uint64_t o0, o1; int eflags; + bool success; if ((a0 & 0xf) != 0) { raise_exception_ra(env, EXCP0D_GPF, GETPC()); } eflags = cpu_cc_compute_all(env, CC_OP); - d0 = cpu_ldq_data_ra(env, a0, GETPC()); - d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC()); - if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) { - cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC()); - cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC()); + + cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + o0 = cpu_ldq_data_ra(env, a0 + 0, ra); + o1 = 
cpu_ldq_data_ra(env, a0 + 8, ra); + + oldv = int128_make128(o0, o1); + success = int128_eq(oldv, cmpv); + if (!success) { + newv = oldv; + } + + cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra); + cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra); + + if (success) { eflags |= CC_Z; } else { - /* always do the store */ - cpu_stq_data_ra(env, a0, d0, GETPC()); - cpu_stq_data_ra(env, a0 + 8, d1, GETPC()); - env->regs[R_EDX] = d1; - env->regs[R_EAX] = d0; + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); eflags &= ~CC_Z; } CC_SRC = eflags; } + +void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) +{ + uintptr_t ra = GETPC(); + + if ((a0 & 0xf) != 0) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } else { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int eflags = cpu_cc_compute_all(env, CC_OP); + + Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); + Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); + + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); + Int128 oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, + newv, oi, ra); + + if (int128_eq(oldv, cmpv)) { + eflags |= CC_Z; + } else { + env->regs[R_EAX] = int128_getlo(oldv); + env->regs[R_EDX] = int128_gethi(oldv); + eflags &= ~CC_Z; + } + CC_SRC = eflags; +#endif + } +} #endif void helper_boundw(CPUX86State *env, target_ulong a0, int v) diff --git a/target-i386/monitor.c b/target-i386/monitor.c index fccfe40ab7..9a3b4d746e 100644 --- a/target-i386/monitor.c +++ b/target-i386/monitor.c @@ -504,7 +504,8 @@ void hmp_info_local_apic(Monitor *mon, const QDict *qdict) void hmp_info_io_apic(Monitor *mon, const QDict *qdict) { - if (kvm_irqchip_in_kernel()) { + if (kvm_irqchip_in_kernel() && + !kvm_irqchip_is_split()) { kvm_ioapic_dump_state(mon, qdict); } else { ioapic_dump_state(mon, qdict); diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 6cbdf17426..fb79f3180d 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -1137,25 +1137,27 @@ static void do_interrupt_real(CPUX86State *env, int intno, int is_int, static void do_interrupt_user(CPUX86State *env, int intno, int is_int, int error_code, target_ulong next_eip) { - SegmentCache *dt; - target_ulong ptr; - int dpl, cpl, shift; - uint32_t e2; + if (is_int) { + SegmentCache *dt; + target_ulong ptr; + int dpl, cpl, shift; + uint32_t e2; - dt = &env->idt; - if (env->hflags & HF_LMA_MASK) { - shift = 4; - } else { - shift = 3; - } - ptr = dt->base + (intno << shift); - e2 = cpu_ldl_kernel(env, ptr + 4); + dt = &env->idt; + if (env->hflags & HF_LMA_MASK) { + shift = 4; + } else { + shift = 3; + } + ptr = dt->base + (intno << shift); + e2 = cpu_ldl_kernel(env, ptr + 4); - dpl = (e2 >> DESC_DPL_SHIFT) & 3; - cpl = env->hflags & HF_CPL_MASK; - /* check privilege if software int */ - if (is_int && dpl < cpl) { - raise_exception_err(env, EXCP0D_GPF, (intno << shift) + 2); + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (dpl < cpl) { + raise_exception_err(env, EXCP0D_GPF, (intno << shift) + 2); + } } /* Since we emulate only user space, we cannot do more than diff --git a/target-i386/translate.c b/target-i386/translate.c index fa2ac48173..324103c885 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -457,13 +457,12 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, #endif case MO_32: /* 32 bit 
address */ + if (ovr_seg < 0 && s->addseg) { + ovr_seg = def_seg; + } if (ovr_seg < 0) { - if (s->addseg) { - ovr_seg = def_seg; - } else { - tcg_gen_ext32u_tl(cpu_A0, a0); - return; - } + tcg_gen_ext32u_tl(cpu_A0, a0); + return; } break; case MO_16: @@ -1258,55 +1257,95 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) { if (d != OR_TMP0) { gen_op_mov_v_reg(ot, cpu_T0, d); - } else { + } else if (!(s1->prefix & PREFIX_LOCK)) { gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); } switch(op) { case OP_ADCL: gen_compute_eflags_c(s1, cpu_tmp4); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_add_tl(cpu_T0, cpu_tmp4, cpu_T1); + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_tmp4); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update3_cc(cpu_tmp4); set_cc_op(s1, CC_OP_ADCB + ot); break; case OP_SBBL: gen_compute_eflags_c(s1, cpu_tmp4); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_add_tl(cpu_T0, cpu_T1, cpu_tmp4); + tcg_gen_neg_tl(cpu_T0, cpu_T0); + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); + tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_tmp4); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update3_cc(cpu_tmp4); set_cc_op(s1, CC_OP_SBBB + ot); break; case OP_ADDL: - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update2_cc(); set_cc_op(s1, CC_OP_ADDB + ot); break; case OP_SUBL: - tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0); - tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_neg_tl(cpu_T0, cpu_T1); + tcg_gen_atomic_fetch_add_tl(cpu_cc_srcT, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); + tcg_gen_sub_tl(cpu_T0, cpu_cc_srcT, cpu_T1); + } else { + tcg_gen_mov_tl(cpu_cc_srcT, cpu_T0); + tcg_gen_sub_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update2_cc(); set_cc_op(s1, CC_OP_SUBB + ot); break; default: case OP_ANDL: - tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_and_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update1_cc(); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_ORL: - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_or_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update1_cc(); set_cc_op(s1, CC_OP_LOGICB + ot); break; case OP_XORL: - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_rm_T0_A0(s1, ot, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T1, + s1->mem_index, ot | MO_LE); + } else { + tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_rm_T0_A0(s1, ot, d); + } gen_op_update1_cc(); set_cc_op(s1, CC_OP_LOGICB + ot); 
break; @@ -1322,21 +1361,23 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) { - if (d != OR_TMP0) { - gen_op_mov_v_reg(ot, cpu_T0, d); + if (s1->prefix & PREFIX_LOCK) { + tcg_gen_movi_tl(cpu_T0, c > 0 ? 1 : -1); + tcg_gen_atomic_add_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s1->mem_index, ot | MO_LE); } else { - gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); + if (d != OR_TMP0) { + gen_op_mov_v_reg(ot, cpu_T0, d); + } else { + gen_op_ld_v(s1, ot, cpu_T0, cpu_A0); + } + tcg_gen_addi_tl(cpu_T0, cpu_T0, (c > 0 ? 1 : -1)); + gen_op_st_rm_T0_A0(s1, ot, d); } + gen_compute_eflags_c(s1, cpu_cc_src); - if (c > 0) { - tcg_gen_addi_tl(cpu_T0, cpu_T0, 1); - set_cc_op(s1, CC_OP_INCB + ot); - } else { - tcg_gen_addi_tl(cpu_T0, cpu_T0, -1); - set_cc_op(s1, CC_OP_DECB + ot); - } - gen_op_st_rm_T0_A0(s1, ot, d); tcg_gen_mov_tl(cpu_cc_dst, cpu_T0); + set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot); } static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, @@ -2391,11 +2432,13 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) if (qemu_loglevel_mask(LOG_UNIMP)) { target_ulong pc = s->pc_start, end = s->pc; + qemu_log_lock(); qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc); for (; pc < end; ++pc) { qemu_log(" %02x", cpu_ldub_code(env, pc)); } qemu_log("\n"); + qemu_log_unlock(); } } @@ -4495,10 +4538,6 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, s->aflag = aflag; s->dflag = dflag; - /* lock generation */ - if (prefixes & PREFIX_LOCK) - gen_helper_lock(); - /* now check op code */ reswitch: switch(b) { @@ -4633,10 +4672,15 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); op = (modrm >> 3) & 7; if (mod != 3) { - if (op == 0) + if (op == 0) { s->rip_offset = insn_const_size(ot); + } gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + /* For those below that handle locked memory, don't load here. 
*/ + if (!(s->prefix & PREFIX_LOCK) + || op != 2) { + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + } } else { gen_op_mov_v_reg(ot, cpu_T0, rm); } @@ -4649,19 +4693,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ - tcg_gen_not_tl(cpu_T0, cpu_T0); - if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) { + goto illegal_op; + } + tcg_gen_movi_tl(cpu_T0, ~0); + tcg_gen_atomic_xor_fetch_tl(cpu_T0, cpu_A0, cpu_T0, + s->mem_index, ot | MO_LE); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_not_tl(cpu_T0, cpu_T0); + if (mod != 3) { + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } else { + gen_op_mov_reg_v(ot, rm, cpu_T0); + } } break; case 3: /* neg */ - tcg_gen_neg_tl(cpu_T0, cpu_T0); - if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + if (s->prefix & PREFIX_LOCK) { + TCGLabel *label1; + TCGv a0, t0, t1, t2; + + if (mod == 3) { + goto illegal_op; + } + a0 = tcg_temp_local_new(); + t0 = tcg_temp_local_new(); + label1 = gen_new_label(); + + tcg_gen_mov_tl(a0, cpu_A0); + tcg_gen_mov_tl(t0, cpu_T0); + + gen_set_label(label1); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); + tcg_gen_mov_tl(t2, t0); + tcg_gen_neg_tl(t1, t0); + tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1, + s->mem_index, ot | MO_LE); + tcg_temp_free(t1); + tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1); + + tcg_temp_free(t2); + tcg_temp_free(a0); + tcg_gen_mov_tl(cpu_T0, t0); + tcg_temp_free(t0); } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_neg_tl(cpu_T0, cpu_T0); + if (mod != 3) { + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } else { + gen_op_mov_reg_v(ot, rm, cpu_T0); + } } gen_op_update_neg_cc(); set_cc_op(s, CC_OP_SUBB + ot); @@ -5049,19 +5132,24 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; + gen_op_mov_v_reg(ot, cpu_T0, reg); if (mod == 3) { rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, cpu_T0, reg); gen_op_mov_v_reg(ot, cpu_T1, rm); tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); gen_op_mov_reg_v(ot, reg, cpu_T1); gen_op_mov_reg_v(ot, rm, cpu_T0); } else { gen_lea_modrm(env, s, modrm); - gen_op_mov_v_reg(ot, cpu_T0, reg); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); - tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_fetch_add_tl(cpu_T1, cpu_A0, cpu_T0, + s->mem_index, ot | MO_LE); + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + } else { + gen_op_ld_v(s, ot, cpu_T1, cpu_A0); + tcg_gen_add_tl(cpu_T0, cpu_T0, cpu_T1); + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } gen_op_mov_reg_v(ot, reg, cpu_T1); } gen_op_update2_cc(); @@ -5070,57 +5158,58 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ { - TCGLabel *label1, *label2; - TCGv t0, t1, t2, a0; + TCGv oldv, newv, cmpv; ot = mo_b_d(b, dflag); modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); - a0 = tcg_temp_local_new(); - gen_op_mov_v_reg(ot, t1, reg); - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(ot, t0, rm); - } else { + oldv = tcg_temp_new(); + newv = tcg_temp_new(); + cmpv = tcg_temp_new(); + gen_op_mov_v_reg(ot, newv, reg); + tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); + + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) { + goto illegal_op; + } gen_lea_modrm(env, s, 
modrm); - tcg_gen_mov_tl(a0, cpu_A0); - gen_op_ld_v(s, ot, t0, a0); - rm = 0; /* avoid warning */ - } - label1 = gen_new_label(); - tcg_gen_mov_tl(t2, cpu_regs[R_EAX]); - gen_extu(ot, t0); - gen_extu(ot, t2); - tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1); - label2 = gen_new_label(); - if (mod == 3) { - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_mov_reg_v(ot, rm, t1); + tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv, + s->mem_index, ot | MO_LE); + gen_op_mov_reg_v(ot, R_EAX, oldv); } else { - /* perform no-op store cycle like physical cpu; must be - before changing accumulator to ensure idempotency if - the store faults and the instruction is restarted */ - gen_op_st_v(s, ot, t0, a0); - gen_op_mov_reg_v(ot, R_EAX, t0); - tcg_gen_br(label2); - gen_set_label(label1); - gen_op_st_v(s, ot, t1, a0); + if (mod == 3) { + rm = (modrm & 7) | REX_B(s); + gen_op_mov_v_reg(ot, oldv, rm); + } else { + gen_lea_modrm(env, s, modrm); + gen_op_ld_v(s, ot, oldv, cpu_A0); + rm = 0; /* avoid warning */ + } + gen_extu(ot, oldv); + gen_extu(ot, cmpv); + /* store value = (old == cmp ? new : old); */ + tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); + if (mod == 3) { + gen_op_mov_reg_v(ot, R_EAX, oldv); + gen_op_mov_reg_v(ot, rm, newv); + } else { + /* Perform an unconditional store cycle like physical cpu; + must be before changing accumulator to ensure + idempotency if the store faults and the instruction + is restarted */ + gen_op_st_v(s, ot, newv, cpu_A0); + gen_op_mov_reg_v(ot, R_EAX, oldv); + } } - gen_set_label(label2); - tcg_gen_mov_tl(cpu_cc_src, t0); - tcg_gen_mov_tl(cpu_cc_srcT, t2); - tcg_gen_sub_tl(cpu_cc_dst, t2, t0); + tcg_gen_mov_tl(cpu_cc_src, oldv); + tcg_gen_mov_tl(cpu_cc_srcT, cmpv); + tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); set_cc_op(s, CC_OP_SUBB + ot); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); - tcg_temp_free(a0); + tcg_temp_free(oldv); + tcg_temp_free(newv); + tcg_temp_free(cmpv); } break; case 0x1c7: /* cmpxchg8b */ @@ -5133,14 +5222,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_helper_cmpxchg16b(cpu_env, cpu_A0); + if ((s->prefix & PREFIX_LOCK) && parallel_cpus) { + gen_helper_cmpxchg16b(cpu_env, cpu_A0); + } else { + gen_helper_cmpxchg16b_unlocked(cpu_env, cpu_A0); + } } else #endif { if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_lea_modrm(env, s, modrm); - gen_helper_cmpxchg8b(cpu_env, cpu_A0); + if ((s->prefix & PREFIX_LOCK) && parallel_cpus) { + gen_helper_cmpxchg8b(cpu_env, cpu_A0); + } else { + gen_helper_cmpxchg8b_unlocked(cpu_env, cpu_A0); + } } set_cc_op(s, CC_OP_EFLAGS); break; @@ -5372,7 +5469,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, { AddressParts a = gen_lea_modrm_0(env, s, modrm); TCGv ea = gen_lea_modrm_1(a); - gen_op_mov_reg_v(dflag, reg, ea); + gen_lea_v_seg(s, s->aflag, ea, -1, -1); + gen_op_mov_reg_v(dflag, reg, cpu_A0); } break; @@ -5464,12 +5562,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_lea_modrm(env, s, modrm); gen_op_mov_v_reg(ot, cpu_T0, reg); /* for xchg, lock is implicit */ - if (!(prefixes & PREFIX_LOCK)) - gen_helper_lock(); - gen_op_ld_v(s, ot, cpu_T1, cpu_A0); - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - if (!(prefixes & PREFIX_LOCK)) - gen_helper_unlock(); + tcg_gen_atomic_xchg_tl(cpu_T1, cpu_A0, cpu_T0, + s->mem_index, ot | MO_LE); gen_op_mov_reg_v(ot, reg, cpu_T1); } break; 
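The gen_op()/gen_inc()/cmpxchg/xchg hunks above replace the old global-lock helpers with real atomic TCG operations for the LOCK prefix. Most locked read-modify-write forms map onto a single fetch-op (locked SUB, for instance, becomes an atomic fetch-add of the negated operand), but locked NEG has no such primitive and is emitted as a compare-and-swap retry loop. The sketch below is a host-level C11 analogue of those two shapes, not the generated TCG code itself.

#include <inttypes.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Analogue of the OP_SUBL PREFIX_LOCK branch: one atomic fetch-add of the
 * negated operand yields both the old value (for the flags) and the store. */
static uint32_t locked_sub32(_Atomic uint32_t *mem, uint32_t val)
{
    uint32_t old = atomic_fetch_add(mem, (uint32_t)-val);
    return old - val;                 /* result, as left in cpu_T0 */
}

/* Analogue of the locked NEG path: load, negate, compare-and-swap, and
 * retry if another thread changed the location in between -- the same
 * shape as the label1/atomic_cmpxchg/brcond sequence in the patch. */
static uint32_t locked_neg32(_Atomic uint32_t *mem)
{
    uint32_t old = atomic_load(mem);

    /* On failure, compare_exchange rewrites 'old' with the current memory
     * contents, so each retry negates a fresh value. */
    while (!atomic_compare_exchange_weak(mem, &old, (uint32_t)-old)) {
        /* retry */
    }
    return (uint32_t)-old;            /* the value actually stored */
}

int main(void)
{
    _Atomic uint32_t m = 7;

    uint32_t r1 = locked_sub32(&m, 3);   /* 7 - 3 = 4 */
    printf("sub: result %" PRIu32 ", mem %" PRIu32 "\n", r1, atomic_load(&m));

    uint32_t r2 = locked_neg32(&m);      /* stores (uint32_t)-4 */
    printf("neg: stored %" PRIu32 ", mem %" PRIu32 "\n", r2, atomic_load(&m));
    return 0;
}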
@@ -6555,7 +6649,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (mod != 3) { s->rip_offset = 1; gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + if (!(s->prefix & PREFIX_LOCK)) { + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + } } else { gen_op_mov_v_reg(ot, cpu_T0, rm); } @@ -6585,44 +6681,69 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, rm = (modrm & 7) | REX_B(s); gen_op_mov_v_reg(MO_32, cpu_T1, reg); if (mod != 3) { - gen_lea_modrm(env, s, modrm); + AddressParts a = gen_lea_modrm_0(env, s, modrm); /* specific case: we need to add a displacement */ gen_exts(ot, cpu_T1); tcg_gen_sari_tl(cpu_tmp0, cpu_T1, 3 + ot); tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, ot); - tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0); - gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + tcg_gen_add_tl(cpu_A0, gen_lea_modrm_1(a), cpu_tmp0); + gen_lea_v_seg(s, s->aflag, cpu_A0, a.def_seg, s->override); + if (!(s->prefix & PREFIX_LOCK)) { + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + } } else { gen_op_mov_v_reg(ot, cpu_T0, rm); } bt_op: tcg_gen_andi_tl(cpu_T1, cpu_T1, (1 << (3 + ot)) - 1); - tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); - switch(op) { - case 0: - break; - case 1: - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); - tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0); - break; - case 2: - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); - tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0); - break; - default: - case 3: - tcg_gen_movi_tl(cpu_tmp0, 1); - tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); - tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0); - break; - } - if (op != 0) { - if (mod != 3) { - gen_op_st_v(s, ot, cpu_T0, cpu_A0); - } else { - gen_op_mov_reg_v(ot, rm, cpu_T0); + tcg_gen_movi_tl(cpu_tmp0, 1); + tcg_gen_shl_tl(cpu_tmp0, cpu_tmp0, cpu_T1); + if (s->prefix & PREFIX_LOCK) { + switch (op) { + case 0: /* bt */ + /* Needs no atomic ops; we surpressed the normal + memory load for LOCK above so do it now. */ + gen_op_ld_v(s, ot, cpu_T0, cpu_A0); + break; + case 1: /* bts */ + tcg_gen_atomic_fetch_or_tl(cpu_T0, cpu_A0, cpu_tmp0, + s->mem_index, ot | MO_LE); + break; + case 2: /* btr */ + tcg_gen_not_tl(cpu_tmp0, cpu_tmp0); + tcg_gen_atomic_fetch_and_tl(cpu_T0, cpu_A0, cpu_tmp0, + s->mem_index, ot | MO_LE); + break; + default: + case 3: /* btc */ + tcg_gen_atomic_fetch_xor_tl(cpu_T0, cpu_A0, cpu_tmp0, + s->mem_index, ot | MO_LE); + break; + } + tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); + } else { + tcg_gen_shr_tl(cpu_tmp4, cpu_T0, cpu_T1); + switch (op) { + case 0: /* bt */ + /* Data already loaded; nothing to do. */ + break; + case 1: /* bts */ + tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_tmp0); + break; + case 2: /* btr */ + tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_tmp0); + break; + default: + case 3: /* btc */ + tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_tmp0); + break; + } + if (op != 0) { + if (mod != 3) { + gen_op_st_v(s, ot, cpu_T0, cpu_A0); + } else { + gen_op_mov_reg_v(ot, rm, cpu_T0); + } } } @@ -8012,13 +8133,21 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, || (prefixes & PREFIX_LOCK)) { goto illegal_op; } + tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); break; case 0xe8 ... 0xef: /* lfence */ + if (!(s->cpuid_features & CPUID_SSE) + || (prefixes & PREFIX_LOCK)) { + goto illegal_op; + } + tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC); + break; case 0xf0 ... 
0xf7: /* mfence */ if (!(s->cpuid_features & CPUID_SSE2) || (prefixes & PREFIX_LOCK)) { goto illegal_op; } + tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); break; default: @@ -8080,20 +8209,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, default: goto unknown_op; } - /* lock generation */ - if (s->prefix & PREFIX_LOCK) - gen_helper_unlock(); return s->pc; illegal_op: - if (s->prefix & PREFIX_LOCK) - gen_helper_unlock(); - /* XXX: ensure that no lock was generated */ gen_illegal_opcode(s); return s->pc; unknown_op: - if (s->prefix & PREFIX_LOCK) - gen_helper_unlock(); - /* XXX: ensure that no lock was generated */ gen_unknown_opcode(env, s); return s->pc; } @@ -8185,8 +8305,6 @@ void tcg_x86_init(void) offsetof(CPUX86State, bnd_regs[i].ub), bnd_regu_names[i]); } - - helper_lock_init(); } /* generate intermediate code for basic block 'tb'. */ @@ -8354,6 +8472,7 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { int disas_flags; + qemu_log_lock(); qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); #ifdef TARGET_X86_64 @@ -8364,6 +8483,7 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) disas_flags = !dc->code32; log_target_disas(cs, pc_start, pc_ptr - pc_start, disas_flags); qemu_log("\n"); + qemu_log_unlock(); } #endif diff --git a/target-lm32/cpu.c b/target-lm32/cpu.c index a783d461dd..8d939a7779 100644 --- a/target-lm32/cpu.c +++ b/target-lm32/cpu.c @@ -144,6 +144,13 @@ static void lm32_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); LM32CPUClass *lcc = LM32_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } cpu_reset(cs); @@ -160,7 +167,6 @@ static void lm32_cpu_initfn(Object *obj) static bool tcg_initialized; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); env->flags = 0; @@ -285,13 +291,6 @@ static void lm32_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_stop_before_watchpoint = true; cc->debug_excp_handler = lm32_debug_excp_handler; cc->disas_set_info = lm32_cpu_disas_set_info; - - /* - * Reason: lm32_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void lm32_register_cpu_type(const LM32CPUInfo *info) diff --git a/target-lm32/translate.c b/target-lm32/translate.c index 2d8caebbfc..692882f447 100644 --- a/target-lm32/translate.c +++ b/target-lm32/translate.c @@ -33,12 +33,14 @@ #include "exec/log.h" -#define DISAS_LM32 1 -#if DISAS_LM32 -# define LOG_DIS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__) -#else -# define LOG_DIS(...) do { } while (0) -#endif +#define DISAS_LM32 0 + +#define LOG_DIS(...) 
\ + do { \ + if (DISAS_LM32) { \ + qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__); \ + } \ + } while (0) #define EXTRACT_FIELD(src, start, end) \ (((src) >> start) & ((1 << (end - start + 1)) - 1)) @@ -211,7 +213,7 @@ static void dec_and(DisasContext *dc) static void dec_andhi(DisasContext *dc) { - LOG_DIS("andhi r%d, r%d, %d\n", dc->r2, dc->r0, dc->imm16); + LOG_DIS("andhi r%d, r%d, %d\n", dc->r1, dc->r0, dc->imm16); tcg_gen_andi_tl(cpu_R[dc->r1], cpu_R[dc->r0], (dc->imm16 << 16)); } @@ -274,7 +276,7 @@ static inline void gen_cond_branch(DisasContext *dc, int cond) static void dec_be(DisasContext *dc) { - LOG_DIS("be r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("be r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16) * 4); gen_cond_branch(dc, TCG_COND_EQ); @@ -282,7 +284,7 @@ static void dec_be(DisasContext *dc) static void dec_bg(DisasContext *dc) { - LOG_DIS("bg r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("bg r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16 * 4)); gen_cond_branch(dc, TCG_COND_GT); @@ -290,7 +292,7 @@ static void dec_bg(DisasContext *dc) static void dec_bge(DisasContext *dc) { - LOG_DIS("bge r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("bge r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16) * 4); gen_cond_branch(dc, TCG_COND_GE); @@ -298,7 +300,7 @@ static void dec_bge(DisasContext *dc) static void dec_bgeu(DisasContext *dc) { - LOG_DIS("bgeu r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("bgeu r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16) * 4); gen_cond_branch(dc, TCG_COND_GEU); @@ -306,7 +308,7 @@ static void dec_bgeu(DisasContext *dc) static void dec_bgu(DisasContext *dc) { - LOG_DIS("bgu r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("bgu r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16) * 4); gen_cond_branch(dc, TCG_COND_GTU); @@ -314,7 +316,7 @@ static void dec_bgu(DisasContext *dc) static void dec_bne(DisasContext *dc) { - LOG_DIS("bne r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("bne r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16) * 4); gen_cond_branch(dc, TCG_COND_NE); @@ -342,9 +344,6 @@ static void dec_calli(DisasContext *dc) static inline void gen_compare(DisasContext *dc, int cond) { - int rX = (dc->format == OP_FMT_RR) ? dc->r2 : dc->r1; - int rY = (dc->format == OP_FMT_RR) ? dc->r0 : dc->r0; - int rZ = (dc->format == OP_FMT_RR) ? 
dc->r1 : -1; int i; if (dc->format == OP_FMT_RI) { @@ -358,16 +357,16 @@ static inline void gen_compare(DisasContext *dc, int cond) break; } - tcg_gen_setcondi_tl(cond, cpu_R[rX], cpu_R[rY], i); + tcg_gen_setcondi_tl(cond, cpu_R[dc->r1], cpu_R[dc->r0], i); } else { - tcg_gen_setcond_tl(cond, cpu_R[rX], cpu_R[rY], cpu_R[rZ]); + tcg_gen_setcond_tl(cond, cpu_R[dc->r2], cpu_R[dc->r0], cpu_R[dc->r1]); } } static void dec_cmpe(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("cmpei r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("cmpei r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16)); } else { LOG_DIS("cmpe r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -379,7 +378,7 @@ static void dec_cmpe(DisasContext *dc) static void dec_cmpg(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("cmpgi r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("cmpgi r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16)); } else { LOG_DIS("cmpg r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -391,7 +390,7 @@ static void dec_cmpg(DisasContext *dc) static void dec_cmpge(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("cmpgei r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("cmpgei r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16)); } else { LOG_DIS("cmpge r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -403,7 +402,7 @@ static void dec_cmpge(DisasContext *dc) static void dec_cmpgeu(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("cmpgeui r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("cmpgeui r%d, r%d, %d\n", dc->r1, dc->r0, zero_extend(dc->imm16, 16)); } else { LOG_DIS("cmpgeu r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -415,7 +414,7 @@ static void dec_cmpgeu(DisasContext *dc) static void dec_cmpgu(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("cmpgui r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("cmpgui r%d, r%d, %d\n", dc->r1, dc->r0, zero_extend(dc->imm16, 16)); } else { LOG_DIS("cmpgu r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -427,7 +426,7 @@ static void dec_cmpgu(DisasContext *dc) static void dec_cmpne(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("cmpnei r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("cmpnei r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16)); } else { LOG_DIS("cmpne r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -539,7 +538,7 @@ static void dec_modu(DisasContext *dc) static void dec_mul(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("muli r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("muli r%d, r%d, %d\n", dc->r1, dc->r0, sign_extend(dc->imm16, 16)); } else { LOG_DIS("mul r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -563,7 +562,7 @@ static void dec_mul(DisasContext *dc) static void dec_nor(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("nori r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("nori r%d, r%d, %d\n", dc->r1, dc->r0, zero_extend(dc->imm16, 16)); } else { LOG_DIS("nor r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -865,7 +864,7 @@ static void dec_wcsr(DisasContext *dc) { int no; - LOG_DIS("wcsr r%d, %d\n", dc->r1, dc->csr); + LOG_DIS("wcsr %d, r%d\n", dc->csr, dc->r1); switch (dc->csr) { case CSR_IE: @@ -959,7 +958,7 @@ static void dec_wcsr(DisasContext *dc) static void dec_xnor(DisasContext *dc) { if (dc->format == OP_FMT_RI) { - LOG_DIS("xnori r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("xnori r%d, r%d, %d\n", dc->r1, dc->r0, zero_extend(dc->imm16, 16)); } else { if (dc->r1 == R_R0) { @@ -981,7 +980,7 @@ static void dec_xnor(DisasContext *dc) static void dec_xor(DisasContext *dc) { if 
(dc->format == OP_FMT_RI) { - LOG_DIS("xori r%d, r%d, %d\n", dc->r0, dc->r1, + LOG_DIS("xori r%d, r%d, %d\n", dc->r1, dc->r0, zero_extend(dc->imm16, 16)); } else { LOG_DIS("xor r%d, r%d, r%d\n", dc->r2, dc->r0, dc->r1); @@ -1149,10 +1148,12 @@ void gen_intermediate_code(CPULM32State *env, struct TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("\n"); log_target_disas(cs, pc_start, dc->pc - pc_start, 0); qemu_log("\nisize=%d osize=%d\n", dc->pc - pc_start, tcg_op_buf_count()); + qemu_log_unlock(); } #endif } diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c index 116b784e63..ba17480098 100644 --- a/target-m68k/cpu.c +++ b/target-m68k/cpu.c @@ -58,15 +58,20 @@ static void m68k_cpu_reset(CPUState *s) #endif m68k_switch_sp(env); /* ??? FP regs should be initialized to NaN. */ - env->cc_op = CC_OP_FLAGS; + cpu_m68k_set_ccr(env, 0); /* TODO: We should set PC from the interrupt vector. */ env->pc = 0; tlb_flush(s, 1); } -static void m68k_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) +static void m68k_cpu_disas_set_info(CPUState *s, disassemble_info *info) { + M68kCPU *cpu = M68K_CPU(s); + CPUM68KState *env = &cpu->env; info->print_insn = print_insn_m68k; + if (m68k_feature(env, M68K_FEATURE_M68000)) { + info->mach = bfd_mach_m68040; + } } /* CPU models */ @@ -98,6 +103,57 @@ static void m5206_cpu_initfn(Object *obj) m68k_set_feature(env, M68K_FEATURE_CF_ISA_A); } +static void m68000_cpu_initfn(Object *obj) +{ + M68kCPU *cpu = M68K_CPU(obj); + CPUM68KState *env = &cpu->env; + + m68k_set_feature(env, M68K_FEATURE_M68000); + m68k_set_feature(env, M68K_FEATURE_USP); + m68k_set_feature(env, M68K_FEATURE_WORD_INDEX); +} + +static void m68020_cpu_initfn(Object *obj) +{ + M68kCPU *cpu = M68K_CPU(obj); + CPUM68KState *env = &cpu->env; + + m68k_set_feature(env, M68K_FEATURE_M68000); + m68k_set_feature(env, M68K_FEATURE_USP); + m68k_set_feature(env, M68K_FEATURE_WORD_INDEX); + m68k_set_feature(env, M68K_FEATURE_QUAD_MULDIV); + m68k_set_feature(env, M68K_FEATURE_BRAL); + m68k_set_feature(env, M68K_FEATURE_BCCL); + m68k_set_feature(env, M68K_FEATURE_BITFIELD); + m68k_set_feature(env, M68K_FEATURE_EXT_FULL); + m68k_set_feature(env, M68K_FEATURE_SCALED_INDEX); + m68k_set_feature(env, M68K_FEATURE_LONG_MULDIV); + m68k_set_feature(env, M68K_FEATURE_FPU); + m68k_set_feature(env, M68K_FEATURE_CAS); + m68k_set_feature(env, M68K_FEATURE_BKPT); +} +#define m68030_cpu_initfn m68020_cpu_initfn +#define m68040_cpu_initfn m68020_cpu_initfn + +static void m68060_cpu_initfn(Object *obj) +{ + M68kCPU *cpu = M68K_CPU(obj); + CPUM68KState *env = &cpu->env; + + m68k_set_feature(env, M68K_FEATURE_M68000); + m68k_set_feature(env, M68K_FEATURE_USP); + m68k_set_feature(env, M68K_FEATURE_WORD_INDEX); + m68k_set_feature(env, M68K_FEATURE_BRAL); + m68k_set_feature(env, M68K_FEATURE_BCCL); + m68k_set_feature(env, M68K_FEATURE_BITFIELD); + m68k_set_feature(env, M68K_FEATURE_EXT_FULL); + m68k_set_feature(env, M68K_FEATURE_SCALED_INDEX); + m68k_set_feature(env, M68K_FEATURE_LONG_MULDIV); + m68k_set_feature(env, M68K_FEATURE_FPU); + m68k_set_feature(env, M68K_FEATURE_CAS); + m68k_set_feature(env, M68K_FEATURE_BKPT); +} + static void m5208_cpu_initfn(Object *obj) { M68kCPU *cpu = M68K_CPU(obj); @@ -148,6 +204,11 @@ typedef struct M68kCPUInfo { } M68kCPUInfo; static const M68kCPUInfo m68k_cpus[] = { + { .name = "m68000", .instance_init = m68000_cpu_initfn }, + { .name = "m68020", .instance_init = m68020_cpu_initfn }, + { .name = 
"m68030", .instance_init = m68030_cpu_initfn }, + { .name = "m68040", .instance_init = m68040_cpu_initfn }, + { .name = "m68060", .instance_init = m68060_cpu_initfn }, { .name = "m5206", .instance_init = m5206_cpu_initfn }, { .name = "m5208", .instance_init = m5208_cpu_initfn }, { .name = "cfv4e", .instance_init = cfv4e_cpu_initfn }, @@ -159,6 +220,13 @@ static void m68k_cpu_realizefn(DeviceState *dev, Error **errp) CPUState *cs = CPU(dev); M68kCPU *cpu = M68K_CPU(dev); M68kCPUClass *mcc = M68K_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } m68k_cpu_init_gdb(cpu); @@ -176,7 +244,6 @@ static void m68k_cpu_initfn(Object *obj) static bool inited; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); if (tcg_enabled() && !inited) { inited = true; @@ -214,21 +281,12 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data) #else cc->get_phys_page_debug = m68k_cpu_get_phys_page_debug; #endif - cc->cpu_exec_enter = m68k_cpu_exec_enter; - cc->cpu_exec_exit = m68k_cpu_exec_exit; cc->disas_set_info = m68k_cpu_disas_set_info; cc->gdb_num_core_regs = 18; cc->gdb_core_xml_file = "cf-core.xml"; dc->vmsd = &vmstate_m68k_cpu; - - /* - * Reason: m68k_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void register_cpu_type(const M68kCPUInfo *info) diff --git a/target-m68k/cpu.h b/target-m68k/cpu.h index b2faa6b605..6dfb54eb70 100644 --- a/target-m68k/cpu.h +++ b/target-m68k/cpu.h @@ -30,6 +30,14 @@ #include "cpu-qom.h" #include "fpu/softfloat.h" +#define OS_BYTE 0 +#define OS_WORD 1 +#define OS_LONG 2 +#define OS_SINGLE 3 +#define OS_DOUBLE 4 +#define OS_EXTENDED 5 +#define OS_PACKED 6 + #define MAX_QREGS 32 #define EXCP_ACCESS 2 /* Access (MMU) error. */ @@ -53,6 +61,7 @@ #define EXCP_HALT_INSN 0x101 #define NB_MMU_MODES 2 +#define TARGET_INSN_START_EXTRA_WORDS 1 typedef struct CPUM68KState { uint32_t dregs[8]; @@ -66,9 +75,11 @@ typedef struct CPUM68KState { /* Condition flags. */ uint32_t cc_op; - uint32_t cc_dest; - uint32_t cc_src; - uint32_t cc_x; + uint32_t cc_x; /* always 0/1 */ + uint32_t cc_n; /* in bit 31 (i.e. negative) */ + uint32_t cc_v; /* in bit 31, unused, or computed from cc_n and cc_v */ + uint32_t cc_c; /* either 0/1, unused, or computed from cc_n and cc_v */ + uint32_t cc_z; /* == 0 or unused */ float64 fregs[8]; float64 fp_result; @@ -141,9 +152,6 @@ hwaddr m68k_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int m68k_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int m68k_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); -void m68k_cpu_exec_enter(CPUState *cs); -void m68k_cpu_exec_exit(CPUState *cs); - void m68k_tcg_init(void); void m68k_cpu_init_gdb(M68kCPU *cpu); M68kCPU *cpu_m68k_init(const char *cpu_model); @@ -152,20 +160,36 @@ M68kCPU *cpu_m68k_init(const char *cpu_model); is returned if the signal was handled by the virtual CPU. 
*/ int cpu_m68k_signal_handler(int host_signum, void *pinfo, void *puc); -void cpu_m68k_flush_flags(CPUM68KState *, int); - -enum { - CC_OP_DYNAMIC, /* Use env->cc_op */ - CC_OP_FLAGS, /* CC_DEST = CVZN, CC_SRC = unused */ - CC_OP_LOGIC, /* CC_DEST = result, CC_SRC = unused */ - CC_OP_ADD, /* CC_DEST = result, CC_SRC = source */ - CC_OP_SUB, /* CC_DEST = result, CC_SRC = source */ - CC_OP_CMPB, /* CC_DEST = result, CC_SRC = source */ - CC_OP_CMPW, /* CC_DEST = result, CC_SRC = source */ - CC_OP_ADDX, /* CC_DEST = result, CC_SRC = source */ - CC_OP_SUBX, /* CC_DEST = result, CC_SRC = source */ - CC_OP_SHIFT, /* CC_DEST = result, CC_SRC = carry */ -}; +uint32_t cpu_m68k_get_ccr(CPUM68KState *env); +void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t); + + +/* Instead of computing the condition codes after each m68k instruction, + * QEMU just stores one operand (called CC_SRC), the result + * (called CC_DEST) and the type of operation (called CC_OP). When the + * condition codes are needed, the condition codes can be calculated + * using this information. Condition codes are not generated if they + * are only needed for conditional branches. + */ +typedef enum { + /* Translator only -- use env->cc_op. */ + CC_OP_DYNAMIC = -1, + + /* Each flag bit computed into cc_[xcnvz]. */ + CC_OP_FLAGS, + + /* X in cc_x, C = X, N in cc_n, Z in cc_n, V via cc_n/cc_v. */ + CC_OP_ADDB, CC_OP_ADDW, CC_OP_ADDL, + CC_OP_SUBB, CC_OP_SUBW, CC_OP_SUBL, + + /* X in cc_x, {N,Z,C,V} via cc_n/cc_v. */ + CC_OP_CMPB, CC_OP_CMPW, CC_OP_CMPL, + + /* X in cc_x, C = 0, V = 0, N in cc_n, Z in cc_n. */ + CC_OP_LOGIC, + + CC_OP_NB +} CCOp; #define CCF_C 0x01 #define CCF_V 0x02 @@ -196,7 +220,6 @@ enum { #define MACSR_EV 0x001 void m68k_set_irq_level(M68kCPU *cpu, int level, uint8_t vector); -void m68k_set_macsr(CPUM68KState *env, uint32_t val); void m68k_switch_sp(CPUM68KState *env); #define M68K_FPCR_PREC (1 << 6) @@ -208,6 +231,7 @@ void do_m68k_semihosting(CPUM68KState *env, int nr); ISA revisions mentioned. */ enum m68k_features { + M68K_FEATURE_M68000, M68K_FEATURE_CF_ISA_A, M68K_FEATURE_CF_ISA_B, /* (ISA B or C). */ M68K_FEATURE_CF_ISA_APLUSC, /* BIT/BITREV, FF1, STRLDSR (ISA A+ or C). */ @@ -218,7 +242,15 @@ enum m68k_features { M68K_FEATURE_CF_EMAC_B, /* Revision B EMAC (dual accumulate). */ M68K_FEATURE_USP, /* User Stack Pointer. (ISA A+, B or C). */ M68K_FEATURE_EXT_FULL, /* 68020+ full extension word. */ - M68K_FEATURE_WORD_INDEX /* word sized address index registers. */ + M68K_FEATURE_WORD_INDEX, /* word sized address index registers. */ + M68K_FEATURE_SCALED_INDEX, /* scaled address index registers. */ + M68K_FEATURE_LONG_MULDIV, /* 32 bit multiply/divide. */ + M68K_FEATURE_QUAD_MULDIV, /* 64 bit multiply/divide. */ + M68K_FEATURE_BCCL, /* Long conditional branches. */ + M68K_FEATURE_BITFIELD, /* Bit field insns. */ + M68K_FEATURE_FPU, + M68K_FEATURE_CAS, + M68K_FEATURE_BKPT, }; static inline int m68k_feature(CPUM68KState *env, int feature) @@ -231,8 +263,11 @@ void m68k_cpu_list(FILE *f, fprintf_function cpu_fprintf); void register_m68k_insns (CPUM68KState *env); #ifdef CONFIG_USER_ONLY -/* Linux uses 8k pages. */ -#define TARGET_PAGE_BITS 13 +/* Coldfire Linux uses 8k pages + * and m68k linux uses 4k pages + * use the smaller one + */ +#define TARGET_PAGE_BITS 12 #else /* Smallest TLB entry size is 1k. 
*/ #define TARGET_PAGE_BITS 10 diff --git a/target-m68k/helper.c b/target-m68k/helper.c index f52d0e3036..7aed9ffd2f 100644 --- a/target-m68k/helper.c +++ b/target-m68k/helper.c @@ -132,87 +132,6 @@ void m68k_cpu_init_gdb(M68kCPU *cpu) /* TODO: Add [E]MAC registers. */ } -void cpu_m68k_flush_flags(CPUM68KState *env, int cc_op) -{ - M68kCPU *cpu = m68k_env_get_cpu(env); - int flags; - uint32_t src; - uint32_t dest; - uint32_t tmp; - -#define HIGHBIT 0x80000000u - -#define SET_NZ(x) do { \ - if ((x) == 0) \ - flags |= CCF_Z; \ - else if ((int32_t)(x) < 0) \ - flags |= CCF_N; \ - } while (0) - -#define SET_FLAGS_SUB(type, utype) do { \ - SET_NZ((type)dest); \ - tmp = dest + src; \ - if ((utype) tmp < (utype) src) \ - flags |= CCF_C; \ - if ((1u << (sizeof(type) * 8 - 1)) & (tmp ^ dest) & (tmp ^ src)) \ - flags |= CCF_V; \ - } while (0) - - flags = 0; - src = env->cc_src; - dest = env->cc_dest; - switch (cc_op) { - case CC_OP_FLAGS: - flags = dest; - break; - case CC_OP_LOGIC: - SET_NZ(dest); - break; - case CC_OP_ADD: - SET_NZ(dest); - if (dest < src) - flags |= CCF_C; - tmp = dest - src; - if (HIGHBIT & (src ^ dest) & ~(tmp ^ src)) - flags |= CCF_V; - break; - case CC_OP_SUB: - SET_FLAGS_SUB(int32_t, uint32_t); - break; - case CC_OP_CMPB: - SET_FLAGS_SUB(int8_t, uint8_t); - break; - case CC_OP_CMPW: - SET_FLAGS_SUB(int16_t, uint16_t); - break; - case CC_OP_ADDX: - SET_NZ(dest); - if (dest <= src) - flags |= CCF_C; - tmp = dest - src - 1; - if (HIGHBIT & (src ^ dest) & ~(tmp ^ src)) - flags |= CCF_V; - break; - case CC_OP_SUBX: - SET_NZ(dest); - tmp = dest + src + 1; - if (tmp <= src) - flags |= CCF_C; - if (HIGHBIT & (tmp ^ dest) & (tmp ^ src)) - flags |= CCF_V; - break; - case CC_OP_SHIFT: - SET_NZ(dest); - if (src) - flags |= CCF_C; - break; - default: - cpu_abort(CPU(cpu), "Bad CC_OP %d", cc_op); - } - env->cc_op = CC_OP_FLAGS; - env->cc_dest = flags; -} - void HELPER(movec)(CPUM68KState *env, uint32_t reg, uint32_t val) { M68kCPU *cpu = m68k_env_get_cpu(env); @@ -349,140 +268,71 @@ uint32_t HELPER(ff1)(uint32_t x) return n; } -uint32_t HELPER(sats)(uint32_t val, uint32_t ccr) +uint32_t HELPER(sats)(uint32_t val, uint32_t v) { /* The result has the opposite sign to the original value. */ - if (ccr & CCF_V) + if ((int32_t)v < 0) { val = (((int32_t)val) >> 31) ^ SIGNBIT; - return val; -} - -uint32_t HELPER(subx_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2) -{ - uint32_t res; - uint32_t old_flags; - - old_flags = env->cc_dest; - if (env->cc_x) { - env->cc_x = (op1 <= op2); - env->cc_op = CC_OP_SUBX; - res = op1 - (op2 + 1); - } else { - env->cc_x = (op1 < op2); - env->cc_op = CC_OP_SUB; - res = op1 - op2; - } - env->cc_dest = res; - env->cc_src = op2; - cpu_m68k_flush_flags(env, env->cc_op); - /* !Z is sticky. */ - env->cc_dest &= (old_flags | ~CCF_Z); - return res; -} - -uint32_t HELPER(addx_cc)(CPUM68KState *env, uint32_t op1, uint32_t op2) -{ - uint32_t res; - uint32_t old_flags; - - old_flags = env->cc_dest; - if (env->cc_x) { - res = op1 + op2 + 1; - env->cc_x = (res <= op2); - env->cc_op = CC_OP_ADDX; - } else { - res = op1 + op2; - env->cc_x = (res < op2); - env->cc_op = CC_OP_ADD; } - env->cc_dest = res; - env->cc_src = op2; - cpu_m68k_flush_flags(env, env->cc_op); - /* !Z is sticky. 
*/ - env->cc_dest &= (old_flags | ~CCF_Z); - return res; -} - -uint32_t HELPER(xflag_lt)(uint32_t a, uint32_t b) -{ - return a < b; + return val; } void HELPER(set_sr)(CPUM68KState *env, uint32_t val) { - env->sr = val & 0xffff; + env->sr = val & 0xffe0; + cpu_m68k_set_ccr(env, val); m68k_switch_sp(env); } uint32_t HELPER(shl_cc)(CPUM68KState *env, uint32_t val, uint32_t shift) { - uint32_t result; - uint32_t cf; + uint64_t result; shift &= 63; - if (shift == 0) { - result = val; - cf = env->cc_src & CCF_C; - } else if (shift < 32) { - result = val << shift; - cf = (val >> (32 - shift)) & 1; - } else if (shift == 32) { - result = 0; - cf = val & 1; - } else /* shift > 32 */ { - result = 0; - cf = 0; - } - env->cc_src = cf; - env->cc_x = (cf != 0); - env->cc_dest = result; + result = (uint64_t)val << shift; + + env->cc_c = (result >> 32) & 1; + env->cc_n = result; + env->cc_z = result; + env->cc_v = 0; + env->cc_x = shift ? env->cc_c : env->cc_x; + return result; } uint32_t HELPER(shr_cc)(CPUM68KState *env, uint32_t val, uint32_t shift) { + uint64_t temp; uint32_t result; - uint32_t cf; shift &= 63; - if (shift == 0) { - result = val; - cf = env->cc_src & CCF_C; - } else if (shift < 32) { - result = val >> shift; - cf = (val >> (shift - 1)) & 1; - } else if (shift == 32) { - result = 0; - cf = val >> 31; - } else /* shift > 32 */ { - result = 0; - cf = 0; - } - env->cc_src = cf; - env->cc_x = (cf != 0); - env->cc_dest = result; + temp = (uint64_t)val << 32 >> shift; + result = temp >> 32; + + env->cc_c = (temp >> 31) & 1; + env->cc_n = result; + env->cc_z = result; + env->cc_v = 0; + env->cc_x = shift ? env->cc_c : env->cc_x; + return result; } uint32_t HELPER(sar_cc)(CPUM68KState *env, uint32_t val, uint32_t shift) { + uint64_t temp; uint32_t result; - uint32_t cf; shift &= 63; - if (shift == 0) { - result = val; - cf = (env->cc_src & CCF_C) != 0; - } else if (shift < 32) { - result = (int32_t)val >> shift; - cf = (val >> (shift - 1)) & 1; - } else /* shift >= 32 */ { - result = (int32_t)val >> 31; - cf = val >> 31; - } - env->cc_src = cf; - env->cc_x = cf; - env->cc_dest = result; + temp = (int64_t)val << 32 >> shift; + result = temp >> 32; + + env->cc_c = (temp >> 31) & 1; + env->cc_n = result; + env->cc_z = result; + env->cc_v = result ^ val; + env->cc_x = shift ? env->cc_c : env->cc_x; + return result; } @@ -734,9 +584,101 @@ void HELPER(mac_set_flags)(CPUM68KState *env, uint32_t acc) } } +#define EXTSIGN(val, index) ( \ + (index == 0) ? (int8_t)(val) : ((index == 1) ? (int16_t)(val) : (val)) \ +) + +#define COMPUTE_CCR(op, x, n, z, v, c) { \ + switch (op) { \ + case CC_OP_FLAGS: \ + /* Everything in place. 
*/ \ + break; \ + case CC_OP_ADDB: \ + case CC_OP_ADDW: \ + case CC_OP_ADDL: \ + res = n; \ + src2 = v; \ + src1 = EXTSIGN(res - src2, op - CC_OP_ADDB); \ + c = x; \ + z = n; \ + v = (res ^ src1) & ~(src1 ^ src2); \ + break; \ + case CC_OP_SUBB: \ + case CC_OP_SUBW: \ + case CC_OP_SUBL: \ + res = n; \ + src2 = v; \ + src1 = EXTSIGN(res + src2, op - CC_OP_SUBB); \ + c = x; \ + z = n; \ + v = (res ^ src1) & (src1 ^ src2); \ + break; \ + case CC_OP_CMPB: \ + case CC_OP_CMPW: \ + case CC_OP_CMPL: \ + src1 = n; \ + src2 = v; \ + res = EXTSIGN(src1 - src2, op - CC_OP_CMPB); \ + n = res; \ + z = res; \ + c = src1 < src2; \ + v = (res ^ src1) & (src1 ^ src2); \ + break; \ + case CC_OP_LOGIC: \ + c = v = 0; \ + z = n; \ + break; \ + default: \ + cpu_abort(CPU(m68k_env_get_cpu(env)), "Bad CC_OP %d", op); \ + } \ +} while (0) + +uint32_t cpu_m68k_get_ccr(CPUM68KState *env) +{ + uint32_t x, c, n, z, v; + uint32_t res, src1, src2; + + x = env->cc_x; + n = env->cc_n; + z = env->cc_z; + v = env->cc_v; + c = env->cc_c; + + COMPUTE_CCR(env->cc_op, x, n, z, v, c); + + n = n >> 31; + z = (z == 0); + v = v >> 31; + + return x * CCF_X + n * CCF_N + z * CCF_Z + v * CCF_V + c * CCF_C; +} + +uint32_t HELPER(get_ccr)(CPUM68KState *env) +{ + return cpu_m68k_get_ccr(env); +} + +void cpu_m68k_set_ccr(CPUM68KState *env, uint32_t ccr) +{ + env->cc_x = (ccr & CCF_X ? 1 : 0); + env->cc_n = (ccr & CCF_N ? -1 : 0); + env->cc_z = (ccr & CCF_Z ? 0 : 1); + env->cc_v = (ccr & CCF_V ? -1 : 0); + env->cc_c = (ccr & CCF_C ? 1 : 0); + env->cc_op = CC_OP_FLAGS; +} + +void HELPER(set_ccr)(CPUM68KState *env, uint32_t ccr) +{ + cpu_m68k_set_ccr(env, ccr); +} + void HELPER(flush_flags)(CPUM68KState *env, uint32_t cc_op) { - cpu_m68k_flush_flags(env, cc_op); + uint32_t res, src1, src2; + + COMPUTE_CCR(cc_op, env->cc_x, env->cc_n, env->cc_z, env->cc_v, env->cc_c); + env->cc_op = CC_OP_FLAGS; } uint32_t HELPER(get_macf)(CPUM68KState *env, uint64_t val) @@ -812,7 +754,7 @@ uint32_t HELPER(get_mac_extf)(CPUM68KState *env, uint32_t acc) { uint32_t val; val = env->macc[acc] & 0x00ff; - val = (env->macc[acc] >> 32) & 0xff00; + val |= (env->macc[acc] >> 32) & 0xff00; val |= (env->macc[acc + 1] << 16) & 0x00ff0000; val |= (env->macc[acc + 1] >> 16) & 0xff000000; return val; @@ -866,23 +808,3 @@ void HELPER(set_mac_extu)(CPUM68KState *env, uint32_t val, uint32_t acc) res |= (uint64_t)(val & 0xffff0000) << 16; env->macc[acc + 1] = res; } - -void m68k_cpu_exec_enter(CPUState *cs) -{ - M68kCPU *cpu = M68K_CPU(cs); - CPUM68KState *env = &cpu->env; - - env->cc_op = CC_OP_FLAGS; - env->cc_dest = env->sr & 0xf; - env->cc_x = (env->sr >> 4) & 1; -} - -void m68k_cpu_exec_exit(CPUState *cs) -{ - M68kCPU *cpu = M68K_CPU(cs); - CPUM68KState *env = &cpu->env; - - cpu_m68k_flush_flags(env, env->cc_op); - env->cc_op = CC_OP_FLAGS; - env->sr = (env->sr & 0xffe0) | env->cc_dest | (env->cc_x << 4); -} diff --git a/target-m68k/helper.h b/target-m68k/helper.h index f4e5fdf021..2697e32d0b 100644 --- a/target-m68k/helper.h +++ b/target-m68k/helper.h @@ -1,14 +1,11 @@ DEF_HELPER_1(bitrev, i32, i32) DEF_HELPER_1(ff1, i32, i32) -DEF_HELPER_2(sats, i32, i32, i32) +DEF_HELPER_FLAGS_2(sats, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_2(divu, void, env, i32) DEF_HELPER_2(divs, void, env, i32) -DEF_HELPER_3(addx_cc, i32, env, i32, i32) -DEF_HELPER_3(subx_cc, i32, env, i32, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) DEF_HELPER_3(shr_cc, i32, env, i32, i32) DEF_HELPER_3(sar_cc, i32, env, i32, i32) -DEF_HELPER_2(xflag_lt, i32, i32, i32) DEF_HELPER_2(set_sr, void, env, i32) 
DEF_HELPER_3(movec, void, env, i32, i32) @@ -47,4 +44,6 @@ DEF_HELPER_3(set_mac_exts, void, env, i32, i32) DEF_HELPER_3(set_mac_extu, void, env, i32, i32) DEF_HELPER_2(flush_flags, void, env, i32) +DEF_HELPER_2(set_ccr, void, env, i32) +DEF_HELPER_FLAGS_1(get_ccr, TCG_CALL_NO_WG_SE, i32, env) DEF_HELPER_2(raise_exception, void, env, i32) diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c index e41ae46498..48e02e4062 100644 --- a/target-m68k/op_helper.c +++ b/target-m68k/op_helper.c @@ -63,9 +63,9 @@ static void do_rte(CPUM68KState *env) fmt = cpu_ldl_kernel(env, sp); env->pc = cpu_ldl_kernel(env, sp + 4); sp |= (fmt >> 28) & 3; - env->sr = fmt & 0xffff; env->aregs[7] = sp + 8; - m68k_switch_sp(env); + + helper_set_sr(env, fmt); } static void do_interrupt_all(CPUM68KState *env, int is_hw) @@ -112,6 +112,7 @@ static void do_interrupt_all(CPUM68KState *env, int is_hw) fmt |= 0x40000000; fmt |= vector << 16; fmt |= env->sr; + fmt |= cpu_m68k_get_ccr(env); env->sr |= SR_S; if (is_hw) { @@ -184,7 +185,6 @@ void HELPER(divu)(CPUM68KState *env, uint32_t word) uint32_t den; uint32_t quot; uint32_t rem; - uint32_t flags; num = env->div1; den = env->div2; @@ -194,16 +194,14 @@ void HELPER(divu)(CPUM68KState *env, uint32_t word) } quot = num / den; rem = num % den; - flags = 0; - if (word && quot > 0xffff) - flags |= CCF_V; - if (quot == 0) - flags |= CCF_Z; - else if ((int32_t)quot < 0) - flags |= CCF_N; + + env->cc_v = (word && quot > 0xffff ? -1 : 0); + env->cc_z = quot; + env->cc_n = quot; + env->cc_c = 0; + env->div1 = quot; env->div2 = rem; - env->cc_dest = flags; } void HELPER(divs)(CPUM68KState *env, uint32_t word) @@ -212,7 +210,6 @@ void HELPER(divs)(CPUM68KState *env, uint32_t word) int32_t den; int32_t quot; int32_t rem; - int32_t flags; num = env->div1; den = env->div2; @@ -221,14 +218,12 @@ void HELPER(divs)(CPUM68KState *env, uint32_t word) } quot = num / den; rem = num % den; - flags = 0; - if (word && quot != (int16_t)quot) - flags |= CCF_V; - if (quot == 0) - flags |= CCF_Z; - else if (quot < 0) - flags |= CCF_N; + + env->cc_v = (word && quot != (int16_t)quot ? -1 : 0); + env->cc_z = quot; + env->cc_n = quot; + env->cc_c = 0; + env->div1 = quot; env->div2 = rem; - env->cc_dest = flags; } diff --git a/target-m68k/qregs.def b/target-m68k/qregs.def index 204663e1aa..156c0f558f 100644 --- a/target-m68k/qregs.def +++ b/target-m68k/qregs.def @@ -2,9 +2,11 @@ DEFF64(FP_RESULT, fp_result) DEFO32(PC, pc) DEFO32(SR, sr) DEFO32(CC_OP, cc_op) -DEFO32(CC_DEST, cc_dest) -DEFO32(CC_SRC, cc_src) DEFO32(CC_X, cc_x) +DEFO32(CC_C, cc_c) +DEFO32(CC_N, cc_n) +DEFO32(CC_V, cc_v) +DEFO32(CC_Z, cc_z) DEFO32(DIV1, div1) DEFO32(DIV2, div2) DEFO32(MACSR, macsr) diff --git a/target-m68k/translate.c b/target-m68k/translate.c index ecd5e5c8fd..d6ed883882 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -59,9 +59,10 @@ static TCGv cpu_aregs[8]; static TCGv_i64 cpu_fregs[8]; static TCGv_i64 cpu_macc[4]; -#define DREG(insn, pos) cpu_dregs[((insn) >> (pos)) & 7] -#define AREG(insn, pos) cpu_aregs[((insn) >> (pos)) & 7] -#define FREG(insn, pos) cpu_fregs[((insn) >> (pos)) & 7] +#define REG(insn, pos) (((insn) >> (pos)) & 7) +#define DREG(insn, pos) cpu_dregs[REG(insn, pos)] +#define AREG(insn, pos) cpu_aregs[REG(insn, pos)] +#define FREG(insn, pos) cpu_fregs[REG(insn, pos)] #define MACREG(acc) cpu_macc[acc] #define QREG_SP cpu_aregs[7] @@ -132,7 +133,8 @@ typedef struct DisasContext { target_ulong insn_pc; /* Start of the current instruction. 
*/ target_ulong pc; int is_jmp; - int cc_op; + CCOp cc_op; /* Current CC operation */ + int cc_op_synced; int user; uint32_t fpcr; struct TranslationBlock *tb; @@ -154,12 +156,6 @@ typedef struct DisasContext { static void *gen_throws_exception; #define gen_last_qop NULL -#define OS_BYTE 0 -#define OS_WORD 1 -#define OS_LONG 2 -#define OS_SINGLE 4 -#define OS_DOUBLE 5 - typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn); #ifdef DEBUG_DISPATCH @@ -170,7 +166,7 @@ typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn); uint16_t insn) \ { \ qemu_log("Dispatch " #name "\n"); \ - real_disas_##name(s, env, insn); \ + real_disas_##name(env, s, insn); \ } \ static void real_disas_##name(CPUM68KState *env, DisasContext *s, \ uint16_t insn) @@ -180,6 +176,48 @@ typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn); uint16_t insn) #endif +static const uint8_t cc_op_live[CC_OP_NB] = { + [CC_OP_FLAGS] = CCF_C | CCF_V | CCF_Z | CCF_N | CCF_X, + [CC_OP_ADDB ... CC_OP_ADDL] = CCF_X | CCF_N | CCF_V, + [CC_OP_SUBB ... CC_OP_SUBL] = CCF_X | CCF_N | CCF_V, + [CC_OP_CMPB ... CC_OP_CMPL] = CCF_X | CCF_N | CCF_V, + [CC_OP_LOGIC] = CCF_X | CCF_N +}; + +static void set_cc_op(DisasContext *s, CCOp op) +{ + CCOp old_op = s->cc_op; + int dead; + + if (old_op == op) { + return; + } + s->cc_op = op; + s->cc_op_synced = 0; + + /* Discard CC computation that will no longer be used. + Note that X and N are never dead. */ + dead = cc_op_live[old_op] & ~cc_op_live[op]; + if (dead & CCF_C) { + tcg_gen_discard_i32(QREG_CC_C); + } + if (dead & CCF_Z) { + tcg_gen_discard_i32(QREG_CC_Z); + } + if (dead & CCF_V) { + tcg_gen_discard_i32(QREG_CC_V); + } +} + +/* Update the CPU env CC_OP state. */ +static void update_cc_op(DisasContext *s) +{ + if (!s->cc_op_synced) { + s->cc_op_synced = 1; + tcg_gen_movi_i32(QREG_CC_OP, s->cc_op); + } +} + /* Generate a load from the specified address. Narrow values are sign extended to full register width. */ static inline TCGv gen_load(DisasContext * s, int opsize, TCGv addr, int sign) @@ -268,14 +306,27 @@ static TCGv gen_ldst(DisasContext *s, int opsize, TCGv addr, TCGv val, } } +/* Read a 16-bit immediate constant */ +static inline uint16_t read_im16(CPUM68KState *env, DisasContext *s) +{ + uint16_t im; + im = cpu_lduw_code(env, s->pc); + s->pc += 2; + return im; +} + +/* Read an 8-bit immediate constant */ +static inline uint8_t read_im8(CPUM68KState *env, DisasContext *s) +{ + return read_im16(env, s); +} + /* Read a 32-bit immediate constant. 
*/ static inline uint32_t read_im32(CPUM68KState *env, DisasContext *s) { uint32_t im; - im = ((uint32_t)cpu_lduw_code(env, s->pc)) << 16; - s->pc += 2; - im |= cpu_lduw_code(env, s->pc); - s->pc += 2; + im = read_im16(env, s) << 16; + im |= 0xffff & read_im16(env, s); return im; } @@ -309,12 +360,16 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) uint32_t bd, od; offset = s->pc; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if ((ext & 0x800) == 0 && !m68k_feature(s->env, M68K_FEATURE_WORD_INDEX)) return NULL_QREG; + if (m68k_feature(s->env, M68K_FEATURE_M68000) && + !m68k_feature(s->env, M68K_FEATURE_SCALED_INDEX)) { + ext &= ~(3 << 9); + } + if (ext & 0x100) { /* full extension word format */ if (!m68k_feature(s->env, M68K_FEATURE_EXT_FULL)) @@ -323,8 +378,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) if ((ext & 0x30) > 0x10) { /* base displacement */ if ((ext & 0x30) == 0x20) { - bd = (int16_t)cpu_lduw_code(env, s->pc); - s->pc += 2; + bd = (int16_t)read_im16(env, s); } else { bd = read_im32(env, s); } @@ -372,8 +426,7 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) if ((ext & 3) > 1) { /* outer displacement */ if ((ext & 3) == 2) { - od = (int16_t)cpu_lduw_code(env, s->pc); - s->pc += 2; + od = (int16_t)read_im16(env, s); } else { od = read_im32(env, s); } @@ -401,33 +454,145 @@ static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, TCGv base) return add; } -/* Update the CPU env CC_OP state. */ -static inline void gen_flush_cc_op(DisasContext *s) +/* Sign or zero extend a value. */ + +static inline void gen_ext(TCGv res, TCGv val, int opsize, int sign) { - if (s->cc_op != CC_OP_DYNAMIC) - tcg_gen_movi_i32(QREG_CC_OP, s->cc_op); + switch (opsize) { + case OS_BYTE: + if (sign) { + tcg_gen_ext8s_i32(res, val); + } else { + tcg_gen_ext8u_i32(res, val); + } + break; + case OS_WORD: + if (sign) { + tcg_gen_ext16s_i32(res, val); + } else { + tcg_gen_ext16u_i32(res, val); + } + break; + case OS_LONG: + tcg_gen_mov_i32(res, val); + break; + default: + g_assert_not_reached(); + } } /* Evaluate all the CC flags. */ -static inline void gen_flush_flags(DisasContext *s) + +static void gen_flush_flags(DisasContext *s) { - if (s->cc_op == CC_OP_FLAGS) + TCGv t0, t1; + + switch (s->cc_op) { + case CC_OP_FLAGS: return; - gen_flush_cc_op(s); - gen_helper_flush_flags(cpu_env, QREG_CC_OP); + + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + /* Compute signed overflow for addition. */ + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + tcg_gen_sub_i32(t0, QREG_CC_N, QREG_CC_V); + gen_ext(t0, t0, s->cc_op - CC_OP_ADDB, 1); + tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0); + tcg_temp_free(t0); + tcg_gen_andc_i32(QREG_CC_V, t1, QREG_CC_V); + tcg_temp_free(t1); + break; + + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + /* Compute signed overflow for subtraction. 
*/ + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + tcg_gen_add_i32(t0, QREG_CC_N, QREG_CC_V); + gen_ext(t0, t0, s->cc_op - CC_OP_SUBB, 1); + tcg_gen_xor_i32(t1, QREG_CC_N, QREG_CC_V); + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, t0); + tcg_temp_free(t0); + tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t1); + tcg_temp_free(t1); + break; + + case CC_OP_CMPB: + case CC_OP_CMPW: + case CC_OP_CMPL: + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_C, QREG_CC_N, QREG_CC_V); + tcg_gen_sub_i32(QREG_CC_Z, QREG_CC_N, QREG_CC_V); + gen_ext(QREG_CC_Z, QREG_CC_Z, s->cc_op - CC_OP_CMPB, 1); + /* Compute signed overflow for subtraction. */ + t0 = tcg_temp_new(); + tcg_gen_xor_i32(t0, QREG_CC_Z, QREG_CC_N); + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_V, QREG_CC_N); + tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, t0); + tcg_temp_free(t0); + tcg_gen_mov_i32(QREG_CC_N, QREG_CC_Z); + break; + + case CC_OP_LOGIC: + tcg_gen_mov_i32(QREG_CC_Z, QREG_CC_N); + tcg_gen_movi_i32(QREG_CC_C, 0); + tcg_gen_movi_i32(QREG_CC_V, 0); + break; + + case CC_OP_DYNAMIC: + gen_helper_flush_flags(cpu_env, QREG_CC_OP); + break; + + default: + t0 = tcg_const_i32(s->cc_op); + gen_helper_flush_flags(cpu_env, t0); + tcg_temp_free(t0); + break; + } + + /* Note that flush_flags also assigned to env->cc_op. */ s->cc_op = CC_OP_FLAGS; + s->cc_op_synced = 1; +} + +static inline TCGv gen_extend(TCGv val, int opsize, int sign) +{ + TCGv tmp; + + if (opsize == OS_LONG) { + tmp = val; + } else { + tmp = tcg_temp_new(); + gen_ext(tmp, val, opsize, sign); + } + + return tmp; } -static void gen_logic_cc(DisasContext *s, TCGv val) +static void gen_logic_cc(DisasContext *s, TCGv val, int opsize) { - tcg_gen_mov_i32(QREG_CC_DEST, val); - s->cc_op = CC_OP_LOGIC; + gen_ext(QREG_CC_N, val, opsize, 1); + set_cc_op(s, CC_OP_LOGIC); } -static void gen_update_cc_add(TCGv dest, TCGv src) +static void gen_update_cc_cmp(DisasContext *s, TCGv dest, TCGv src, int opsize) { - tcg_gen_mov_i32(QREG_CC_DEST, dest); - tcg_gen_mov_i32(QREG_CC_SRC, src); + tcg_gen_mov_i32(QREG_CC_N, dest); + tcg_gen_mov_i32(QREG_CC_V, src); + set_cc_op(s, CC_OP_CMPB + opsize); +} + +static void gen_update_cc_add(TCGv dest, TCGv src, int opsize) +{ + gen_ext(QREG_CC_N, dest, opsize, 1); + tcg_gen_mov_i32(QREG_CC_V, src); } static inline int opsize_bytes(int opsize) @@ -438,6 +603,19 @@ static inline int opsize_bytes(int opsize) case OS_LONG: return 4; case OS_SINGLE: return 4; case OS_DOUBLE: return 8; + case OS_EXTENDED: return 12; + case OS_PACKED: return 12; + default: + g_assert_not_reached(); + } +} + +static inline int insn_opsize(int insn) +{ + switch ((insn >> 6) & 3) { + case 0: return OS_BYTE; + case 1: return OS_WORD; + case 2: return OS_LONG; default: g_assert_not_reached(); } @@ -470,36 +648,6 @@ static void gen_partset_reg(int opsize, TCGv reg, TCGv val) } } -/* Sign or zero extend a value. */ -static inline TCGv gen_extend(TCGv val, int opsize, int sign) -{ - TCGv tmp; - - switch (opsize) { - case OS_BYTE: - tmp = tcg_temp_new(); - if (sign) - tcg_gen_ext8s_i32(tmp, val); - else - tcg_gen_ext8u_i32(tmp, val); - break; - case OS_WORD: - tmp = tcg_temp_new(); - if (sign) - tcg_gen_ext16s_i32(tmp, val); - else - tcg_gen_ext16u_i32(tmp, val); - break; - case OS_LONG: - case OS_SINGLE: - tmp = val; - break; - default: - g_assert_not_reached(); - } - return tmp; -} - /* Generate code for an "effective address". Does not adjust the base register for autoincrement addressing modes. 
*/ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, @@ -525,8 +673,7 @@ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, case 5: /* Indirect displacement. */ reg = AREG(insn, 0); tmp = tcg_temp_new(); - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); tcg_gen_addi_i32(tmp, reg, (int16_t)ext); return tmp; case 6: /* Indirect index + displacement. */ @@ -535,16 +682,14 @@ static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn, case 7: /* Other */ switch (insn & 7) { case 0: /* Absolute short. */ - offset = cpu_ldsw_code(env, s->pc); - s->pc += 2; + offset = (int16_t)read_im16(env, s); return tcg_const_i32(offset); case 1: /* Absolute long. */ offset = read_im32(env, s); return tcg_const_i32(offset); case 2: /* pc displacement */ offset = s->pc; - offset += cpu_ldsw_code(env, s->pc); - s->pc += 2; + offset += (int16_t)read_im16(env, s); return tcg_const_i32(offset); case 3: /* pc index+displacement. */ return gen_lea_indexed(env, s, NULL_QREG); @@ -651,19 +796,17 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, switch (opsize) { case OS_BYTE: if (what == EA_LOADS) { - offset = cpu_ldsb_code(env, s->pc + 1); + offset = (int8_t)read_im8(env, s); } else { - offset = cpu_ldub_code(env, s->pc + 1); + offset = read_im8(env, s); } - s->pc += 2; break; case OS_WORD: if (what == EA_LOADS) { - offset = cpu_ldsw_code(env, s->pc); + offset = (int16_t)read_im16(env, s); } else { - offset = cpu_lduw_code(env, s->pc); + offset = read_im16(env, s); } - s->pc += 2; break; case OS_LONG: offset = read_im32(env, s); @@ -680,131 +823,217 @@ static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn, return NULL_QREG; } -/* This generates a conditional branch, clobbering all temporaries. */ -static void gen_jmpcc(DisasContext *s, int cond, TCGLabel *l1) -{ - TCGv tmp; +typedef struct { + TCGCond tcond; + bool g1; + bool g2; + TCGv v1; + TCGv v2; +} DisasCompare; + +static void gen_cc_cond(DisasCompare *c, DisasContext *s, int cond) +{ + TCGv tmp, tmp2; + TCGCond tcond; + CCOp op = s->cc_op; + + /* The CC_OP_CMP form can handle most normal comparisons directly. */ + if (op == CC_OP_CMPB || op == CC_OP_CMPW || op == CC_OP_CMPL) { + c->g1 = c->g2 = 1; + c->v1 = QREG_CC_N; + c->v2 = QREG_CC_V; + switch (cond) { + case 2: /* HI */ + case 3: /* LS */ + tcond = TCG_COND_LEU; + goto done; + case 4: /* CC */ + case 5: /* CS */ + tcond = TCG_COND_LTU; + goto done; + case 6: /* NE */ + case 7: /* EQ */ + tcond = TCG_COND_EQ; + goto done; + case 10: /* PL */ + case 11: /* MI */ + c->g1 = c->g2 = 0; + c->v2 = tcg_const_i32(0); + c->v1 = tmp = tcg_temp_new(); + tcg_gen_sub_i32(tmp, QREG_CC_N, QREG_CC_V); + gen_ext(tmp, tmp, op - CC_OP_CMPB, 1); + /* fallthru */ + case 12: /* GE */ + case 13: /* LT */ + tcond = TCG_COND_LT; + goto done; + case 14: /* GT */ + case 15: /* LE */ + tcond = TCG_COND_LE; + goto done; + } + } + + c->g1 = 1; + c->g2 = 0; + c->v2 = tcg_const_i32(0); - /* TODO: Optimize compare/branch pairs rather than always flushing - flag state to CC_OP_FLAGS. */ - gen_flush_flags(s); switch (cond) { case 0: /* T */ - tcg_gen_br(l1); - break; case 1: /* F */ + c->v1 = c->v2; + tcond = TCG_COND_NEVER; + goto done; + case 14: /* GT (!(Z || (N ^ V))) */ + case 15: /* LE (Z || (N ^ V)) */ + /* Logic operations clear V, which simplifies LE to (Z || N), + and since Z and N are co-located, this becomes a normal + comparison vs N. 
*/ + if (op == CC_OP_LOGIC) { + c->v1 = QREG_CC_N; + tcond = TCG_COND_LE; + goto done; + } break; - case 2: /* HI (!C && !Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); + case 12: /* GE (!(N ^ V)) */ + case 13: /* LT (N ^ V) */ + /* Logic operations clear V, which simplifies this to N. */ + if (op != CC_OP_LOGIC) { + break; + } + /* fallthru */ + case 10: /* PL (!N) */ + case 11: /* MI (N) */ + /* Several cases represent N normally. */ + if (op == CC_OP_ADDB || op == CC_OP_ADDW || op == CC_OP_ADDL || + op == CC_OP_SUBB || op == CC_OP_SUBW || op == CC_OP_SUBL || + op == CC_OP_LOGIC) { + c->v1 = QREG_CC_N; + tcond = TCG_COND_LT; + goto done; + } break; - case 3: /* LS (C || Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + case 6: /* NE (!Z) */ + case 7: /* EQ (Z) */ + /* Some cases fold Z into N. */ + if (op == CC_OP_ADDB || op == CC_OP_ADDW || op == CC_OP_ADDL || + op == CC_OP_SUBB || op == CC_OP_SUBW || op == CC_OP_SUBL || + op == CC_OP_LOGIC) { + tcond = TCG_COND_EQ; + c->v1 = QREG_CC_N; + goto done; + } break; case 4: /* CC (!C) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); + case 5: /* CS (C) */ + /* Some cases fold C into X. */ + if (op == CC_OP_ADDB || op == CC_OP_ADDW || op == CC_OP_ADDL || + op == CC_OP_ADDB || op == CC_OP_ADDW || op == CC_OP_ADDL) { + tcond = TCG_COND_NE; + c->v1 = QREG_CC_X; + goto done; + } + /* fallthru */ + case 8: /* VC (!V) */ + case 9: /* VS (V) */ + /* Logic operations clear V and C. */ + if (op == CC_OP_LOGIC) { + tcond = TCG_COND_NEVER; + c->v1 = c->v2; + goto done; + } + break; + } + + /* Otherwise, flush flag state to CC_OP_FLAGS. */ + gen_flush_flags(s); + + switch (cond) { + case 0: /* T */ + case 1: /* F */ + default: + /* Invalid, or handled above. 
*/ + abort(); + case 2: /* HI (!C && !Z) -> !(C || Z)*/ + case 3: /* LS (C || Z) */ + c->v1 = tmp = tcg_temp_new(); + c->g1 = 0; + tcg_gen_setcond_i32(TCG_COND_EQ, tmp, QREG_CC_Z, c->v2); + tcg_gen_or_i32(tmp, tmp, QREG_CC_C); + tcond = TCG_COND_NE; break; + case 4: /* CC (!C) */ case 5: /* CS (C) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_C); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_C; + tcond = TCG_COND_NE; break; case 6: /* NE (!Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 7: /* EQ (Z) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_Z; + tcond = TCG_COND_EQ; break; case 8: /* VC (!V) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 9: /* VS (V) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_V; + tcond = TCG_COND_LT; break; case 10: /* PL (!N) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 11: /* MI (N) */ - tmp = tcg_temp_new(); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = QREG_CC_N; + tcond = TCG_COND_LT; break; case 12: /* GE (!(N ^ V)) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_shri_i32(tmp, QREG_CC_DEST, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 13: /* LT (N ^ V) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_shri_i32(tmp, QREG_CC_DEST, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = tmp = tcg_temp_new(); + c->g1 = 0; + tcg_gen_xor_i32(tmp, QREG_CC_N, QREG_CC_V); + tcond = TCG_COND_LT; break; case 14: /* GT (!(Z || (N ^ V))) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_shri_i32(tmp, tmp, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, l1); - break; case 15: /* LE (Z || (N ^ V)) */ - tmp = tcg_temp_new(); - assert(CCF_V == (CCF_N >> 2)); - tcg_gen_andi_i32(tmp, QREG_CC_DEST, CCF_N); - tcg_gen_shri_i32(tmp, tmp, 2); - tcg_gen_xor_i32(tmp, tmp, QREG_CC_DEST); - tcg_gen_andi_i32(tmp, tmp, CCF_V | CCF_Z); - tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, l1); + c->v1 = tmp = tcg_temp_new(); + c->g1 = 0; + tcg_gen_setcond_i32(TCG_COND_EQ, tmp, QREG_CC_Z, c->v2); + tcg_gen_neg_i32(tmp, tmp); + tmp2 = tcg_temp_new(); + tcg_gen_xor_i32(tmp2, QREG_CC_N, QREG_CC_V); + tcg_gen_or_i32(tmp, tmp, tmp2); + tcg_temp_free(tmp2); + tcond = TCG_COND_LT; break; - default: - /* Should ever happen. 
*/ - abort(); } + + done: + if ((cond & 1) == 0) { + tcond = tcg_invert_cond(tcond); + } + c->tcond = tcond; } -DISAS_INSN(scc) +static void free_cond(DisasCompare *c) { - TCGLabel *l1; - int cond; - TCGv reg; + if (!c->g1) { + tcg_temp_free(c->v1); + } + if (!c->g2) { + tcg_temp_free(c->v2); + } +} - l1 = gen_new_label(); - cond = (insn >> 8) & 0xf; - reg = DREG(insn, 0); - tcg_gen_andi_i32(reg, reg, 0xffffff00); - /* This is safe because we modify the reg directly, with no other values - live. */ - gen_jmpcc(s, cond ^ 1, l1); - tcg_gen_ori_i32(reg, reg, 0xff); - gen_set_label(l1); +static void gen_jmpcc(DisasContext *s, int cond, TCGLabel *l1) +{ + DisasCompare c; + + gen_cc_cond(&c, s, cond); + update_cc_op(s); + tcg_gen_brcond_i32(c.tcond, c.v1, c.v2, l1); + free_cond(&c); } /* Force a TB lookup after an instruction that changes the CPU state. */ static void gen_lookup_tb(DisasContext *s) { - gen_flush_cc_op(s); + update_cc_op(s); tcg_gen_movi_i32(QREG_PC, s->pc); s->is_jmp = DISAS_UPDATE; } @@ -812,7 +1041,7 @@ static void gen_lookup_tb(DisasContext *s) /* Generate a jump to an immediate address. */ static void gen_jmp_im(DisasContext *s, uint32_t dest) { - gen_flush_cc_op(s); + update_cc_op(s); tcg_gen_movi_i32(QREG_PC, dest); s->is_jmp = DISAS_JUMP; } @@ -820,14 +1049,14 @@ static void gen_jmp_im(DisasContext *s, uint32_t dest) /* Generate a jump to the address in qreg DEST. */ static void gen_jmp(DisasContext *s, TCGv dest) { - gen_flush_cc_op(s); + update_cc_op(s); tcg_gen_mov_i32(QREG_PC, dest); s->is_jmp = DISAS_JUMP; } static void gen_exception(DisasContext *s, uint32_t where, int nr) { - gen_flush_cc_op(s); + update_cc_op(s); gen_jmp_im(s, where); gen_helper_raise_exception(cpu_env, tcg_const_i32(nr)); } @@ -880,6 +1109,48 @@ static void gen_jmp_tb(DisasContext *s, int n, uint32_t dest) s->is_jmp = DISAS_TB_JUMP; } +DISAS_INSN(scc) +{ + DisasCompare c; + int cond; + TCGv tmp; + + cond = (insn >> 8) & 0xf; + gen_cc_cond(&c, s, cond); + + tmp = tcg_temp_new(); + tcg_gen_setcond_i32(c.tcond, tmp, c.v1, c.v2); + free_cond(&c); + + tcg_gen_neg_i32(tmp, tmp); + DEST_EA(env, insn, OS_BYTE, tmp, NULL); + tcg_temp_free(tmp); +} + +DISAS_INSN(dbcc) +{ + TCGLabel *l1; + TCGv reg; + TCGv tmp; + int16_t offset; + uint32_t base; + + reg = DREG(insn, 0); + base = s->pc; + offset = (int16_t)read_im16(env, s); + l1 = gen_new_label(); + gen_jmpcc(s, (insn >> 8) & 0xf, l1); + + tmp = tcg_temp_new(); + tcg_gen_ext16s_i32(tmp, reg); + tcg_gen_addi_i32(tmp, tmp, -1); + gen_partset_reg(OS_WORD, reg, tmp); + tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, -1, l1); + gen_jmp_tb(s, 1, base + offset); + gen_set_label(l1); + gen_jmp_tb(s, 0, s->pc); +} + DISAS_INSN(undef_mac) { gen_exception(s, s->pc - 2, EXCP_LINEA); @@ -915,8 +1186,7 @@ DISAS_INSN(mulw) SRC_EA(env, src, OS_WORD, sign, NULL); tcg_gen_mul_i32(tmp, tmp, src); tcg_gen_mov_i32(reg, tmp); - /* Unlike m68k, coldfire always clears the overflow bit. 
*/ - gen_logic_cc(s, tmp); + gen_logic_cc(s, tmp, OS_LONG); } DISAS_INSN(divw) @@ -946,7 +1216,8 @@ DISAS_INSN(divw) tcg_gen_ext16u_i32(tmp, QREG_DIV1); tcg_gen_shli_i32(src, QREG_DIV2, 16); tcg_gen_or_i32(reg, tmp, src); - s->cc_op = CC_OP_FLAGS; + + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(divl) @@ -956,8 +1227,7 @@ DISAS_INSN(divl) TCGv reg; uint16_t ext; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext & 0x87f8) { gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED); return; @@ -979,7 +1249,7 @@ DISAS_INSN(divl) /* rem */ tcg_gen_mov_i32 (reg, QREG_DIV2); } - s->cc_op = CC_OP_FLAGS; + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(addsub) @@ -990,35 +1260,37 @@ DISAS_INSN(addsub) TCGv tmp; TCGv addr; int add; + int opsize; add = (insn & 0x4000) != 0; - reg = DREG(insn, 9); + opsize = insn_opsize(insn); + reg = gen_extend(DREG(insn, 9), opsize, 1); dest = tcg_temp_new(); if (insn & 0x100) { - SRC_EA(env, tmp, OS_LONG, 0, &addr); + SRC_EA(env, tmp, opsize, 1, &addr); src = reg; } else { tmp = reg; - SRC_EA(env, src, OS_LONG, 0, NULL); + SRC_EA(env, src, opsize, 1, NULL); } if (add) { tcg_gen_add_i32(dest, tmp, src); - gen_helper_xflag_lt(QREG_CC_X, dest, src); - s->cc_op = CC_OP_ADD; + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, src); + set_cc_op(s, CC_OP_ADDB + opsize); } else { - gen_helper_xflag_lt(QREG_CC_X, tmp, src); + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, tmp, src); tcg_gen_sub_i32(dest, tmp, src); - s->cc_op = CC_OP_SUB; + set_cc_op(s, CC_OP_SUBB + opsize); } - gen_update_cc_add(dest, src); + gen_update_cc_add(dest, src, opsize); if (insn & 0x100) { - DEST_EA(env, insn, OS_LONG, dest, &addr); + DEST_EA(env, insn, opsize, dest, &addr); } else { - tcg_gen_mov_i32(reg, dest); + gen_partset_reg(opsize, DREG(insn, 9), dest); } + tcg_temp_free(dest); } - /* Reverse the order of the bits in REG. */ DISAS_INSN(bitrev) { @@ -1043,32 +1315,27 @@ DISAS_INSN(bitop_reg) opsize = OS_LONG; op = (insn >> 6) & 3; SRC_EA(env, src1, opsize, 0, op ? &addr: NULL); - src2 = DREG(insn, 9); - dest = tcg_temp_new(); gen_flush_flags(s); - tmp = tcg_temp_new(); + src2 = tcg_temp_new(); if (opsize == OS_BYTE) - tcg_gen_andi_i32(tmp, src2, 7); + tcg_gen_andi_i32(src2, DREG(insn, 9), 7); else - tcg_gen_andi_i32(tmp, src2, 31); - src2 = tmp; - tmp = tcg_temp_new(); - tcg_gen_shr_i32(tmp, src1, src2); - tcg_gen_andi_i32(tmp, tmp, 1); - tcg_gen_shli_i32(tmp, tmp, 2); - /* Clear CCF_Z if bit set. 
*/ - tcg_gen_ori_i32(QREG_CC_DEST, QREG_CC_DEST, CCF_Z); - tcg_gen_xor_i32(QREG_CC_DEST, QREG_CC_DEST, tmp); - - tcg_gen_shl_i32(tmp, tcg_const_i32(1), src2); + tcg_gen_andi_i32(src2, DREG(insn, 9), 31); + + tmp = tcg_const_i32(1); + tcg_gen_shl_i32(tmp, tmp, src2); + tcg_temp_free(src2); + + tcg_gen_and_i32(QREG_CC_Z, src1, tmp); + + dest = tcg_temp_new(); switch (op) { case 1: /* bchg */ tcg_gen_xor_i32(dest, src1, tmp); break; case 2: /* bclr */ - tcg_gen_not_i32(tmp, tmp); - tcg_gen_and_i32(dest, src1, tmp); + tcg_gen_andc_i32(dest, src1, tmp); break; case 3: /* bset */ tcg_gen_or_i32(dest, src1, tmp); @@ -1076,8 +1343,11 @@ DISAS_INSN(bitop_reg) default: /* btst */ break; } - if (op) + tcg_temp_free(tmp); + if (op) { DEST_EA(env, insn, opsize, dest, &addr); + } + tcg_temp_free(dest); } DISAS_INSN(sats) @@ -1085,8 +1355,8 @@ DISAS_INSN(sats) TCGv reg; reg = DREG(insn, 0); gen_flush_flags(s); - gen_helper_sats(reg, reg, QREG_CC_DEST); - gen_logic_cc(s, reg); + gen_helper_sats(reg, reg, QREG_CC_V); + gen_logic_cc(s, reg, OS_LONG); } static void gen_push(DisasContext *s, TCGv val) @@ -1108,8 +1378,7 @@ DISAS_INSN(movem) TCGv tmp; int is_load; - mask = cpu_lduw_code(env, s->pc); - s->pc += 2; + mask = read_im16(env, s); tmp = gen_lea(env, s, insn, OS_LONG); if (IS_NULL_QREG(tmp)) { gen_addr_fault(s); @@ -1152,8 +1421,7 @@ DISAS_INSN(bitop_im) opsize = OS_LONG; op = (insn >> 6) & 3; - bitnum = cpu_lduw_code(env, s->pc); - s->pc += 2; + bitnum = read_im16(env, s); if (bitnum & 0xff00) { disas_undef(env, s, insn); return; @@ -1168,19 +1436,10 @@ DISAS_INSN(bitop_im) bitnum &= 31; mask = 1 << bitnum; - tmp = tcg_temp_new(); - assert (CCF_Z == (1 << 2)); - if (bitnum > 2) - tcg_gen_shri_i32(tmp, src1, bitnum - 2); - else if (bitnum < 2) - tcg_gen_shli_i32(tmp, src1, 2 - bitnum); - else - tcg_gen_mov_i32(tmp, src1); - tcg_gen_andi_i32(tmp, tmp, CCF_Z); - /* Clear CCF_Z if bit set. */ - tcg_gen_ori_i32(QREG_CC_DEST, QREG_CC_DEST, CCF_Z); - tcg_gen_xor_i32(QREG_CC_DEST, QREG_CC_DEST, tmp); + tcg_gen_andi_i32(QREG_CC_Z, src1, mask); + if (op) { + tmp = tcg_temp_new(); switch (op) { case 1: /* bchg */ tcg_gen_xori_i32(tmp, src1, mask); @@ -1195,60 +1454,72 @@ DISAS_INSN(bitop_im) break; } DEST_EA(env, insn, opsize, tmp, &addr); + tcg_temp_free(tmp); } } DISAS_INSN(arith_im) { int op; - uint32_t im; + TCGv im; TCGv src1; TCGv dest; TCGv addr; + int opsize; op = (insn >> 9) & 7; - SRC_EA(env, src1, OS_LONG, 0, (op == 6) ? NULL : &addr); - im = read_im32(env, s); + opsize = insn_opsize(insn); + switch (opsize) { + case OS_BYTE: + im = tcg_const_i32((int8_t)read_im8(env, s)); + break; + case OS_WORD: + im = tcg_const_i32((int16_t)read_im16(env, s)); + break; + case OS_LONG: + im = tcg_const_i32(read_im32(env, s)); + break; + default: + abort(); + } + SRC_EA(env, src1, opsize, 1, (op == 6) ? 
NULL : &addr); dest = tcg_temp_new(); switch (op) { case 0: /* ori */ - tcg_gen_ori_i32(dest, src1, im); - gen_logic_cc(s, dest); + tcg_gen_or_i32(dest, src1, im); + gen_logic_cc(s, dest, opsize); break; case 1: /* andi */ - tcg_gen_andi_i32(dest, src1, im); - gen_logic_cc(s, dest); + tcg_gen_and_i32(dest, src1, im); + gen_logic_cc(s, dest, opsize); break; case 2: /* subi */ - tcg_gen_mov_i32(dest, src1); - gen_helper_xflag_lt(QREG_CC_X, dest, tcg_const_i32(im)); - tcg_gen_subi_i32(dest, dest, im); - gen_update_cc_add(dest, tcg_const_i32(im)); - s->cc_op = CC_OP_SUB; + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, src1, im); + tcg_gen_sub_i32(dest, src1, im); + gen_update_cc_add(dest, im, opsize); + set_cc_op(s, CC_OP_SUBB + opsize); break; case 3: /* addi */ - tcg_gen_mov_i32(dest, src1); - tcg_gen_addi_i32(dest, dest, im); - gen_update_cc_add(dest, tcg_const_i32(im)); - gen_helper_xflag_lt(QREG_CC_X, dest, tcg_const_i32(im)); - s->cc_op = CC_OP_ADD; + tcg_gen_add_i32(dest, src1, im); + gen_update_cc_add(dest, im, opsize); + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, im); + set_cc_op(s, CC_OP_ADDB + opsize); break; case 5: /* eori */ - tcg_gen_xori_i32(dest, src1, im); - gen_logic_cc(s, dest); + tcg_gen_xor_i32(dest, src1, im); + gen_logic_cc(s, dest, opsize); break; case 6: /* cmpi */ - tcg_gen_mov_i32(dest, src1); - tcg_gen_subi_i32(dest, dest, im); - gen_update_cc_add(dest, tcg_const_i32(im)); - s->cc_op = CC_OP_SUB; + gen_update_cc_cmp(s, src1, im, opsize); break; default: abort(); } + tcg_temp_free(im); if (op != 6) { - DEST_EA(env, insn, OS_LONG, dest, &addr); + DEST_EA(env, insn, opsize, dest, &addr); } + tcg_temp_free(dest); } DISAS_INSN(byterev) @@ -1292,17 +1563,50 @@ DISAS_INSN(move) dest_ea = ((insn >> 9) & 7) | (op << 3); DEST_EA(env, dest_ea, opsize, src, NULL); /* This will be correct because loads sign extend. */ - gen_logic_cc(s, src); + gen_logic_cc(s, src, opsize); } } DISAS_INSN(negx) { - TCGv reg; + TCGv z; + TCGv src; + TCGv addr; + int opsize; - gen_flush_flags(s); - reg = DREG(insn, 0); - gen_helper_subx_cc(reg, cpu_env, tcg_const_i32(0), reg); + opsize = insn_opsize(insn); + SRC_EA(env, src, opsize, 1, &addr); + + gen_flush_flags(s); /* compute old Z */ + + /* Perform subtraction with borrow. + * (X, N) = -(src + X); + */ + + z = tcg_const_i32(0); + tcg_gen_add2_i32(QREG_CC_N, QREG_CC_X, src, z, QREG_CC_X, z); + tcg_gen_sub2_i32(QREG_CC_N, QREG_CC_X, z, z, QREG_CC_N, QREG_CC_X); + tcg_temp_free(z); + gen_ext(QREG_CC_N, QREG_CC_N, opsize, 1); + + tcg_gen_andi_i32(QREG_CC_X, QREG_CC_X, 1); + + /* Compute signed-overflow for negation. The normal formula for + * subtraction is (res ^ src) & (src ^ dest), but with dest==0 + * this simplifies to res & src. + */ + + tcg_gen_and_i32(QREG_CC_V, QREG_CC_N, src); + + /* Copy the rest of the results into place. 
*/ + tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_N); /* !Z is sticky */ + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + + set_cc_op(s, CC_OP_FLAGS); + + /* result is in QREG_CC_N */ + + DEST_EA(env, insn, opsize, QREG_CC_N, &addr); } DISAS_INSN(lea) @@ -1323,21 +1627,9 @@ DISAS_INSN(clr) { int opsize; - switch ((insn >> 6) & 3) { - case 0: /* clr.b */ - opsize = OS_BYTE; - break; - case 1: /* clr.w */ - opsize = OS_WORD; - break; - case 2: /* clr.l */ - opsize = OS_LONG; - break; - default: - abort(); - } + opsize = insn_opsize(insn); DEST_EA(env, insn, opsize, tcg_const_i32(0), NULL); - gen_logic_cc(s, tcg_const_i32(0)); + gen_logic_cc(s, tcg_const_i32(0), opsize); } static TCGv gen_get_ccr(DisasContext *s) @@ -1345,75 +1637,72 @@ static TCGv gen_get_ccr(DisasContext *s) TCGv dest; gen_flush_flags(s); + update_cc_op(s); dest = tcg_temp_new(); - tcg_gen_shli_i32(dest, QREG_CC_X, 4); - tcg_gen_or_i32(dest, dest, QREG_CC_DEST); + gen_helper_get_ccr(dest, cpu_env); return dest; } DISAS_INSN(move_from_ccr) { - TCGv reg; TCGv ccr; ccr = gen_get_ccr(s); - reg = DREG(insn, 0); - gen_partset_reg(OS_WORD, reg, ccr); + DEST_EA(env, insn, OS_WORD, ccr, NULL); } DISAS_INSN(neg) { - TCGv reg; TCGv src1; + TCGv dest; + TCGv addr; + int opsize; - reg = DREG(insn, 0); - src1 = tcg_temp_new(); - tcg_gen_mov_i32(src1, reg); - tcg_gen_neg_i32(reg, src1); - s->cc_op = CC_OP_SUB; - gen_update_cc_add(reg, src1); - gen_helper_xflag_lt(QREG_CC_X, tcg_const_i32(0), src1); - s->cc_op = CC_OP_SUB; + opsize = insn_opsize(insn); + SRC_EA(env, src1, opsize, 1, &addr); + dest = tcg_temp_new(); + tcg_gen_neg_i32(dest, src1); + set_cc_op(s, CC_OP_SUBB + opsize); + gen_update_cc_add(dest, src1, opsize); + tcg_gen_setcondi_i32(TCG_COND_NE, QREG_CC_X, dest, 0); + DEST_EA(env, insn, opsize, dest, &addr); + tcg_temp_free(dest); } static void gen_set_sr_im(DisasContext *s, uint16_t val, int ccr_only) { - tcg_gen_movi_i32(QREG_CC_DEST, val & 0xf); - tcg_gen_movi_i32(QREG_CC_X, (val & 0x10) >> 4); - if (!ccr_only) { - gen_helper_set_sr(cpu_env, tcg_const_i32(val & 0xff00)); + if (ccr_only) { + tcg_gen_movi_i32(QREG_CC_C, val & CCF_C ? 1 : 0); + tcg_gen_movi_i32(QREG_CC_V, val & CCF_V ? -1 : 0); + tcg_gen_movi_i32(QREG_CC_Z, val & CCF_Z ? 0 : 1); + tcg_gen_movi_i32(QREG_CC_N, val & CCF_N ? -1 : 0); + tcg_gen_movi_i32(QREG_CC_X, val & CCF_X ? 
1 : 0); + } else { + gen_helper_set_sr(cpu_env, tcg_const_i32(val)); } + set_cc_op(s, CC_OP_FLAGS); } static void gen_set_sr(CPUM68KState *env, DisasContext *s, uint16_t insn, int ccr_only) { - TCGv tmp; - TCGv reg; - - s->cc_op = CC_OP_FLAGS; - if ((insn & 0x38) == 0) - { - tmp = tcg_temp_new(); - reg = DREG(insn, 0); - tcg_gen_andi_i32(QREG_CC_DEST, reg, 0xf); - tcg_gen_shri_i32(tmp, reg, 4); - tcg_gen_andi_i32(QREG_CC_X, tmp, 1); - if (!ccr_only) { - gen_helper_set_sr(cpu_env, reg); + if ((insn & 0x38) == 0) { + if (ccr_only) { + gen_helper_set_ccr(cpu_env, DREG(insn, 0)); + } else { + gen_helper_set_sr(cpu_env, DREG(insn, 0)); } - } - else if ((insn & 0x3f) == 0x3c) - { + set_cc_op(s, CC_OP_FLAGS); + } else if ((insn & 0x3f) == 0x3c) { uint16_t val; - val = cpu_lduw_code(env, s->pc); - s->pc += 2; + val = read_im16(env, s); gen_set_sr_im(s, val, ccr_only); - } - else + } else { disas_undef(env, s, insn); + } } + DISAS_INSN(move_to_ccr) { gen_set_sr(env, s, insn, 1); @@ -1421,11 +1710,17 @@ DISAS_INSN(move_to_ccr) DISAS_INSN(not) { - TCGv reg; + TCGv src1; + TCGv dest; + TCGv addr; + int opsize; - reg = DREG(insn, 0); - tcg_gen_not_i32(reg, reg); - gen_logic_cc(s, reg); + opsize = insn_opsize(insn); + SRC_EA(env, src1, opsize, 1, &addr); + dest = tcg_temp_new(); + tcg_gen_not_i32(dest, src1); + DEST_EA(env, insn, opsize, dest, &addr); + gen_logic_cc(s, dest, opsize); } DISAS_INSN(swap) @@ -1440,7 +1735,12 @@ DISAS_INSN(swap) tcg_gen_shli_i32(src1, reg, 16); tcg_gen_shri_i32(src2, reg, 16); tcg_gen_or_i32(reg, src1, src2); - gen_logic_cc(s, reg); + gen_logic_cc(s, reg, OS_LONG); +} + +DISAS_INSN(bkpt) +{ + gen_exception(s, s->pc - 2, EXCP_DEBUG); } DISAS_INSN(pea) @@ -1472,7 +1772,7 @@ DISAS_INSN(ext) gen_partset_reg(OS_WORD, reg, tmp); else tcg_gen_mov_i32(reg, tmp); - gen_logic_cc(s, tmp); + gen_logic_cc(s, tmp, OS_LONG); } DISAS_INSN(tst) @@ -1480,21 +1780,9 @@ DISAS_INSN(tst) int opsize; TCGv tmp; - switch ((insn >> 6) & 3) { - case 0: /* tst.b */ - opsize = OS_BYTE; - break; - case 1: /* tst.w */ - opsize = OS_WORD; - break; - case 2: /* tst.l */ - opsize = OS_LONG; - break; - default: - abort(); - } + opsize = insn_opsize(insn); SRC_EA(env, tmp, opsize, 1, NULL); - gen_logic_cc(s, tmp); + gen_logic_cc(s, tmp, opsize); } DISAS_INSN(pulse) @@ -1516,7 +1804,7 @@ DISAS_INSN(tas) dest = tcg_temp_new(); SRC_EA(env, src1, OS_BYTE, 1, &addr); - gen_logic_cc(s, src1); + gen_logic_cc(s, src1, OS_BYTE); tcg_gen_ori_i32(dest, src1, 0x80); DEST_EA(env, insn, OS_BYTE, dest, &addr); } @@ -1530,8 +1818,7 @@ DISAS_INSN(mull) /* The upper 32 bits of the product are discarded, so muls.l and mulu.l are functionally equivalent. */ - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext & 0x87ff) { gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED); return; @@ -1542,24 +1829,39 @@ DISAS_INSN(mull) tcg_gen_mul_i32(dest, src1, reg); tcg_gen_mov_i32(reg, dest); /* Unlike m68k, coldfire always clears the overflow bit. 
*/ - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, OS_LONG); } -DISAS_INSN(link) +static void gen_link(DisasContext *s, uint16_t insn, int32_t offset) { - int16_t offset; TCGv reg; TCGv tmp; - offset = cpu_ldsw_code(env, s->pc); - s->pc += 2; reg = AREG(insn, 0); tmp = tcg_temp_new(); tcg_gen_subi_i32(tmp, QREG_SP, 4); gen_store(s, OS_LONG, tmp, reg); - if ((insn & 7) != 7) + if ((insn & 7) != 7) { tcg_gen_mov_i32(reg, tmp); + } tcg_gen_addi_i32(QREG_SP, tmp, offset); + tcg_temp_free(tmp); +} + +DISAS_INSN(link) +{ + int16_t offset; + + offset = read_im16(env, s); + gen_link(s, insn, offset); +} + +DISAS_INSN(linkl) +{ + int32_t offset; + + offset = read_im32(env, s); + gen_link(s, insn, offset); } DISAS_INSN(unlk) @@ -1609,40 +1911,48 @@ DISAS_INSN(jump) DISAS_INSN(addsubq) { - TCGv src1; - TCGv src2; + TCGv src; TCGv dest; - int val; + TCGv val; + int imm; TCGv addr; + int opsize; - SRC_EA(env, src1, OS_LONG, 0, &addr); - val = (insn >> 9) & 7; - if (val == 0) - val = 8; + if ((insn & 070) == 010) { + /* Operation on address register is always long. */ + opsize = OS_LONG; + } else { + opsize = insn_opsize(insn); + } + SRC_EA(env, src, opsize, 1, &addr); + imm = (insn >> 9) & 7; + if (imm == 0) { + imm = 8; + } + val = tcg_const_i32(imm); dest = tcg_temp_new(); - tcg_gen_mov_i32(dest, src1); + tcg_gen_mov_i32(dest, src); if ((insn & 0x38) == 0x08) { /* Don't update condition codes if the destination is an address register. */ if (insn & 0x0100) { - tcg_gen_subi_i32(dest, dest, val); + tcg_gen_sub_i32(dest, dest, val); } else { - tcg_gen_addi_i32(dest, dest, val); + tcg_gen_add_i32(dest, dest, val); } } else { - src2 = tcg_const_i32(val); if (insn & 0x0100) { - gen_helper_xflag_lt(QREG_CC_X, dest, src2); - tcg_gen_subi_i32(dest, dest, val); - s->cc_op = CC_OP_SUB; + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, val); + tcg_gen_sub_i32(dest, dest, val); + set_cc_op(s, CC_OP_SUBB + opsize); } else { - tcg_gen_addi_i32(dest, dest, val); - gen_helper_xflag_lt(QREG_CC_X, dest, src2); - s->cc_op = CC_OP_ADD; + tcg_gen_add_i32(dest, dest, val); + tcg_gen_setcond_i32(TCG_COND_LTU, QREG_CC_X, dest, val); + set_cc_op(s, CC_OP_ADDB + opsize); } - gen_update_cc_add(dest, src2); + gen_update_cc_add(dest, val, opsize); } - DEST_EA(env, insn, OS_LONG, dest, &addr); + DEST_EA(env, insn, opsize, dest, &addr); } DISAS_INSN(tpf) @@ -1672,8 +1982,7 @@ DISAS_INSN(branch) op = (insn >> 8) & 0xf; offset = (int8_t)insn; if (offset == 0) { - offset = cpu_ldsw_code(env, s->pc); - s->pc += 2; + offset = (int16_t)read_im16(env, s); } else if (offset == -1) { offset = read_im32(env, s); } @@ -1681,7 +1990,6 @@ DISAS_INSN(branch) /* bsr */ gen_push(s, tcg_const_i32(s->pc)); } - gen_flush_cc_op(s); if (op > 1) { /* Bcc */ l1 = gen_new_label(); @@ -1701,7 +2009,7 @@ DISAS_INSN(moveq) val = (int8_t)insn; tcg_gen_movi_i32(DREG(insn, 9), val); - gen_logic_cc(s, tcg_const_i32(val)); + gen_logic_cc(s, tcg_const_i32(val), OS_LONG); } DISAS_INSN(mvzs) @@ -1717,7 +2025,7 @@ DISAS_INSN(mvzs) SRC_EA(env, src, opsize, (insn & 0x80) == 0, NULL); reg = DREG(insn, 9); tcg_gen_mov_i32(reg, src); - gen_logic_cc(s, src); + gen_logic_cc(s, src, opsize); } DISAS_INSN(or) @@ -1726,19 +2034,21 @@ DISAS_INSN(or) TCGv dest; TCGv src; TCGv addr; + int opsize; - reg = DREG(insn, 9); + opsize = insn_opsize(insn); + reg = gen_extend(DREG(insn, 9), opsize, 0); dest = tcg_temp_new(); if (insn & 0x100) { - SRC_EA(env, src, OS_LONG, 0, &addr); + SRC_EA(env, src, opsize, 0, &addr); tcg_gen_or_i32(dest, src, reg); - DEST_EA(env, insn, OS_LONG, 
dest, &addr); + DEST_EA(env, insn, opsize, dest, &addr); } else { - SRC_EA(env, src, OS_LONG, 0, NULL); + SRC_EA(env, src, opsize, 0, NULL); tcg_gen_or_i32(dest, src, reg); - tcg_gen_mov_i32(reg, dest); + gen_partset_reg(opsize, DREG(insn, 9), dest); } - gen_logic_cc(s, dest); + gen_logic_cc(s, dest, opsize); } DISAS_INSN(suba) @@ -1746,20 +2056,80 @@ DISAS_INSN(suba) TCGv src; TCGv reg; - SRC_EA(env, src, OS_LONG, 0, NULL); + SRC_EA(env, src, (insn & 0x100) ? OS_LONG : OS_WORD, 1, NULL); reg = AREG(insn, 9); tcg_gen_sub_i32(reg, reg, src); } -DISAS_INSN(subx) +static inline void gen_subx(DisasContext *s, TCGv src, TCGv dest, int opsize) { - TCGv reg; + TCGv tmp; + + gen_flush_flags(s); /* compute old Z */ + + /* Perform substract with borrow. + * (X, N) = dest - (src + X); + */ + + tmp = tcg_const_i32(0); + tcg_gen_add2_i32(QREG_CC_N, QREG_CC_X, src, tmp, QREG_CC_X, tmp); + tcg_gen_sub2_i32(QREG_CC_N, QREG_CC_X, dest, tmp, QREG_CC_N, QREG_CC_X); + gen_ext(QREG_CC_N, QREG_CC_N, opsize, 1); + tcg_gen_andi_i32(QREG_CC_X, QREG_CC_X, 1); + + /* Compute signed-overflow for substract. */ + + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_N, dest); + tcg_gen_xor_i32(tmp, dest, src); + tcg_gen_and_i32(QREG_CC_V, QREG_CC_V, tmp); + tcg_temp_free(tmp); + + /* Copy the rest of the results into place. */ + tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_N); /* !Z is sticky */ + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + + set_cc_op(s, CC_OP_FLAGS); + + /* result is in QREG_CC_N */ +} + +DISAS_INSN(subx_reg) +{ + TCGv dest; TCGv src; + int opsize; - gen_flush_flags(s); - reg = DREG(insn, 9); - src = DREG(insn, 0); - gen_helper_subx_cc(reg, cpu_env, reg, src); + opsize = insn_opsize(insn); + + src = gen_extend(DREG(insn, 0), opsize, 1); + dest = gen_extend(DREG(insn, 9), opsize, 1); + + gen_subx(s, src, dest, opsize); + + gen_partset_reg(opsize, DREG(insn, 9), QREG_CC_N); +} + +DISAS_INSN(subx_mem) +{ + TCGv src; + TCGv addr_src; + TCGv dest; + TCGv addr_dest; + int opsize; + + opsize = insn_opsize(insn); + + addr_src = AREG(insn, 0); + tcg_gen_subi_i32(addr_src, addr_src, opsize); + src = gen_load(s, opsize, addr_src, 1); + + addr_dest = AREG(insn, 9); + tcg_gen_subi_i32(addr_dest, addr_dest, opsize); + dest = gen_load(s, opsize, addr_dest, 1); + + gen_subx(s, src, dest, opsize); + + gen_store(s, opsize, addr_dest, QREG_CC_N); } DISAS_INSN(mov3q) @@ -1771,40 +2141,20 @@ DISAS_INSN(mov3q) if (val == 0) val = -1; src = tcg_const_i32(val); - gen_logic_cc(s, src); + gen_logic_cc(s, src, OS_LONG); DEST_EA(env, insn, OS_LONG, src, NULL); } DISAS_INSN(cmp) { - int op; TCGv src; TCGv reg; - TCGv dest; int opsize; - op = (insn >> 6) & 3; - switch (op) { - case 0: /* cmp.b */ - opsize = OS_BYTE; - s->cc_op = CC_OP_CMPB; - break; - case 1: /* cmp.w */ - opsize = OS_WORD; - s->cc_op = CC_OP_CMPW; - break; - case 2: /* cmp.l */ - opsize = OS_LONG; - s->cc_op = CC_OP_SUB; - break; - default: - abort(); - } + opsize = insn_opsize(insn); SRC_EA(env, src, opsize, 1, NULL); - reg = DREG(insn, 9); - dest = tcg_temp_new(); - tcg_gen_sub_i32(dest, reg, src); - gen_update_cc_add(dest, src); + reg = gen_extend(DREG(insn, 9), opsize, 1); + gen_update_cc_cmp(s, reg, src, opsize); } DISAS_INSN(cmpa) @@ -1812,7 +2162,6 @@ DISAS_INSN(cmpa) int opsize; TCGv src; TCGv reg; - TCGv dest; if (insn & 0x100) { opsize = OS_LONG; @@ -1821,25 +2170,50 @@ DISAS_INSN(cmpa) } SRC_EA(env, src, opsize, 1, NULL); reg = AREG(insn, 9); - dest = tcg_temp_new(); - tcg_gen_sub_i32(dest, reg, src); - gen_update_cc_add(dest, src); - s->cc_op = CC_OP_SUB; + 
gen_update_cc_cmp(s, reg, src, OS_LONG); } DISAS_INSN(eor) { TCGv src; - TCGv reg; TCGv dest; TCGv addr; + int opsize; - SRC_EA(env, src, OS_LONG, 0, &addr); - reg = DREG(insn, 9); + opsize = insn_opsize(insn); + + SRC_EA(env, src, opsize, 0, &addr); dest = tcg_temp_new(); - tcg_gen_xor_i32(dest, src, reg); - gen_logic_cc(s, dest); - DEST_EA(env, insn, OS_LONG, dest, &addr); + tcg_gen_xor_i32(dest, src, DREG(insn, 9)); + gen_logic_cc(s, dest, opsize); + DEST_EA(env, insn, opsize, dest, &addr); +} + +static void do_exg(TCGv reg1, TCGv reg2) +{ + TCGv temp = tcg_temp_new(); + tcg_gen_mov_i32(temp, reg1); + tcg_gen_mov_i32(reg1, reg2); + tcg_gen_mov_i32(reg2, temp); + tcg_temp_free(temp); +} + +DISAS_INSN(exg_dd) +{ + /* exchange Dx and Dy */ + do_exg(DREG(insn, 9), DREG(insn, 0)); +} + +DISAS_INSN(exg_aa) +{ + /* exchange Ax and Ay */ + do_exg(AREG(insn, 9), AREG(insn, 0)); +} + +DISAS_INSN(exg_da) +{ + /* exchange Dx and Ay */ + do_exg(DREG(insn, 9), AREG(insn, 0)); } DISAS_INSN(and) @@ -1848,19 +2222,23 @@ DISAS_INSN(and) TCGv reg; TCGv dest; TCGv addr; + int opsize; - reg = DREG(insn, 9); dest = tcg_temp_new(); + + opsize = insn_opsize(insn); + reg = DREG(insn, 9); if (insn & 0x100) { - SRC_EA(env, src, OS_LONG, 0, &addr); + SRC_EA(env, src, opsize, 0, &addr); tcg_gen_and_i32(dest, src, reg); - DEST_EA(env, insn, OS_LONG, dest, &addr); + DEST_EA(env, insn, opsize, dest, &addr); } else { - SRC_EA(env, src, OS_LONG, 0, NULL); + SRC_EA(env, src, opsize, 0, NULL); tcg_gen_and_i32(dest, src, reg); - tcg_gen_mov_i32(reg, dest); + gen_partset_reg(opsize, reg, dest); } - gen_logic_cc(s, dest); + tcg_temp_free(dest); + gen_logic_cc(s, dest, opsize); } DISAS_INSN(adda) @@ -1868,21 +2246,79 @@ DISAS_INSN(adda) TCGv src; TCGv reg; - SRC_EA(env, src, OS_LONG, 0, NULL); + SRC_EA(env, src, (insn & 0x100) ? OS_LONG : OS_WORD, 1, NULL); reg = AREG(insn, 9); tcg_gen_add_i32(reg, reg, src); } -DISAS_INSN(addx) +static inline void gen_addx(DisasContext *s, TCGv src, TCGv dest, int opsize) { - TCGv reg; + TCGv tmp; + + gen_flush_flags(s); /* compute old Z */ + + /* Perform addition with carry. + * (X, N) = src + dest + X; + */ + + tmp = tcg_const_i32(0); + tcg_gen_add2_i32(QREG_CC_N, QREG_CC_X, QREG_CC_X, tmp, dest, tmp); + tcg_gen_add2_i32(QREG_CC_N, QREG_CC_X, QREG_CC_N, QREG_CC_X, src, tmp); + gen_ext(QREG_CC_N, QREG_CC_N, opsize, 1); + + /* Compute signed-overflow for addition. */ + + tcg_gen_xor_i32(QREG_CC_V, QREG_CC_N, src); + tcg_gen_xor_i32(tmp, dest, src); + tcg_gen_andc_i32(QREG_CC_V, QREG_CC_V, tmp); + tcg_temp_free(tmp); + + /* Copy the rest of the results into place. 
*/ + tcg_gen_or_i32(QREG_CC_Z, QREG_CC_Z, QREG_CC_N); /* !Z is sticky */ + tcg_gen_mov_i32(QREG_CC_C, QREG_CC_X); + + set_cc_op(s, CC_OP_FLAGS); + + /* result is in QREG_CC_N */ +} + +DISAS_INSN(addx_reg) +{ + TCGv dest; TCGv src; + int opsize; - gen_flush_flags(s); - reg = DREG(insn, 9); - src = DREG(insn, 0); - gen_helper_addx_cc(reg, cpu_env, reg, src); - s->cc_op = CC_OP_FLAGS; + opsize = insn_opsize(insn); + + dest = gen_extend(DREG(insn, 9), opsize, 1); + src = gen_extend(DREG(insn, 0), opsize, 1); + + gen_addx(s, src, dest, opsize); + + gen_partset_reg(opsize, DREG(insn, 9), QREG_CC_N); +} + +DISAS_INSN(addx_mem) +{ + TCGv src; + TCGv addr_src; + TCGv dest; + TCGv addr_dest; + int opsize; + + opsize = insn_opsize(insn); + + addr_src = AREG(insn, 0); + tcg_gen_subi_i32(addr_src, addr_src, opsize_bytes(opsize)); + src = gen_load(s, opsize, addr_src, 1); + + addr_dest = AREG(insn, 9); + tcg_gen_subi_i32(addr_dest, addr_dest, opsize_bytes(opsize)); + dest = gen_load(s, opsize, addr_dest, 1); + + gen_addx(s, src, dest, opsize); + + gen_store(s, opsize, addr_dest, QREG_CC_N); } /* TODO: This could be implemented without helper functions. */ @@ -1892,6 +2328,8 @@ DISAS_INSN(shift_im) int tmp; TCGv shift; + set_cc_op(s, CC_OP_FLAGS); + reg = DREG(insn, 0); tmp = (insn >> 9) & 7; if (tmp == 0) @@ -1907,7 +2345,6 @@ DISAS_INSN(shift_im) gen_helper_sar_cc(reg, cpu_env, reg, shift); } } - s->cc_op = CC_OP_SHIFT; } DISAS_INSN(shift_reg) @@ -1917,8 +2354,6 @@ DISAS_INSN(shift_reg) reg = DREG(insn, 0); shift = DREG(insn, 9); - /* Shift by zero leaves C flag unmodified. */ - gen_flush_flags(s); if (insn & 0x100) { gen_helper_shl_cc(reg, cpu_env, reg, shift); } else { @@ -1928,14 +2363,14 @@ DISAS_INSN(shift_reg) gen_helper_sar_cc(reg, cpu_env, reg, shift); } } - s->cc_op = CC_OP_SHIFT; + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(ff1) { TCGv reg; reg = DREG(insn, 0); - gen_logic_cc(s, reg); + gen_logic_cc(s, reg, OS_LONG); gen_helper_ff1(reg, reg); } @@ -1957,14 +2392,12 @@ DISAS_INSN(strldsr) uint32_t addr; addr = s->pc - 2; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext != 0x46FC) { gen_exception(s, addr, EXCP_UNSUPPORTED); return; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (IS_USER(s) || (ext & SR_S) == 0) { gen_exception(s, addr, EXCP_PRIVILEGE); return; @@ -1975,16 +2408,14 @@ DISAS_INSN(strldsr) DISAS_INSN(move_from_sr) { - TCGv reg; TCGv sr; - if (IS_USER(s)) { + if (IS_USER(s) && !m68k_feature(env, M68K_FEATURE_M68000)) { gen_exception(s, s->pc - 2, EXCP_PRIVILEGE); return; } sr = gen_get_sr(s); - reg = DREG(insn, 0); - gen_partset_reg(OS_WORD, reg, sr); + DEST_EA(env, insn, OS_WORD, sr, NULL); } DISAS_INSN(move_to_sr) @@ -2031,8 +2462,7 @@ DISAS_INSN(stop) return; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); gen_set_sr_im(s, ext, 0); tcg_gen_movi_i32(cpu_halted, 1); @@ -2058,8 +2488,7 @@ DISAS_INSN(movec) return; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); if (ext & 0x8000) { reg = AREG(ext, 12); @@ -2125,8 +2554,7 @@ DISAS_INSN(fpu) int set_dest; int opsize; - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); opmode = ext & 0x7f; switch ((ext >> 13) & 7) { case 0: case 2: @@ -2408,8 +2836,7 @@ DISAS_INSN(fbcc) offset = cpu_ldsw_code(env, s->pc); s->pc += 2; if (insn & (1 << 6)) { - offset = (offset << 16) | cpu_lduw_code(env, s->pc); - s->pc += 2; + offset = (offset << 16) | read_im16(env, s); } l1 = gen_new_label(); @@ -2534,8 
+2961,7 @@ DISAS_INSN(mac) s->done_mac = 1; } - ext = cpu_lduw_code(env, s->pc); - s->pc += 2; + ext = read_im16(env, s); acc = ((insn >> 7) & 1) | ((ext >> 3) & 2); dual = ((insn & 0x30) != 0 && (ext & 3) != 0); @@ -2747,9 +3173,11 @@ DISAS_INSN(from_mext) DISAS_INSN(macsr_to_ccr) { - tcg_gen_movi_i32(QREG_CC_X, 0); - tcg_gen_andi_i32(QREG_CC_DEST, QREG_MACSR, 0xf); - s->cc_op = CC_OP_FLAGS; + TCGv tmp = tcg_temp_new(); + tcg_gen_andi_i32(tmp, QREG_MACSR, 0xf); + gen_helper_set_sr(cpu_env, tmp); + tcg_temp_free(tmp); + set_cc_op(s, CC_OP_FLAGS); } DISAS_INSN(to_mac) @@ -2841,90 +3269,131 @@ register_opcode (disas_proc proc, uint16_t opcode, uint16_t mask) Later insn override earlier ones. */ void register_m68k_insns (CPUM68KState *env) { + /* Build the opcode table only once to avoid + multithreading issues. */ + if (opcode_table[0] != NULL) { + return; + } + + /* use BASE() for instruction available + * for CF_ISA_A and M68000. + */ +#define BASE(name, opcode, mask) \ + register_opcode(disas_##name, 0x##opcode, 0x##mask) #define INSN(name, opcode, mask, feature) do { \ if (m68k_feature(env, M68K_FEATURE_##feature)) \ - register_opcode(disas_##name, 0x##opcode, 0x##mask); \ + BASE(name, opcode, mask); \ } while(0) - INSN(undef, 0000, 0000, CF_ISA_A); + BASE(undef, 0000, 0000); INSN(arith_im, 0080, fff8, CF_ISA_A); + INSN(arith_im, 0000, ff00, M68000); + INSN(undef, 00c0, ffc0, M68000); INSN(bitrev, 00c0, fff8, CF_ISA_APLUSC); - INSN(bitop_reg, 0100, f1c0, CF_ISA_A); - INSN(bitop_reg, 0140, f1c0, CF_ISA_A); - INSN(bitop_reg, 0180, f1c0, CF_ISA_A); - INSN(bitop_reg, 01c0, f1c0, CF_ISA_A); + BASE(bitop_reg, 0100, f1c0); + BASE(bitop_reg, 0140, f1c0); + BASE(bitop_reg, 0180, f1c0); + BASE(bitop_reg, 01c0, f1c0); INSN(arith_im, 0280, fff8, CF_ISA_A); + INSN(arith_im, 0200, ff00, M68000); + INSN(undef, 02c0, ffc0, M68000); INSN(byterev, 02c0, fff8, CF_ISA_APLUSC); INSN(arith_im, 0480, fff8, CF_ISA_A); + INSN(arith_im, 0400, ff00, M68000); + INSN(undef, 04c0, ffc0, M68000); + INSN(arith_im, 0600, ff00, M68000); + INSN(undef, 06c0, ffc0, M68000); INSN(ff1, 04c0, fff8, CF_ISA_APLUSC); INSN(arith_im, 0680, fff8, CF_ISA_A); - INSN(bitop_im, 0800, ffc0, CF_ISA_A); - INSN(bitop_im, 0840, ffc0, CF_ISA_A); - INSN(bitop_im, 0880, ffc0, CF_ISA_A); - INSN(bitop_im, 08c0, ffc0, CF_ISA_A); - INSN(arith_im, 0a80, fff8, CF_ISA_A); INSN(arith_im, 0c00, ff38, CF_ISA_A); - INSN(move, 1000, f000, CF_ISA_A); - INSN(move, 2000, f000, CF_ISA_A); - INSN(move, 3000, f000, CF_ISA_A); + INSN(arith_im, 0c00, ff00, M68000); + BASE(bitop_im, 0800, ffc0); + BASE(bitop_im, 0840, ffc0); + BASE(bitop_im, 0880, ffc0); + BASE(bitop_im, 08c0, ffc0); + INSN(arith_im, 0a80, fff8, CF_ISA_A); + INSN(arith_im, 0a00, ff00, M68000); + BASE(move, 1000, f000); + BASE(move, 2000, f000); + BASE(move, 3000, f000); INSN(strldsr, 40e7, ffff, CF_ISA_APLUSC); INSN(negx, 4080, fff8, CF_ISA_A); + INSN(negx, 4000, ff00, M68000); + INSN(undef, 40c0, ffc0, M68000); INSN(move_from_sr, 40c0, fff8, CF_ISA_A); - INSN(lea, 41c0, f1c0, CF_ISA_A); - INSN(clr, 4200, ff00, CF_ISA_A); - INSN(undef, 42c0, ffc0, CF_ISA_A); + INSN(move_from_sr, 40c0, ffc0, M68000); + BASE(lea, 41c0, f1c0); + BASE(clr, 4200, ff00); + BASE(undef, 42c0, ffc0); INSN(move_from_ccr, 42c0, fff8, CF_ISA_A); + INSN(move_from_ccr, 42c0, ffc0, M68000); INSN(neg, 4480, fff8, CF_ISA_A); - INSN(move_to_ccr, 44c0, ffc0, CF_ISA_A); + INSN(neg, 4400, ff00, M68000); + INSN(undef, 44c0, ffc0, M68000); + BASE(move_to_ccr, 44c0, ffc0); INSN(not, 4680, fff8, CF_ISA_A); + INSN(not, 4600, ff00, M68000); 
+ INSN(undef, 46c0, ffc0, M68000); INSN(move_to_sr, 46c0, ffc0, CF_ISA_A); - INSN(pea, 4840, ffc0, CF_ISA_A); - INSN(swap, 4840, fff8, CF_ISA_A); - INSN(movem, 48c0, fbc0, CF_ISA_A); - INSN(ext, 4880, fff8, CF_ISA_A); - INSN(ext, 48c0, fff8, CF_ISA_A); - INSN(ext, 49c0, fff8, CF_ISA_A); - INSN(tst, 4a00, ff00, CF_ISA_A); + INSN(linkl, 4808, fff8, M68000); + BASE(pea, 4840, ffc0); + BASE(swap, 4840, fff8); + INSN(bkpt, 4848, fff8, BKPT); + BASE(movem, 48c0, fbc0); + BASE(ext, 4880, fff8); + BASE(ext, 48c0, fff8); + BASE(ext, 49c0, fff8); + BASE(tst, 4a00, ff00); INSN(tas, 4ac0, ffc0, CF_ISA_B); + INSN(tas, 4ac0, ffc0, M68000); INSN(halt, 4ac8, ffff, CF_ISA_A); INSN(pulse, 4acc, ffff, CF_ISA_A); - INSN(illegal, 4afc, ffff, CF_ISA_A); + BASE(illegal, 4afc, ffff); INSN(mull, 4c00, ffc0, CF_ISA_A); + INSN(mull, 4c00, ffc0, LONG_MULDIV); INSN(divl, 4c40, ffc0, CF_ISA_A); + INSN(divl, 4c40, ffc0, LONG_MULDIV); INSN(sats, 4c80, fff8, CF_ISA_B); - INSN(trap, 4e40, fff0, CF_ISA_A); - INSN(link, 4e50, fff8, CF_ISA_A); - INSN(unlk, 4e58, fff8, CF_ISA_A); + BASE(trap, 4e40, fff0); + BASE(link, 4e50, fff8); + BASE(unlk, 4e58, fff8); INSN(move_to_usp, 4e60, fff8, USP); INSN(move_from_usp, 4e68, fff8, USP); - INSN(nop, 4e71, ffff, CF_ISA_A); - INSN(stop, 4e72, ffff, CF_ISA_A); - INSN(rte, 4e73, ffff, CF_ISA_A); - INSN(rts, 4e75, ffff, CF_ISA_A); + BASE(nop, 4e71, ffff); + BASE(stop, 4e72, ffff); + BASE(rte, 4e73, ffff); + BASE(rts, 4e75, ffff); INSN(movec, 4e7b, ffff, CF_ISA_A); - INSN(jump, 4e80, ffc0, CF_ISA_A); - INSN(jump, 4ec0, ffc0, CF_ISA_A); - INSN(addsubq, 5180, f1c0, CF_ISA_A); - INSN(scc, 50c0, f0f8, CF_ISA_A); - INSN(addsubq, 5080, f1c0, CF_ISA_A); + BASE(jump, 4e80, ffc0); + BASE(jump, 4ec0, ffc0); + INSN(addsubq, 5000, f080, M68000); + BASE(addsubq, 5080, f0c0); + INSN(scc, 50c0, f0f8, CF_ISA_A); /* Scc.B Dx */ + INSN(scc, 50c0, f0c0, M68000); /* Scc.B */ + INSN(dbcc, 50c8, f0f8, M68000); INSN(tpf, 51f8, fff8, CF_ISA_A); /* Branch instructions. */ - INSN(branch, 6000, f000, CF_ISA_A); + BASE(branch, 6000, f000); /* Disable long branch instructions, then add back the ones we want. */ - INSN(undef, 60ff, f0ff, CF_ISA_A); /* All long branches. */ + BASE(undef, 60ff, f0ff); /* All long branches. 
*/ INSN(branch, 60ff, f0ff, CF_ISA_B); INSN(undef, 60ff, ffff, CF_ISA_B); /* bra.l */ INSN(branch, 60ff, ffff, BRAL); + INSN(branch, 60ff, f0ff, BCCL); - INSN(moveq, 7000, f100, CF_ISA_A); + BASE(moveq, 7000, f100); INSN(mvzs, 7100, f100, CF_ISA_B); - INSN(or, 8000, f000, CF_ISA_A); - INSN(divw, 80c0, f0c0, CF_ISA_A); - INSN(addsub, 9000, f000, CF_ISA_A); - INSN(subx, 9180, f1f8, CF_ISA_A); + BASE(or, 8000, f000); + BASE(divw, 80c0, f0c0); + BASE(addsub, 9000, f000); + INSN(undef, 90c0, f0c0, CF_ISA_A); + INSN(subx_reg, 9180, f1f8, CF_ISA_A); + INSN(subx_reg, 9100, f138, M68000); + INSN(subx_mem, 9108, f138, M68000); INSN(suba, 91c0, f1c0, CF_ISA_A); + INSN(suba, 90c0, f0c0, M68000); - INSN(undef_mac, a000, f000, CF_ISA_A); + BASE(undef_mac, a000, f000); INSN(mac, a000, f100, CF_EMAC); INSN(from_mac, a180, f9b0, CF_EMAC); INSN(move_mac, a110, f9fc, CF_EMAC); @@ -2943,12 +3412,22 @@ void register_m68k_insns (CPUM68KState *env) INSN(cmpa, b0c0, f1c0, CF_ISA_B); /* cmpa.w */ INSN(cmp, b080, f1c0, CF_ISA_A); INSN(cmpa, b1c0, f1c0, CF_ISA_A); + INSN(cmp, b000, f100, M68000); + INSN(eor, b100, f100, M68000); + INSN(cmpa, b0c0, f0c0, M68000); INSN(eor, b180, f1c0, CF_ISA_A); - INSN(and, c000, f000, CF_ISA_A); - INSN(mulw, c0c0, f0c0, CF_ISA_A); - INSN(addsub, d000, f000, CF_ISA_A); - INSN(addx, d180, f1f8, CF_ISA_A); + BASE(and, c000, f000); + INSN(exg_dd, c140, f1f8, M68000); + INSN(exg_aa, c148, f1f8, M68000); + INSN(exg_da, c188, f1f8, M68000); + BASE(mulw, c0c0, f0c0); + BASE(addsub, d000, f000); + INSN(undef, d0c0, f0c0, CF_ISA_A); + INSN(addx_reg, d180, f1f8, CF_ISA_A); + INSN(addx_reg, d100, f138, M68000); + INSN(addx_mem, d108, f138, M68000); INSN(adda, d1c0, f1c0, CF_ISA_A); + INSN(adda, d0c0, f0c0, M68000); INSN(shift_im, e080, f0f0, CF_ISA_A); INSN(shift_reg, e0a0, f0f0, CF_ISA_A); INSN(undef_fpu, f000, f000, CF_ISA_A); @@ -2969,8 +3448,7 @@ static void disas_m68k_insn(CPUM68KState * env, DisasContext *s) { uint16_t insn; - insn = cpu_lduw_code(env, s->pc); - s->pc += 2; + insn = read_im16(env, s); opcode_table[insn](env, s, insn); } @@ -2995,6 +3473,7 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) dc->is_jmp = DISAS_NEXT; dc->pc = pc_start; dc->cc_op = CC_OP_DYNAMIC; + dc->cc_op_synced = 1; dc->singlestep_enabled = cs->singlestep_enabled; dc->fpcr = env->fpcr; dc->user = (env->sr & SR_S) == 0; @@ -3012,7 +3491,7 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) do { pc_offset = dc->pc - pc_start; gen_throws_exception = NULL; - tcg_gen_insn_start(dc->pc); + tcg_gen_insn_start(dc->pc, dc->cc_op); num_insns++; if (unlikely(cpu_breakpoint_test(cs, dc->pc, BP_ANY))) { @@ -3043,20 +3522,20 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) if (unlikely(cs->singlestep_enabled)) { /* Make sure the pc is updated, and raise a debug exception. 
*/ if (!dc->is_jmp) { - gen_flush_cc_op(dc); + update_cc_op(dc); tcg_gen_movi_i32(QREG_PC, dc->pc); } gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG)); } else { switch(dc->is_jmp) { case DISAS_NEXT: - gen_flush_cc_op(dc); + update_cc_op(dc); gen_jmp_tb(dc, 0, dc->pc); break; default: case DISAS_JUMP: case DISAS_UPDATE: - gen_flush_cc_op(dc); + update_cc_op(dc); /* indicate that the hash table must be used to find the next TB */ tcg_gen_exit_tb(0); break; @@ -3070,10 +3549,12 @@ void gen_intermediate_code(CPUM68KState *env, TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif tb->size = dc->pc - pc_start; @@ -3091,20 +3572,24 @@ void m68k_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, for (i = 0; i < 8; i++) { u.d = env->fregs[i]; - cpu_fprintf (f, "D%d = %08x A%d = %08x F%d = %08x%08x (%12g)\n", - i, env->dregs[i], i, env->aregs[i], - i, u.l.upper, u.l.lower, *(double *)&u.d); + cpu_fprintf(f, "D%d = %08x A%d = %08x F%d = %08x%08x (%12g)\n", + i, env->dregs[i], i, env->aregs[i], + i, u.l.upper, u.l.lower, *(double *)&u.d); } cpu_fprintf (f, "PC = %08x ", env->pc); - sr = env->sr; - cpu_fprintf (f, "SR = %04x %c%c%c%c%c ", sr, (sr & 0x10) ? 'X' : '-', - (sr & CCF_N) ? 'N' : '-', (sr & CCF_Z) ? 'Z' : '-', - (sr & CCF_V) ? 'V' : '-', (sr & CCF_C) ? 'C' : '-'); + sr = env->sr | cpu_m68k_get_ccr(env); + cpu_fprintf(f, "SR = %04x %c%c%c%c%c ", sr, (sr & CCF_X) ? 'X' : '-', + (sr & CCF_N) ? 'N' : '-', (sr & CCF_Z) ? 'Z' : '-', + (sr & CCF_V) ? 'V' : '-', (sr & CCF_C) ? 'C' : '-'); cpu_fprintf (f, "FPRESULT = %12g\n", *(double *)&env->fp_result); } void restore_state_to_opc(CPUM68KState *env, TranslationBlock *tb, target_ulong *data) { + int cc_op = data[1]; env->pc = data[0]; + if (cc_op != CC_OP_DYNAMIC) { + env->cc_op = cc_op; + } } diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c index 8edc00a796..389c7b691e 100644 --- a/target-microblaze/cpu.c +++ b/target-microblaze/cpu.c @@ -138,6 +138,13 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) CPUMBState *env = &cpu->env; uint8_t version_code = 0; int i = 0; + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } qemu_init_vcpu(cs); @@ -199,7 +206,6 @@ static void mb_cpu_initfn(Object *obj) static bool tcg_initialized; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); set_float_rounding_mode(float_round_nearest_even, &env->fp_status); @@ -267,12 +273,6 @@ static void mb_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_num_core_regs = 32 + 5; cc->disas_set_info = mb_disas_set_info; - - /* - * Reason: mb_cpu_initfn() calls cpu_exec_init(), which saves the - * object in cpus -> dangling pointer after final object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo mb_cpu_type_info = { diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c index 80098ece15..de2090ac71 100644 --- a/target-microblaze/translate.c +++ b/target-microblaze/translate.c @@ -581,50 +581,10 @@ static void dec_msr(DisasContext *dc) } } -/* 64-bit signed mul, lower result in d and upper in d2. 
*/ -static void t_gen_muls(TCGv d, TCGv d2, TCGv a, TCGv b) -{ - TCGv_i64 t0, t1; - - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - - tcg_gen_ext_i32_i64(t0, a); - tcg_gen_ext_i32_i64(t1, b); - tcg_gen_mul_i64(t0, t0, t1); - - tcg_gen_extrl_i64_i32(d, t0); - tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_extrl_i64_i32(d2, t0); - - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); -} - -/* 64-bit unsigned muls, lower result in d and upper in d2. */ -static void t_gen_mulu(TCGv d, TCGv d2, TCGv a, TCGv b) -{ - TCGv_i64 t0, t1; - - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - - tcg_gen_extu_i32_i64(t0, a); - tcg_gen_extu_i32_i64(t1, b); - tcg_gen_mul_i64(t0, t0, t1); - - tcg_gen_extrl_i64_i32(d, t0); - tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_extrl_i64_i32(d2, t0); - - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); -} - /* Multiplier unit. */ static void dec_mul(DisasContext *dc) { - TCGv d[2]; + TCGv tmp; unsigned int subcode; if ((dc->tb_flags & MSR_EE_FLAG) @@ -636,13 +596,11 @@ static void dec_mul(DisasContext *dc) } subcode = dc->imm & 3; - d[0] = tcg_temp_new(); - d[1] = tcg_temp_new(); if (dc->type_b) { LOG_DIS("muli r%d r%d %x\n", dc->rd, dc->ra, dc->imm); - t_gen_mulu(cpu_R[dc->rd], d[1], cpu_R[dc->ra], *(dec_alu_op_b(dc))); - goto done; + tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], *(dec_alu_op_b(dc))); + return; } /* mulh, mulhsu and mulhu are not available if C_USE_HW_MUL is < 2. */ @@ -651,30 +609,29 @@ static void dec_mul(DisasContext *dc) /* nop??? */ } + tmp = tcg_temp_new(); switch (subcode) { case 0: LOG_DIS("mul r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - t_gen_mulu(cpu_R[dc->rd], d[1], cpu_R[dc->ra], cpu_R[dc->rb]); + tcg_gen_mul_tl(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); break; case 1: LOG_DIS("mulh r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - t_gen_muls(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); + tcg_gen_muls2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); break; case 2: LOG_DIS("mulhsu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - t_gen_muls(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); + tcg_gen_mulsu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); break; case 3: LOG_DIS("mulhu r%d r%d r%d\n", dc->rd, dc->ra, dc->rb); - t_gen_mulu(d[0], cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); + tcg_gen_mulu2_tl(tmp, cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]); break; default: cpu_abort(CPU(dc->cpu), "unknown MUL insn %x\n", subcode); break; } -done: - tcg_temp_free(d[0]); - tcg_temp_free(d[1]); + tcg_temp_free(tmp); } /* Div unit. 
*/ @@ -1670,13 +1627,6 @@ void gen_intermediate_code(CPUMBState *env, struct TranslationBlock *tb) cpu_abort(cs, "Microblaze: unaligned PC=%x\n", pc_start); } - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { -#if !SIM_COMPAT - qemu_log("--------------\n"); - log_cpu_state(CPU(cpu), 0); -#endif - } - next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; @@ -1820,12 +1770,14 @@ void gen_intermediate_code(CPUMBState *env, struct TranslationBlock *tb) #if !SIM_COMPAT if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { - qemu_log("\n"); + qemu_log_lock(); + qemu_log("--------------\n"); #if DISAS_GNU log_target_disas(cs, pc_start, dc->pc - pc_start, 0); #endif qemu_log("\nisize=%d osize=%d\n", dc->pc - pc_start, tcg_op_buf_count()); + qemu_log_unlock(); } #endif #endif diff --git a/target-mips/cpu.c b/target-mips/cpu.c index 64ad112f4d..65ca607f88 100644 --- a/target-mips/cpu.c +++ b/target-mips/cpu.c @@ -124,6 +124,13 @@ static void mips_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); MIPSCPUClass *mcc = MIPS_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } cpu_reset(cs); qemu_init_vcpu(cs); @@ -138,7 +145,6 @@ static void mips_cpu_initfn(Object *obj) CPUMIPSState *env = &cpu->env; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); if (tcg_enabled()) { mips_tcg_init(); @@ -177,13 +183,6 @@ static void mips_cpu_class_init(ObjectClass *c, void *data) cc->gdb_num_core_regs = 73; cc->gdb_stop_before_watchpoint = true; - - /* - * Reason: mips_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). 
- */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo mips_cpu_type_info = { diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c index df7d2204b0..dc707934ea 100644 --- a/target-mips/dsp_helper.c +++ b/target-mips/dsp_helper.c @@ -3477,7 +3477,7 @@ target_ulong helper_dextp(target_ulong ac, target_ulong size, CPUMIPSState *env) if (sub >= -1) { temp = (tempB << (64 - len)) | (tempA >> len); - temp = temp & ((0x01 << (size + 1)) - 1); + temp = temp & ((1ULL << (size + 1)) - 1); set_DSPControl_efi(0, env); } else { set_DSPControl_efi(1, env); @@ -3506,7 +3506,7 @@ target_ulong helper_dextpdp(target_ulong ac, target_ulong size, if (sub >= -1) { temp = (tempB << (64 - len)) | (tempA >> len); - temp = temp & ((0x01 << (size + 1)) - 1); + temp = temp & ((1ULL << (size + 1)) - 1); set_DSPControl_pos(sub, env); set_DSPControl_efi(0, env); } else { diff --git a/target-mips/machine.c b/target-mips/machine.c index a27f2f156d..d20d948457 100644 --- a/target-mips/machine.c +++ b/target-mips/machine.c @@ -2,7 +2,6 @@ #include "qemu-common.h" #include "cpu.h" #include "hw/hw.h" -#include "cpu.h" #include "migration/cpu.h" static int cpu_post_load(void *opaque, int version_id) diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index ea2f2abe19..7af4c2f084 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -4122,10 +4122,10 @@ void helper_msa_ld_ ## TYPE(CPUMIPSState *env, uint32_t wd, \ } #if !defined(CONFIG_USER_ONLY) -MSA_LD_DF(DF_BYTE, b, helper_ret_ldub_mmu, oi, GETRA()) -MSA_LD_DF(DF_HALF, h, helper_ret_lduw_mmu, oi, GETRA()) -MSA_LD_DF(DF_WORD, w, helper_ret_ldul_mmu, oi, GETRA()) -MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu, oi, GETRA()) +MSA_LD_DF(DF_BYTE, b, helper_ret_ldub_mmu, oi, GETPC()) +MSA_LD_DF(DF_HALF, h, helper_ret_lduw_mmu, oi, GETPC()) +MSA_LD_DF(DF_WORD, w, helper_ret_ldul_mmu, oi, GETPC()) +MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu, oi, GETPC()) #else MSA_LD_DF(DF_BYTE, b, cpu_ldub_data) MSA_LD_DF(DF_HALF, h, cpu_lduw_data) @@ -4161,17 +4161,17 @@ void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd, \ int mmu_idx = cpu_mmu_index(env, false); \ int i; \ MEMOP_IDX(DF) \ - ensure_writable_pages(env, addr, mmu_idx, GETRA()); \ + ensure_writable_pages(env, addr, mmu_idx, GETPC()); \ for (i = 0; i < DF_ELEMENTS(DF); i++) { \ ST_INSN(env, addr + (i << DF), pwd->TYPE[i], ##__VA_ARGS__); \ } \ } #if !defined(CONFIG_USER_ONLY) -MSA_ST_DF(DF_BYTE, b, helper_ret_stb_mmu, oi, GETRA()) -MSA_ST_DF(DF_HALF, h, helper_ret_stw_mmu, oi, GETRA()) -MSA_ST_DF(DF_WORD, w, helper_ret_stl_mmu, oi, GETRA()) -MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETRA()) +MSA_ST_DF(DF_BYTE, b, helper_ret_stb_mmu, oi, GETPC()) +MSA_ST_DF(DF_HALF, h, helper_ret_stw_mmu, oi, GETPC()) +MSA_ST_DF(DF_WORD, w, helper_ret_stl_mmu, oi, GETPC()) +MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETPC()) #else MSA_ST_DF(DF_BYTE, b, cpu_stb_data) MSA_ST_DF(DF_HALF, h, cpu_stw_data) diff --git a/target-mips/translate.c b/target-mips/translate.c index bab52cb254..57b824ff2d 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -3871,6 +3871,7 @@ static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) break; } + check_cp1_enabled(ctx); gen_load_fpr64(ctx, t0, rs); gen_load_fpr64(ctx, t1, rt); @@ -3945,8 +3946,11 @@ static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) LMI_DIRECT(XOR_CP2, xor, xor); LMI_DIRECT(NOR_CP2, nor, nor); LMI_DIRECT(AND_CP2, and, and); - LMI_DIRECT(PANDN, pandn, 
andc); - LMI_DIRECT(OR, or, or); + LMI_DIRECT(OR_CP2, or, or); + + case OPC_PANDN: + tcg_gen_andc_i64(t0, t1, t0); + break; case OPC_PINSRH_0: tcg_gen_deposit_i64(t0, t0, t1, 0, 16); @@ -13109,6 +13113,34 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd, tcg_temp_free(t1); } +static void gen_sync(int stype) +{ + TCGBar tcg_mo = TCG_BAR_SC; + + switch (stype) { + case 0x4: /* SYNC_WMB */ + tcg_mo |= TCG_MO_ST_ST; + break; + case 0x10: /* SYNC_MB */ + tcg_mo |= TCG_MO_ALL; + break; + case 0x11: /* SYNC_ACQUIRE */ + tcg_mo |= TCG_MO_LD_LD | TCG_MO_LD_ST; + break; + case 0x12: /* SYNC_RELEASE */ + tcg_mo |= TCG_MO_ST_ST | TCG_MO_LD_ST; + break; + case 0x13: /* SYNC_RMB */ + tcg_mo |= TCG_MO_LD_LD; + break; + default: + tcg_mo |= TCG_MO_ALL; + break; + } + + tcg_gen_mb(tcg_mo); +} + static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs) { int extension = (ctx->opcode >> 6) & 0x3f; @@ -13384,7 +13416,7 @@ static void gen_pool32axf (CPUMIPSState *env, DisasContext *ctx, int rt, int rs) case 0x2d: switch (minor) { case SYNC: - /* NOP */ + gen_sync(extract32(ctx->opcode, 16, 5)); break; case SYSCALL: generate_exception_end(ctx, EXCP_SYSCALL); @@ -17201,7 +17233,7 @@ static void decode_opc_special(CPUMIPSState *env, DisasContext *ctx) break; case OPC_SYNC: check_insn(ctx, ISA_MIPS2); - /* Treat as NOP. */ + gen_sync(extract32(ctx->opcode, 6, 5)); break; #if defined(TARGET_MIPS64) @@ -20015,9 +20047,11 @@ void gen_intermediate_code(CPUMIPSState *env, struct TranslationBlock *tb) LOG_DISAS("\n"); if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, ctx.pc - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c index 39ed5c4c1b..6ae23e476f 100644 --- a/target-mips/translate_init.c +++ b/target-mips/translate_init.c @@ -255,6 +255,28 @@ static const mips_def_t mips_defs[] = .insn_flags = CPU_MIPS32R2 | ASE_MIPS16, .mmu_type = MMU_TYPE_R4000, }, + { + .name = "24KEc", + .CP0_PRid = 0x00019600, + .CP0_Config0 = MIPS_CONFIG0 | (0x1 << CP0C0_AR) | + (MMU_TYPE_R4000 << CP0C0_MT), + .CP0_Config1 = MIPS_CONFIG1 | (15 << CP0C1_MMU) | + (0 << CP0C1_IS) | (3 << CP0C1_IL) | (1 << CP0C1_IA) | + (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) | + (1 << CP0C1_CA), + .CP0_Config2 = MIPS_CONFIG2, + .CP0_Config3 = MIPS_CONFIG3 | (1 << CP0C3_DSPP) | (0 << CP0C3_VInt), + .CP0_LLAddr_rw_bitmask = 0, + .CP0_LLAddr_shift = 4, + .SYNCI_Step = 32, + .CCRes = 2, + /* we have a DSP, but no FPU */ + .CP0_Status_rw_bitmask = 0x1378FF1F, + .SEGBITS = 32, + .PABITS = 32, + .insn_flags = CPU_MIPS32R2 | ASE_MIPS16 | ASE_DSP, + .mmu_type = MMU_TYPE_R4000, + }, { .name = "24Kf", .CP0_PRid = 0x00019300, diff --git a/target-moxie/cpu.c b/target-moxie/cpu.c index 50a0899471..b0be4a7551 100644 --- a/target-moxie/cpu.c +++ b/target-moxie/cpu.c @@ -61,6 +61,13 @@ static void moxie_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); MoxieCPUClass *mcc = MOXIE_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } qemu_init_vcpu(cs); cpu_reset(cs); @@ -75,7 +82,6 @@ static void moxie_cpu_initfn(Object *obj) static int inited; cs->env_ptr = &cpu->env; - cpu_exec_init(cs, &error_abort); if (tcg_enabled() && !inited) { inited = 1; @@ -124,13 +130,6 @@ static void 
moxie_cpu_class_init(ObjectClass *oc, void *data) cc->vmsd = &vmstate_moxie_cpu; #endif cc->disas_set_info = moxie_cpu_disas_set_info; - - /* - * Reason: moxie_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void moxielite_initfn(Object *obj) diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c index 155913f107..698e87bb25 100644 --- a/target-openrisc/cpu.c +++ b/target-openrisc/cpu.c @@ -81,6 +81,13 @@ static void openrisc_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); OpenRISCCPUClass *occ = OPENRISC_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } qemu_init_vcpu(cs); cpu_reset(cs); @@ -95,7 +102,6 @@ static void openrisc_cpu_initfn(Object *obj) static int inited; cs->env_ptr = &cpu->env; - cpu_exec_init(cs, &error_abort); #ifndef CONFIG_USER_ONLY cpu_openrisc_mmu_init(cpu); @@ -180,13 +186,6 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data) dc->vmsd = &vmstate_openrisc_cpu; #endif cc->gdb_num_core_regs = 32 + 3; - - /* - * Reason: openrisc_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void cpu_register(const OpenRISCCPUInfo *info) diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c index 28c944657c..229361aed1 100644 --- a/target-openrisc/translate.c +++ b/target-openrisc/translate.c @@ -1651,10 +1651,6 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb) dc->synced_flags = dc->tb_flags = tb->flags; dc->delayed_branch = !!(dc->tb_flags & D_FLAG); dc->singlestep_enabled = cs->singlestep_enabled; - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { - qemu_log("-----------------------------------------\n"); - log_cpu_state(CPU(cpu), 0); - } next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; num_insns = 0; @@ -1754,10 +1750,13 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { - qemu_log("\n"); + qemu_log_lock(); + qemu_log("----------------\n"); + qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, 0); qemu_log("\nisize=%d osize=%d\n", dc->pc - pc_start, tcg_op_buf_count()); + qemu_log_unlock(); } #endif } diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c index 5209e63a72..506dee1ee8 100644 --- a/target-ppc/cpu-models.c +++ b/target-ppc/cpu-models.c @@ -1130,10 +1130,6 @@ #if defined(TODO) POWERPC_DEF("POWER6", CPU_POWERPC_POWER6, POWER6, "POWER6") - POWERPC_DEF("POWER6_5", CPU_POWERPC_POWER6_5, POWER5, - "POWER6 running in POWER5 mode") - POWERPC_DEF("POWER6A", CPU_POWERPC_POWER6A, POWER6, - "POWER6A") #endif POWERPC_DEF("POWER7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") @@ -1147,6 +1143,10 @@ "POWER8NVL v1.0") POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, "PowerPC 970 v2.2") + + POWERPC_DEF("POWER9_v1.0", CPU_POWERPC_POWER9_BASE, POWER9, + "POWER9 v1.0") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, "PowerPC 970FX v1.0 (G5)") POWERPC_DEF("970fx_v2.0", CPU_POWERPC_970FX_v20, 970, @@ -1395,6 +1395,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "POWER8E", "POWER8E_v2.1" }, { 
"POWER8", "POWER8_v2.0" }, { "POWER8NVL", "POWER8NVL_v1.0" }, + { "POWER9", "POWER9_v1.0" }, { "970", "970_v2.2" }, { "970fx", "970fx_v3.1" }, { "970mp", "970mp_v1.1" }, diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h index f21a44c830..aafbbd7d5d 100644 --- a/target-ppc/cpu-models.h +++ b/target-ppc/cpu-models.h @@ -549,8 +549,6 @@ enum { CPU_POWERPC_POWER5 = 0x003A0203, CPU_POWERPC_POWER5P_v21 = 0x003B0201, CPU_POWERPC_POWER6 = 0x003E0000, - CPU_POWERPC_POWER6_5 = 0x0F000001, /* POWER6 in POWER5 mode */ - CPU_POWERPC_POWER6A = 0x0F000002, CPU_POWERPC_POWER_SERVER_MASK = 0xFFFF0000, CPU_POWERPC_POWER7_BASE = 0x003F0000, CPU_POWERPC_POWER7_v23 = 0x003F0203, @@ -562,6 +560,7 @@ enum { CPU_POWERPC_POWER8_v20 = 0x004D0200, CPU_POWERPC_POWER8NVL_BASE = 0x004C0000, CPU_POWERPC_POWER8NVL_v10 = 0x004C0100, + CPU_POWERPC_POWER9_BASE = 0x004E0000, CPU_POWERPC_970_v22 = 0x00390202, CPU_POWERPC_970FX_v10 = 0x00391100, CPU_POWERPC_970FX_v20 = 0x003C0200, diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h index 286410502f..d46c31a15d 100644 --- a/target-ppc/cpu-qom.h +++ b/target-ppc/cpu-qom.h @@ -86,6 +86,7 @@ enum powerpc_mmu_t { POWERPC_MMU_2_07 = POWERPC_MMU_64 | POWERPC_MMU_1TSEG | POWERPC_MMU_64K | POWERPC_MMU_AMR | 0x00000004, + /* FIXME Add POWERPC_MMU_3_OO defines */ /* Architecture 2.07 "degraded" (no 1T segments) */ POWERPC_MMU_2_07a = POWERPC_MMU_64 | POWERPC_MMU_AMR | 0x00000004, @@ -173,6 +174,7 @@ typedef struct PowerPCCPUClass { /*< public >*/ DeviceRealize parent_realize; + DeviceUnrealize parent_unrealize; void (*parent_reset)(CPUState *cpu); uint32_t pvr; diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 786ab5cdfa..2a50c43689 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1009,6 +1009,8 @@ struct CPUPPCState { bool tlb_dirty; /* Set to non-zero when modifying TLB */ bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */ uint32_t tlb_need_flush; /* Delayed flush needed */ +#define TLB_NEED_LOCAL_FLUSH 0x1 +#define TLB_NEED_GLOBAL_FLUSH 0x2 #endif /* Other registers */ @@ -1164,6 +1166,13 @@ struct PowerPCCPU { int cpu_dt_id; uint32_t max_compat; uint32_t cpu_version; + + /* Fields related to migration compatibility hacks */ + bool pre_2_8_migration; + target_ulong mig_msr_mask; + uint64_t mig_insns_flags; + uint64_t mig_insns_flags2; + uint32_t mig_nb_BATs; }; static inline PowerPCCPU *ppc_env_get_cpu(CPUPPCState *env) @@ -1184,8 +1193,6 @@ void ppc_cpu_dump_state(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf, int flags); void ppc_cpu_dump_statistics(CPUState *cpu, FILE *f, fprintf_function cpu_fprintf, int flags); -int ppc_cpu_get_monitor_def(CPUState *cs, const char *name, - uint64_t *pval); hwaddr ppc_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); int ppc_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); int ppc_cpu_gdb_read_register_apple(CPUState *cpu, uint8_t *buf, int reg); @@ -1202,7 +1209,6 @@ extern const struct VMStateDescription vmstate_ppc_cpu; PowerPCCPU *cpu_ppc_init(const char *cpu_model); void ppc_translate_init(void); const char *ppc_cpu_lookup_alias(const char *alias); -void gen_update_current_nip(void *opaque); /* you can call this signal handler from your SIGBUS and SIGSEGV signal handlers to inform the virtual CPU of exceptions. non zero is returned if the signal was handled by the virtual CPU. 
*/ @@ -2095,6 +2101,8 @@ enum { PPC2_TM = 0x0000000000020000ULL, /* Server PM instructgions (ISA 2.06, Book III) */ PPC2_PM_ISA206 = 0x0000000000040000ULL, + /* POWER ISA 3.0 */ + PPC2_ISA300 = 0x0000000000080000ULL, #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \ @@ -2102,7 +2110,8 @@ enum { PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \ PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \ PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \ - PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206) + PPC2_FP_CVT_S64 | PPC2_TM | PPC2_PM_ISA206 | \ + PPC2_ISA300) }; /*****************************************************************************/ @@ -2296,6 +2305,14 @@ static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, *flags = env->hflags; } +void QEMU_NORETURN raise_exception(CPUPPCState *env, uint32_t exception); +void QEMU_NORETURN raise_exception_ra(CPUPPCState *env, uint32_t exception, + uintptr_t raddr); +void QEMU_NORETURN raise_exception_err(CPUPPCState *env, uint32_t exception, + uint32_t error_code); +void QEMU_NORETURN raise_exception_err_ra(CPUPPCState *env, uint32_t exception, + uint32_t error_code, uintptr_t raddr); + #if !defined(CONFIG_USER_ONLY) static inline int booke206_tlbm_id(CPUPPCState *env, ppcmas_tlb_t *tlbm) { diff --git a/target-ppc/dfp_helper.c b/target-ppc/dfp_helper.c index db0ede698b..9164fe701b 100644 --- a/target-ppc/dfp_helper.c +++ b/target-ppc/dfp_helper.c @@ -647,6 +647,41 @@ uint32_t helper_##op(CPUPPCState *env, uint64_t *a, uint64_t *b) \ DFP_HELPER_TSTSF(dtstsf, 64) DFP_HELPER_TSTSF(dtstsfq, 128) +#define DFP_HELPER_TSTSFI(op, size) \ +uint32_t helper_##op(CPUPPCState *env, uint32_t a, uint64_t *b) \ +{ \ + struct PPC_DFP dfp; \ + unsigned uim; \ + \ + dfp_prepare_decimal##size(&dfp, 0, b, env); \ + \ + uim = a & 0x3F; \ + \ + if (unlikely(decNumberIsSpecial(&dfp.b))) { \ + dfp.crbf = 1; \ + } else if (uim == 0) { \ + dfp.crbf = 4; \ + } else if (unlikely(decNumberIsZero(&dfp.b))) { \ + /* Zero has no sig digits */ \ + dfp.crbf = 4; \ + } else { \ + unsigned nsd = dfp.b.digits; \ + if (uim < nsd) { \ + dfp.crbf = 8; \ + } else if (uim > nsd) { \ + dfp.crbf = 4; \ + } else { \ + dfp.crbf = 2; \ + } \ + } \ + \ + dfp_set_FPCC_from_CRBF(&dfp); \ + return dfp.crbf; \ +} + +DFP_HELPER_TSTSFI(dtstsfi, 64) +DFP_HELPER_TSTSFI(dtstsfiq, 128) + static void QUA_PPs(struct PPC_DFP *dfp) { dfp_set_FPRF_from_FRT(dfp); diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index d6e1678a63..93369d4fe5 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -198,7 +198,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) default: goto excp_invalid; } - goto store_next; + break; case POWERPC_EXCP_MCHECK: /* Machine check exception */ if (msr_me == 0) { /* Machine check exception is not enabled. @@ -213,7 +213,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) cs->halted = 1; cs->interrupt_request |= CPU_INTERRUPT_EXITTB; } - new_msr |= (target_ulong)MSR_HVB; + if (env->msr_mask & MSR_HVB) { + /* ISA specifies HV, but can be delivered to guest with HV clear + * (e.g., see FWNMI in PAPR). 
+ */ + new_msr |= (target_ulong)MSR_HVB; + } ail = 0; /* machine check exceptions don't have ME set */ @@ -235,16 +240,16 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) default: break; } - goto store_next; + break; case POWERPC_EXCP_DSI: /* Data storage exception */ LOG_EXCP("DSI exception: DSISR=" TARGET_FMT_lx" DAR=" TARGET_FMT_lx "\n", env->spr[SPR_DSISR], env->spr[SPR_DAR]); - goto store_next; + break; case POWERPC_EXCP_ISI: /* Instruction storage exception */ LOG_EXCP("ISI exception: msr=" TARGET_FMT_lx ", nip=" TARGET_FMT_lx "\n", msr, env->nip); msr |= env->error_code; - goto store_next; + break; case POWERPC_EXCP_EXTERNAL: /* External input */ cs = CPU(cpu); @@ -258,13 +263,15 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* IACK the IRQ on delivery */ env->spr[SPR_BOOKE_EPR] = ldl_phys(cs->as, env->mpic_iack); } - goto store_next; + break; case POWERPC_EXCP_ALIGN: /* Alignment exception */ - /* XXX: this is false */ /* Get rS/rD and rA from faulting opcode */ - env->spr[SPR_DSISR] |= (cpu_ldl_code(env, (env->nip - 4)) - & 0x03FF0000) >> 16; - goto store_next; + /* Note: the opcode fields will not be set properly for a direct + * store load/store, but nobody cares as nobody actually uses + * direct store segments. + */ + env->spr[SPR_DSISR] |= (env->error_code & 0x03FF0000) >> 16; + break; case POWERPC_EXCP_PROGRAM: /* Program exception */ switch (env->error_code & ~0xF) { case POWERPC_EXCP_FP: @@ -274,11 +281,12 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) env->error_code = 0; return; } + + /* FP exceptions always have NIP pointing to the faulting + * instruction, so always use store_next and claim we are + * precise in the MSR. + */ msr |= 0x00100000; - if (msr_fe0 == msr_fe1) { - goto store_next; - } - msr |= 0x00010000; break; case POWERPC_EXCP_INVAL: LOG_EXCP("Invalid instruction at " TARGET_FMT_lx "\n", env->nip); @@ -299,19 +307,16 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) env->error_code); break; } - goto store_current; - case POWERPC_EXCP_HV_EMU: - srr0 = SPR_HSRR0; - srr1 = SPR_HSRR1; - new_msr |= (target_ulong)MSR_HVB; - new_msr |= env->msr & ((target_ulong)1 << MSR_RI); - goto store_current; - case POWERPC_EXCP_FPU: /* Floating-point unavailable exception */ - goto store_current; + break; case POWERPC_EXCP_SYSCALL: /* System call exception */ dump_syscall(env); lev = env->error_code; + /* We need to correct the NIP which in this case is supposed + * to point to the next instruction + */ + env->nip += 4; + /* "PAPR mode" built-in hypercall emulation */ if ((lev == 1) && cpu_ppc_hypercall) { cpu_ppc_hypercall(cpu); @@ -320,15 +325,15 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) if (lev == 1) { new_msr |= (target_ulong)MSR_HVB; } - goto store_next; + break; + case POWERPC_EXCP_FPU: /* Floating-point unavailable exception */ case POWERPC_EXCP_APU: /* Auxiliary processor unavailable */ - goto store_current; case POWERPC_EXCP_DECR: /* Decrementer exception */ - goto store_next; + break; case POWERPC_EXCP_FIT: /* Fixed-interval timer interrupt */ /* FIT on 4xx */ LOG_EXCP("FIT exception\n"); - goto store_next; + break; case POWERPC_EXCP_WDT: /* Watchdog timer interrupt */ LOG_EXCP("WDT exception\n"); switch (excp_model) { @@ -339,11 +344,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) default: break; } - goto store_next; + break; case POWERPC_EXCP_DTLB: /* Data TLB error */ - goto 
store_next; case POWERPC_EXCP_ITLB: /* Instruction TLB error */ - goto store_next; + break; case POWERPC_EXCP_DEBUG: /* Debug interrupt */ switch (excp_model) { case POWERPC_EXCP_BOOKE: @@ -358,102 +362,91 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) } /* XXX: TODO */ cpu_abort(cs, "Debug exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_SPEU: /* SPE/embedded floating-point unavailable */ env->spr[SPR_BOOKE_ESR] = ESR_SPV; - goto store_current; + break; case POWERPC_EXCP_EFPDI: /* Embedded floating-point data interrupt */ /* XXX: TODO */ cpu_abort(cs, "Embedded floating point data exception " "is not implemented yet !\n"); env->spr[SPR_BOOKE_ESR] = ESR_SPV; - goto store_next; + break; case POWERPC_EXCP_EFPRI: /* Embedded floating-point round interrupt */ /* XXX: TODO */ cpu_abort(cs, "Embedded floating point round exception " "is not implemented yet !\n"); env->spr[SPR_BOOKE_ESR] = ESR_SPV; - goto store_next; + break; case POWERPC_EXCP_EPERFM: /* Embedded performance monitor interrupt */ /* XXX: TODO */ cpu_abort(cs, "Performance counter exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_DOORI: /* Embedded doorbell interrupt */ - goto store_next; + break; case POWERPC_EXCP_DOORCI: /* Embedded doorbell critical interrupt */ srr0 = SPR_BOOKE_CSRR0; srr1 = SPR_BOOKE_CSRR1; - goto store_next; + break; case POWERPC_EXCP_RESET: /* System reset exception */ + /* A power-saving exception sets ME, otherwise it is unchanged */ if (msr_pow) { /* indicate that we resumed from power save mode */ msr |= 0x10000; + new_msr |= ((target_ulong)1 << MSR_ME); + } + if (env->msr_mask & MSR_HVB) { + /* ISA specifies HV, but can be delivered to guest with HV clear + * (e.g., see FWNMI in PAPR, NMI injection in QEMU). 
+ */ + new_msr |= (target_ulong)MSR_HVB; } else { - new_msr &= ~((target_ulong)1 << MSR_ME); + if (msr_pow) { + cpu_abort(cs, "Trying to deliver power-saving system reset " + "exception %d with no HV support\n", excp); + } } - - new_msr |= (target_ulong)MSR_HVB; ail = 0; - goto store_next; + break; case POWERPC_EXCP_DSEG: /* Data segment exception */ - goto store_next; case POWERPC_EXCP_ISEG: /* Instruction segment exception */ - goto store_next; - case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ - srr0 = SPR_HSRR0; - srr1 = SPR_HSRR1; - new_msr |= (target_ulong)MSR_HVB; - new_msr |= env->msr & ((target_ulong)1 << MSR_RI); - goto store_next; case POWERPC_EXCP_TRACE: /* Trace exception */ - goto store_next; + break; + case POWERPC_EXCP_HDECR: /* Hypervisor decrementer exception */ case POWERPC_EXCP_HDSI: /* Hypervisor data storage exception */ - srr0 = SPR_HSRR0; - srr1 = SPR_HSRR1; - new_msr |= (target_ulong)MSR_HVB; - new_msr |= env->msr & ((target_ulong)1 << MSR_RI); - goto store_next; case POWERPC_EXCP_HISI: /* Hypervisor instruction storage exception */ - srr0 = SPR_HSRR0; - srr1 = SPR_HSRR1; - new_msr |= (target_ulong)MSR_HVB; - new_msr |= env->msr & ((target_ulong)1 << MSR_RI); - goto store_next; case POWERPC_EXCP_HDSEG: /* Hypervisor data segment exception */ - srr0 = SPR_HSRR0; - srr1 = SPR_HSRR1; - new_msr |= (target_ulong)MSR_HVB; - new_msr |= env->msr & ((target_ulong)1 << MSR_RI); - goto store_next; case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception */ + case POWERPC_EXCP_HV_EMU: srr0 = SPR_HSRR0; srr1 = SPR_HSRR1; new_msr |= (target_ulong)MSR_HVB; new_msr |= env->msr & ((target_ulong)1 << MSR_RI); - goto store_next; + break; case POWERPC_EXCP_VPU: /* Vector unavailable exception */ - goto store_current; case POWERPC_EXCP_VSXU: /* VSX unavailable exception */ - goto store_current; case POWERPC_EXCP_FU: /* Facility unavailable exception */ - goto store_current; +#ifdef TARGET_PPC64 + env->spr[SPR_FSCR] |= ((target_ulong)env->error_code << 56); +#endif + break; case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt */ LOG_EXCP("PIT exception\n"); - goto store_next; + break; case POWERPC_EXCP_IO: /* IO error exception */ /* XXX: TODO */ cpu_abort(cs, "601 IO error exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_RUNM: /* Run mode exception */ /* XXX: TODO */ cpu_abort(cs, "601 run mode exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_EMUL: /* Emulation trap exception */ /* XXX: TODO */ cpu_abort(cs, "602 emulation trap exception " "is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_IFTLB: /* Instruction fetch TLB error */ switch (excp_model) { case POWERPC_EXCP_602: @@ -568,78 +561,80 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) cpu_abort(cs, "Invalid data store TLB miss exception\n"); break; } - goto store_next; + break; case POWERPC_EXCP_FPA: /* Floating-point assist exception */ /* XXX: TODO */ cpu_abort(cs, "Floating point assist exception " "is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_DABR: /* Data address breakpoint */ /* XXX: TODO */ cpu_abort(cs, "DABR exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_IABR: /* Instruction address breakpoint */ /* XXX: TODO */ cpu_abort(cs, "IABR exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_SMI: /* System management interrupt */ /* XXX: TODO */ cpu_abort(cs, "SMI 
exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_THERM: /* Thermal interrupt */ /* XXX: TODO */ cpu_abort(cs, "Thermal management exception " "is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_PERFM: /* Embedded performance monitor interrupt */ /* XXX: TODO */ cpu_abort(cs, "Performance counter exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_VPUA: /* Vector assist exception */ /* XXX: TODO */ cpu_abort(cs, "VPU assist exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_SOFTP: /* Soft patch exception */ /* XXX: TODO */ cpu_abort(cs, "970 soft-patch exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_MAINT: /* Maintenance exception */ /* XXX: TODO */ cpu_abort(cs, "970 maintenance exception is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_MEXTBR: /* Maskable external breakpoint */ /* XXX: TODO */ cpu_abort(cs, "Maskable external exception " "is not implemented yet !\n"); - goto store_next; + break; case POWERPC_EXCP_NMEXTBR: /* Non maskable external breakpoint */ /* XXX: TODO */ cpu_abort(cs, "Non maskable external exception " "is not implemented yet !\n"); - goto store_next; + break; default: excp_invalid: cpu_abort(cs, "Invalid PowerPC exception %d. Aborting\n", excp); break; - store_current: - /* save current instruction location */ - env->spr[srr0] = env->nip - 4; - break; - store_next: - /* save next instruction location */ - env->spr[srr0] = env->nip; - break; } + + /* Save PC */ + env->spr[srr0] = env->nip; + /* Save MSR */ env->spr[srr1] = msr; /* Sanity check */ - if (!(env->msr_mask & MSR_HVB) && (srr0 == SPR_HSRR0)) { - cpu_abort(cs, "Trying to deliver HV exception %d with " - "no HV support\n", excp); + if (!(env->msr_mask & MSR_HVB)) { + if (new_msr & MSR_HVB) { + cpu_abort(cs, "Trying to deliver HV exception (MSR) %d with " + "no HV support\n", excp); + } + if (srr0 == SPR_HSRR0) { + cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with " + "no HV support\n", excp); + } } /* If any alternate SRR register are defined, duplicate saved values */ @@ -739,7 +734,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* Any interrupt is context synchronizing, check if TCG TLB * needs a delayed flush on ppc64 */ - check_tlb_flush(env); + check_tlb_flush(env, false); } void ppc_cpu_do_interrupt(CPUState *cs) @@ -898,34 +893,53 @@ static void cpu_dump_rfi(target_ulong RA, target_ulong msr) /*****************************************************************************/ /* Exceptions processing helpers */ -void helper_raise_exception_err(CPUPPCState *env, uint32_t exception, - uint32_t error_code) +void raise_exception_err_ra(CPUPPCState *env, uint32_t exception, + uint32_t error_code, uintptr_t raddr) { CPUState *cs = CPU(ppc_env_get_cpu(env)); -#if 0 - printf("Raise exception %3x code : %d\n", exception, error_code); -#endif cs->exception_index = exception; env->error_code = error_code; - cpu_loop_exit(cs); + cpu_loop_exit_restore(cs, raddr); +} + +void raise_exception_err(CPUPPCState *env, uint32_t exception, + uint32_t error_code) +{ + raise_exception_err_ra(env, exception, error_code, 0); +} + +void raise_exception(CPUPPCState *env, uint32_t exception) +{ + raise_exception_err_ra(env, exception, 0, 0); +} + +void raise_exception_ra(CPUPPCState *env, uint32_t exception, + uintptr_t raddr) +{ + raise_exception_err_ra(env, exception, 0, raddr); +} + +void 
helper_raise_exception_err(CPUPPCState *env, uint32_t exception, + uint32_t error_code) +{ + raise_exception_err_ra(env, exception, error_code, 0); } void helper_raise_exception(CPUPPCState *env, uint32_t exception) { - helper_raise_exception_err(env, exception, 0); + raise_exception_err_ra(env, exception, 0, 0); } #if !defined(CONFIG_USER_ONLY) void helper_store_msr(CPUPPCState *env, target_ulong val) { - CPUState *cs; + uint32_t excp = hreg_store_msr(env, val, 0); - val = hreg_store_msr(env, val, 0); - if (val != 0) { - cs = CPU(ppc_env_get_cpu(env)); + if (excp != 0) { + CPUState *cs = CPU(ppc_env_get_cpu(env)); cs->interrupt_request |= CPU_INTERRUPT_EXITTB; - helper_raise_exception(env, val); + raise_exception(env, excp); } } @@ -951,7 +965,7 @@ void helper_pminsn(CPUPPCState *env, powerpc_pm_insn_t insn) * but this doesn't seem to be a problem. */ env->msr |= (1ull << MSR_EE); - helper_raise_exception(env, EXCP_HLT); + raise_exception(env, EXCP_HLT); } #endif /* defined(TARGET_PPC64) */ @@ -982,7 +996,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) cs->interrupt_request |= CPU_INTERRUPT_EXITTB; /* Context synchronizing: check if TCG TLB needs flush */ - check_tlb_flush(env); + check_tlb_flush(env, false); } void helper_rfi(CPUPPCState *env) @@ -1041,8 +1055,8 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, ((int32_t)arg1 == (int32_t)arg2 && (flags & 0x04)) || ((uint32_t)arg1 < (uint32_t)arg2 && (flags & 0x02)) || ((uint32_t)arg1 > (uint32_t)arg2 && (flags & 0x01))))) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_TRAP); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_TRAP, GETPC()); } } @@ -1055,8 +1069,8 @@ void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, ((int64_t)arg1 == (int64_t)arg2 && (flags & 0x04)) || ((uint64_t)arg1 < (uint64_t)arg2 && (flags & 0x02)) || ((uint64_t)arg1 > (uint64_t)arg2 && (flags & 0x01))))) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_TRAP); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_TRAP, GETPC()); } } #endif diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c index d9795d04d0..8a389e19af 100644 --- a/target-ppc/fpu_helper.c +++ b/target-ppc/fpu_helper.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "exec/exec-all.h" #define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL) #define float32_snan_to_qnan(x) ((x) | 0x00400000) @@ -116,8 +117,8 @@ void helper_compute_fprf(CPUPPCState *env, uint64_t arg) } /* Floating-point invalid operations exception */ -static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op, - int set_fpcc) +static inline __attribute__((__always_inline__)) +uint64_t float_invalid_op_excp(CPUPPCState *env, int op, int set_fpcc) { CPUState *cs = CPU(ppc_env_get_cpu(env)); uint64_t ret = 0; @@ -200,14 +201,15 @@ static inline uint64_t fload_invalid_op_excp(CPUPPCState *env, int op, /* Update the floating-point enabled exception summary */ env->fpscr |= 1 << FPSCR_FEX; if (msr_fe0 != 0 || msr_fe1 != 0) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_FP | op); + /* GETPC() works here because this is inline */ + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_FP | op, GETPC()); } } return ret; } -static inline void float_zero_divide_excp(CPUPPCState *env) +static inline void float_zero_divide_excp(CPUPPCState *env, uintptr_t raddr) { env->fpscr 
|= 1 << FPSCR_ZX; env->fpscr &= ~((1 << FPSCR_FR) | (1 << FPSCR_FI)); @@ -217,8 +219,9 @@ static inline void float_zero_divide_excp(CPUPPCState *env) /* Update the floating-point enabled exception summary */ env->fpscr |= 1 << FPSCR_FEX; if (msr_fe0 != 0 || msr_fe1 != 0) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_FP | POWERPC_EXCP_FP_ZX); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_FP | POWERPC_EXCP_FP_ZX, + raddr); } } } @@ -491,13 +494,13 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask) helper_store_fpscr(env, arg, mask); } -void helper_float_check_status(CPUPPCState *env) +static void do_float_check_status(CPUPPCState *env, uintptr_t raddr) { CPUState *cs = CPU(ppc_env_get_cpu(env)); int status = get_float_exception_flags(&env->fp_status); if (status & float_flag_divbyzero) { - float_zero_divide_excp(env); + float_zero_divide_excp(env, raddr); } else if (status & float_flag_overflow) { float_overflow_excp(env); } else if (status & float_flag_underflow) { @@ -510,12 +513,24 @@ void helper_float_check_status(CPUPPCState *env) (env->error_code & POWERPC_EXCP_FP)) { /* Differred floating-point exception after target FPR update */ if (msr_fe0 != 0 || msr_fe1 != 0) { - helper_raise_exception_err(env, cs->exception_index, - env->error_code); + raise_exception_err_ra(env, cs->exception_index, + env->error_code, raddr); } } } +static inline __attribute__((__always_inline__)) +void float_check_status(CPUPPCState *env) +{ + /* GETPC() works here because this is inline */ + do_float_check_status(env, GETPC()); +} + +void helper_float_check_status(CPUPPCState *env) +{ + do_float_check_status(env, GETPC()); +} + void helper_reset_fpstatus(CPUPPCState *env) { set_float_exception_flags(0, &env->fp_status); @@ -532,12 +547,12 @@ uint64_t helper_fadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2) if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) && float64_is_neg(farg1.d) != float64_is_neg(farg2.d))) { /* Magnitude subtraction of infinities */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status))) { /* sNaN addition */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } farg1.d = float64_add(farg1.d, farg2.d, &env->fp_status); } @@ -556,12 +571,12 @@ uint64_t helper_fsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2) if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) && float64_is_neg(farg1.d) == float64_is_neg(farg2.d))) { /* Magnitude subtraction of infinities */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status))) { /* sNaN subtraction */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } farg1.d = float64_sub(farg1.d, farg2.d, &env->fp_status); } @@ -580,12 +595,12 @@ uint64_t helper_fmul(CPUPPCState *env, uint64_t arg1, uint64_t arg2) if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { /* Multiplication of zero by infinity */ - farg1.ll = 
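
Both float_invalid_op_excp() and float_check_status() are forced inline above precisely so that GETPC() observes the return address of the TCG-generated code that called the helper, not an intermediate C frame. The sketch below is an analogy only: it uses the GCC/clang __builtin_return_address() builtin instead of QEMU's GETPC()/cpu_loop_exit_restore() machinery, and report()/check_and_report() are invented names.

/* Analogy for the "GETPC() works here because this is inline" pattern:
 * the address capture must happen in an always-inline wrapper so that it
 * evaluates in the real caller's frame. */
#include <stdint.h>
#include <stdio.h>

static void __attribute__((noinline))
report(const char *what, uintptr_t raddr)
{
    /* QEMU would call cpu_loop_exit_restore() here; we just print. */
    printf("%s raised from caller pc %#lx\n", what, (unsigned long)raddr);
}

static inline __attribute__((__always_inline__)) void
check_and_report(int bad, const char *what)
{
    if (bad) {
        /* Inlined into the caller, so this is the caller's return address. */
        report(what, (uintptr_t)__builtin_return_address(0));
    }
}

int main(void)
{
    check_and_report(1, "deferred FP exception (illustrative)");
    return 0;
}
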
fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status))) { /* sNaN multiplication */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status); } @@ -604,15 +619,15 @@ uint64_t helper_fdiv(CPUPPCState *env, uint64_t arg1, uint64_t arg2) if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d))) { /* Division of infinity by infinity */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, 1); } else if (unlikely(float64_is_zero(farg1.d) && float64_is_zero(farg2.d))) { /* Division of zero by zero */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status))) { /* sNaN division */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } farg1.d = float64_div(farg1.d, farg2.d, &env->fp_status); } @@ -631,16 +646,16 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ \ if (unlikely(env->fp_status.float_exception_flags)) { \ if (float64_is_any_nan(arg)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1); \ if (float64_is_signaling_nan(arg, &env->fp_status)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ } \ farg.ll = nanval; \ } else if (env->fp_status.float_exception_flags & \ float_flag_invalid) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 1); \ } \ - helper_float_check_status(env); \ + float_check_status(env); \ } \ return farg.ll; \ } @@ -665,7 +680,7 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg) \ } else { \ farg.d = cvtr(arg, &env->fp_status); \ } \ - helper_float_check_status(env); \ + float_check_status(env); \ return farg.ll; \ } @@ -683,7 +698,7 @@ static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg, if (unlikely(float64_is_signaling_nan(farg.d, &env->fp_status))) { /* sNaN round */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); farg.ll = arg | 0x0008000000000000ULL; } else { int inexact = get_float_exception_flags(&env->fp_status) & @@ -698,7 +713,7 @@ static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg, env->fp_status.float_exception_flags &= ~float_flag_inexact; } } - helper_float_check_status(env); + float_check_status(env); return farg.ll; } @@ -735,13 +750,13 @@ uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { /* Multiplication of zero by infinity */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status) || float64_is_signaling_nan(farg3.d, &env->fp_status))) { /* 
sNaN operation */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } /* This is the way the PowerPC specification defines it */ float128 ft0_128, ft1_128; @@ -753,7 +768,7 @@ uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, float64_is_infinity(farg3.d) && float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { /* Magnitude subtraction of infinities */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } else { ft1_128 = float64_to_float128(farg3.d, &env->fp_status); ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); @@ -778,13 +793,13 @@ uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { /* Multiplication of zero by infinity */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status) || float64_is_signaling_nan(farg3.d, &env->fp_status))) { /* sNaN operation */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } /* This is the way the PowerPC specification defines it */ float128 ft0_128, ft1_128; @@ -796,7 +811,7 @@ uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, float64_is_infinity(farg3.d) && float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { /* Magnitude subtraction of infinities */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } else { ft1_128 = float64_to_float128(farg3.d, &env->fp_status); ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); @@ -819,13 +834,13 @@ uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { /* Multiplication of zero by infinity */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); } else { if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status) || float64_is_signaling_nan(farg3.d, &env->fp_status))) { /* sNaN operation */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } /* This is the way the PowerPC specification defines it */ float128 ft0_128, ft1_128; @@ -837,7 +852,7 @@ uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, float64_is_infinity(farg3.d) && float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { /* Magnitude subtraction of infinities */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } else { ft1_128 = float64_to_float128(farg3.d, &env->fp_status); ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); @@ -864,13 +879,13 @@ uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { /* Multiplication of zero by infinity */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); } else { if 
(unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status) || float64_is_signaling_nan(farg3.d, &env->fp_status))) { /* sNaN operation */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } /* This is the way the PowerPC specification defines it */ float128 ft0_128, ft1_128; @@ -882,7 +897,7 @@ uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, float64_is_infinity(farg3.d) && float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { /* Magnitude subtraction of infinities */ - farg1.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } else { ft1_128 = float64_to_float128(farg3.d, &env->fp_status); ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); @@ -905,7 +920,7 @@ uint64_t helper_frsp(CPUPPCState *env, uint64_t arg) if (unlikely(float64_is_signaling_nan(farg.d, &env->fp_status))) { /* sNaN square root */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } f32 = float64_to_float32(farg.d, &env->fp_status); farg.d = float32_to_float64(f32, &env->fp_status); @@ -923,12 +938,12 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg) if (unlikely(float64_is_any_nan(farg.d))) { if (unlikely(float64_is_signaling_nan(farg.d, &env->fp_status))) { /* sNaN reciprocal square root */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); farg.ll = float64_snan_to_qnan(farg.ll); } } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { /* Square root of a negative nonzero number */ - farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); + farg.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); } else { farg.d = float64_sqrt(farg.d, &env->fp_status); } @@ -944,7 +959,7 @@ uint64_t helper_fre(CPUPPCState *env, uint64_t arg) if (unlikely(float64_is_signaling_nan(farg.d, &env->fp_status))) { /* sNaN reciprocal */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } farg.d = float64_div(float64_one, farg.d, &env->fp_status); return farg.d; @@ -960,7 +975,7 @@ uint64_t helper_fres(CPUPPCState *env, uint64_t arg) if (unlikely(float64_is_signaling_nan(farg.d, &env->fp_status))) { /* sNaN reciprocal */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } farg.d = float64_div(float64_one, farg.d, &env->fp_status); f32 = float64_to_float32(farg.d, &env->fp_status); @@ -979,12 +994,12 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg) if (unlikely(float64_is_any_nan(farg.d))) { if (unlikely(float64_is_signaling_nan(farg.d, &env->fp_status))) { /* sNaN reciprocal square root */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); farg.ll = float64_snan_to_qnan(farg.ll); } } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { /* Reciprocal square root of a negative nonzero number */ - farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); + farg.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1); } else { farg.d = float64_sqrt(farg.d, &env->fp_status); farg.d = float64_div(float64_one, farg.d, &env->fp_status); @@ -1103,7 +1118,7 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2, && 
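
helper_fmadd() and the other multiply-add helpers widen to float128 so that the whole operation is rounded only once, which is what the PowerPC specification requires. The standalone program below shows the observable difference between one rounding and two using C99 fma() (link with -lm); the constants are simply a convenient case where the double-rounded result collapses to zero.

/* Fused multiply-add (one rounding) versus separate multiply and add
 * (two roundings). */
#include <stdio.h>
#include <math.h>
#include <float.h>

int main(void)
{
    double a = 1.0 + DBL_EPSILON;          /* 1 + 2^-52 */
    double b = 1.0 + DBL_EPSILON;
    double c = -(1.0 + 2.0 * DBL_EPSILON);

    double two_roundings = a * b + c;      /* product already rounded: 0 */
    double one_rounding  = fma(a, b, c);   /* keeps the 2^-104 tail */

    printf("a*b + c    = %.17g\n", two_roundings);
    printf("fma(a,b,c) = %.17g\n", one_rounding);
    return 0;
}
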
(float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status)))) { /* sNaN comparison */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); } } @@ -1134,11 +1149,11 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, uint64_t arg2, if (float64_is_signaling_nan(farg1.d, &env->fp_status) || float64_is_signaling_nan(farg2.d, &env->fp_status)) { /* sNaN comparison */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN | + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN | POWERPC_EXCP_FP_VXVC, 1); } else { /* qNaN comparison */ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 1); + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 1); } } } @@ -1838,10 +1853,10 @@ void helper_##name(CPUPPCState *env, uint32_t opcode) \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ if (tp##_is_infinity(xa.fld) && tp##_is_infinity(xb.fld)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \ } else if (tp##_is_signaling_nan(xa.fld, &tstat) || \ tp##_is_signaling_nan(xb.fld, &tstat)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ } \ } \ \ @@ -1854,7 +1869,7 @@ void helper_##name(CPUPPCState *env, uint32_t opcode) \ } \ } \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_ADD_SUB(xsadddp, add, 1, float64, VsrD(0), 1, 0) @@ -1893,10 +1908,10 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ if ((tp##_is_infinity(xa.fld) && tp##_is_zero(xb.fld)) || \ (tp##_is_infinity(xb.fld) && tp##_is_zero(xa.fld))) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, sfprf); \ } else if (tp##_is_signaling_nan(xa.fld, &tstat) || \ tp##_is_signaling_nan(xb.fld, &tstat)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ } \ } \ \ @@ -1910,7 +1925,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_MUL(xsmuldp, 1, float64, VsrD(0), 1, 0) @@ -1944,13 +1959,13 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ if (tp##_is_infinity(xa.fld) && tp##_is_infinity(xb.fld)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIDI, sfprf); \ } else if (tp##_is_zero(xa.fld) && \ tp##_is_zero(xb.fld)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXZDZ, sfprf); \ } else if (tp##_is_signaling_nan(xa.fld, &tstat) || \ tp##_is_signaling_nan(xb.fld, &tstat)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ } \ } \ \ @@ -1964,7 +1979,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_DIV(xsdivdp, 1, float64, VsrD(0), 1, 0) @@ -1991,7 +2006,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ \ for (i = 0; i < nels; i++) { \ if (unlikely(tp##_is_signaling_nan(xb.fld, 
&env->fp_status))) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ } \ xt.fld = tp##_div(tp##_one, xb.fld, &env->fp_status); \ \ @@ -2005,7 +2020,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_RE(xsredp, 1, float64, VsrD(0), 1, 0) @@ -2038,9 +2053,9 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ if (tp##_is_neg(xb.fld) && !tp##_is_zero(xb.fld)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf); \ } else if (tp##_is_signaling_nan(xb.fld, &tstat)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ } \ } \ \ @@ -2054,7 +2069,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_SQRT(xssqrtdp, 1, float64, VsrD(0), 1, 0) @@ -2088,9 +2103,9 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ \ if (unlikely(tstat.float_exception_flags & float_flag_invalid)) { \ if (tp##_is_neg(xb.fld) && !tp##_is_zero(xb.fld)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, sfprf); \ } else if (tp##_is_signaling_nan(xb.fld, &tstat)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ } \ } \ \ @@ -2104,7 +2119,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_RSQRTE(xsrsqrtedp, 1, float64, VsrD(0), 1, 0) @@ -2277,12 +2292,12 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ if (tp##_is_signaling_nan(xa.fld, &tstat) || \ tp##_is_signaling_nan(b->fld, &tstat) || \ tp##_is_signaling_nan(c->fld, &tstat)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, sfprf); \ tstat.float_exception_flags &= ~float_flag_invalid; \ } \ if ((tp##_is_infinity(xa.fld) && tp##_is_zero(b->fld)) || \ (tp##_is_zero(xa.fld) && tp##_is_infinity(b->fld))) { \ - xt_out.fld = float64_to_##tp(fload_invalid_op_excp(env, \ + xt_out.fld = float64_to_##tp(float_invalid_op_excp(env, \ POWERPC_EXCP_FP_VXIMZ, sfprf), &env->fp_status); \ tstat.float_exception_flags &= ~float_flag_invalid; \ } \ @@ -2290,7 +2305,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ ((tp##_is_infinity(xa.fld) || \ tp##_is_infinity(b->fld)) && \ tp##_is_infinity(c->fld))) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, sfprf); \ } \ } \ \ @@ -2303,7 +2318,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ } \ putVSR(xT(opcode), &xt_out, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } #define MADD_FLGS 0 @@ -2347,6 +2362,58 @@ VSX_MADD(xvnmaddmsp, 4, float32, VsrW(i), NMADD_FLGS, 0, 0, 0) VSX_MADD(xvnmsubasp, 4, float32, VsrW(i), NMSUB_FLGS, 1, 0, 0) VSX_MADD(xvnmsubmsp, 4, float32, VsrW(i), NMSUB_FLGS, 0, 0, 0) +/* VSX_SCALAR_CMP_DP - VSX scalar floating point compare double precision + * op - instruction mnemonic + * cmp - comparison operation + * exp - expected result of 
comparison + * svxvc - set VXVC bit + */ +#define VSX_SCALAR_CMP_DP(op, cmp, exp, svxvc) \ +void helper_##op(CPUPPCState *env, uint32_t opcode) \ +{ \ + ppc_vsr_t xt, xa, xb; \ + bool vxsnan_flag = false, vxvc_flag = false, vex_flag = false; \ + \ + getVSR(xA(opcode), &xa, env); \ + getVSR(xB(opcode), &xb, env); \ + getVSR(xT(opcode), &xt, env); \ + \ + if (float64_is_signaling_nan(xa.VsrD(0), &env->fp_status) || \ + float64_is_signaling_nan(xb.VsrD(0), &env->fp_status)) { \ + vxsnan_flag = true; \ + if (fpscr_ve == 0 && svxvc) { \ + vxvc_flag = true; \ + } \ + } else if (svxvc) { \ + vxvc_flag = float64_is_quiet_nan(xa.VsrD(0), &env->fp_status) || \ + float64_is_quiet_nan(xb.VsrD(0), &env->fp_status); \ + } \ + if (vxsnan_flag) { \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + } \ + if (vxvc_flag) { \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0); \ + } \ + vex_flag = fpscr_ve && (vxvc_flag || vxsnan_flag); \ + \ + if (!vex_flag) { \ + if (float64_##cmp(xb.VsrD(0), xa.VsrD(0), &env->fp_status) == exp) { \ + xt.VsrD(0) = -1; \ + xt.VsrD(1) = 0; \ + } else { \ + xt.VsrD(0) = 0; \ + xt.VsrD(1) = 0; \ + } \ + } \ + putVSR(xT(opcode), &xt, env); \ + helper_float_check_status(env); \ +} + +VSX_SCALAR_CMP_DP(xscmpeqdp, eq, 1, 0) +VSX_SCALAR_CMP_DP(xscmpgedp, le, 1, 1) +VSX_SCALAR_CMP_DP(xscmpgtdp, lt, 1, 1) +VSX_SCALAR_CMP_DP(xscmpnedp, eq, 0, 0) + #define VSX_SCALAR_CMP(op, ordered) \ void helper_##op(CPUPPCState *env, uint32_t opcode) \ { \ @@ -2360,10 +2427,10 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ float64_is_any_nan(xb.VsrD(0)))) { \ if (float64_is_signaling_nan(xa.VsrD(0), &env->fp_status) || \ float64_is_signaling_nan(xb.VsrD(0), &env->fp_status)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ } \ if (ordered) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0); \ } \ cc = 1; \ } else { \ @@ -2381,7 +2448,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ env->fpscr |= cc << FPSCR_FPRF; \ env->crf[BF(opcode)] = cc; \ \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_SCALAR_CMP(xscmpodp, 1) @@ -2408,12 +2475,12 @@ void helper_##name(CPUPPCState *env, uint32_t opcode) \ xt.fld = tp##_##op(xa.fld, xb.fld, &env->fp_status); \ if (unlikely(tp##_is_signaling_nan(xa.fld, &env->fp_status) || \ tp##_is_signaling_nan(xb.fld, &env->fp_status))) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ } \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_MAX_MIN(xsmaxdp, maxnum, 1, float64, VsrD(0)) @@ -2430,8 +2497,9 @@ VSX_MAX_MIN(xvminsp, minnum, 4, float32, VsrW(i)) * fld - vsr_t field (VsrD(*) or VsrW(*)) * cmp - comparison operation * svxvc - set VXVC bit + * exp - expected result of comparison */ -#define VSX_CMP(op, nels, tp, fld, cmp, svxvc) \ +#define VSX_CMP(op, nels, tp, fld, cmp, svxvc, exp) \ void helper_##op(CPUPPCState *env, uint32_t opcode) \ { \ ppc_vsr_t xt, xa, xb; \ @@ -2448,15 +2516,15 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ tp##_is_any_nan(xb.fld))) { \ if (tp##_is_signaling_nan(xa.fld, &env->fp_status) || \ tp##_is_signaling_nan(xb.fld, &env->fp_status)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ } \ if (svxvc) { \ - fload_invalid_op_excp(env, 
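
The new VSX_SCALAR_CMP_DP helpers write the comparison result as an all-ones or all-zero doubleword in the target register, and a NaN operand leaves the predicate false whenever no enabled exception is taken. The sketch below reproduces only that result encoding with host doubles; cmp_eq_mask is an invented name and all FPSCR/trap side effects are omitted.

/* All-ones/all-zeros result convention of the new scalar compares
 * (xscmpeqdp and friends), with NaNs treated as unordered. */
#include <stdint.h>
#include <stdio.h>
#include <math.h>

static uint64_t cmp_eq_mask(double a, double b)
{
    if (isnan(a) || isnan(b)) {
        return 0;                          /* unordered: predicate is false */
    }
    return (a == b) ? UINT64_MAX : 0;
}

int main(void)
{
    printf("eq(2.0, 2.0) -> %016llx\n",
           (unsigned long long)cmp_eq_mask(2.0, 2.0));
    printf("eq(2.0, NaN) -> %016llx\n",
           (unsigned long long)cmp_eq_mask(2.0, NAN));
    return 0;
}
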
POWERPC_EXCP_FP_VXVC, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXVC, 0); \ } \ xt.fld = 0; \ all_true = 0; \ } else { \ - if (tp##_##cmp(xb.fld, xa.fld, &env->fp_status) == 1) { \ + if (tp##_##cmp(xb.fld, xa.fld, &env->fp_status) == exp) { \ xt.fld = -1; \ all_false = 0; \ } else { \ @@ -2470,15 +2538,17 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ if ((opcode >> (31-21)) & 1) { \ env->crf[6] = (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0); \ } \ - helper_float_check_status(env); \ + float_check_status(env); \ } -VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0) -VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1) -VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1) -VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0) -VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1) -VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1) +VSX_CMP(xvcmpeqdp, 2, float64, VsrD(i), eq, 0, 1) +VSX_CMP(xvcmpgedp, 2, float64, VsrD(i), le, 1, 1) +VSX_CMP(xvcmpgtdp, 2, float64, VsrD(i), lt, 1, 1) +VSX_CMP(xvcmpnedp, 2, float64, VsrD(i), eq, 0, 0) +VSX_CMP(xvcmpeqsp, 4, float32, VsrW(i), eq, 0, 1) +VSX_CMP(xvcmpgesp, 4, float32, VsrW(i), le, 1, 1) +VSX_CMP(xvcmpgtsp, 4, float32, VsrW(i), lt, 1, 1) +VSX_CMP(xvcmpnesp, 4, float32, VsrW(i), eq, 0, 0) /* VSX_CVT_FP_TO_FP - VSX floating point/floating point conversion * op - instruction mnemonic @@ -2502,7 +2572,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ xt.tfld = stp##_to_##ttp(xb.sfld, &env->fp_status); \ if (unlikely(stp##_is_signaling_nan(xb.sfld, \ &env->fp_status))) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ xt.tfld = ttp##_snan_to_qnan(xt.tfld); \ } \ if (sfprf) { \ @@ -2512,7 +2582,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_CVT_FP_TO_FP(xscvdpsp, 1, float64, float32, VsrD(0), VsrW(0), 1) @@ -2557,21 +2627,21 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ for (i = 0; i < nels; i++) { \ if (unlikely(stp##_is_any_nan(xb.sfld))) { \ if (stp##_is_signaling_nan(xb.sfld, &env->fp_status)) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ } \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0); \ xt.tfld = rnan; \ } else { \ xt.tfld = stp##_to_##ttp##_round_to_zero(xb.sfld, \ &env->fp_status); \ if (env->fp_status.float_exception_flags & float_flag_invalid) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXCVI, 0); \ } \ } \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_CVT_FP_TO_INT(xscvdpsxds, 1, float64, int64, VsrD(0), VsrD(0), \ @@ -2622,7 +2692,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_CVT_INT_TO_FP(xscvsxddp, 1, int64, float64, VsrD(0), VsrD(0), 1, 0) @@ -2667,7 +2737,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ for (i = 0; i < nels; i++) { \ if (unlikely(tp##_is_signaling_nan(xb.fld, \ &env->fp_status))) { \ - fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0); \ xt.fld = tp##_snan_to_qnan(xb.fld); \ } else { \ xt.fld = tp##_round_to_int(xb.fld, &env->fp_status); \ @@ -2686,7 +2756,7 @@ void 
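
The exp argument added to VSX_CMP lets the new not-equal variants reuse the existing equality test by expecting it to return 0, and CR6 is summarised as an all-true bit (0x8) and an all-false bit (0x2). A minimal standalone version of that loop follows, with plain doubles and no NaN or VXVC handling; vcmp and is_eq are invented names.

/* Vector "not equal" built from an equality test by expecting the
 * opposite result, plus the CR6-style summary bits. */
#include <stdint.h>
#include <stdio.h>

static int is_eq(double a, double b) { return a == b; }

static uint32_t vcmp(const double *a, const double *b, uint64_t *out,
                     int n, int expected /* 1: equal, 0: not equal */)
{
    int all_true = 1, all_false = 1;

    for (int i = 0; i < n; i++) {
        if (is_eq(a[i], b[i]) == expected) {
            out[i] = UINT64_MAX;
            all_false = 0;
        } else {
            out[i] = 0;
            all_true = 0;
        }
    }
    return (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0);
}

int main(void)
{
    double a[2] = { 1.0, 2.0 }, b[2] = { 1.0, 3.0 };
    uint64_t mask[2];

    uint32_t cr6 = vcmp(a, b, mask, 2, 0);  /* "not equal" variant */
    printf("mask = %016llx %016llx, cr6 = %#x\n",
           (unsigned long long)mask[0], (unsigned long long)mask[1], cr6);
    return 0;
}
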
helper_##op(CPUPPCState *env, uint32_t opcode) \ } \ \ putVSR(xT(opcode), &xt, env); \ - helper_float_check_status(env); \ + float_check_status(env); \ } VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_ties_away, 1) @@ -2714,6 +2784,6 @@ uint64_t helper_xsrsp(CPUPPCState *env, uint64_t xb) uint64_t xt = helper_frsp(env, xb); helper_compute_fprf(env, xt); - helper_float_check_status(env); + float_check_status(env); return xt; } diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 1f5cfd0990..da00f0ab49 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -1,8 +1,8 @@ -DEF_HELPER_3(raise_exception_err, void, env, i32, i32) -DEF_HELPER_2(raise_exception, void, env, i32) -DEF_HELPER_4(tw, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, void, env, i32, i32) +DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_4(td, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif #if !defined(CONFIG_USER_ONLY) DEF_HELPER_2(store_msr, void, env, tl) @@ -18,16 +18,17 @@ DEF_HELPER_1(rfid, void, env) DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(store_lpcr, void, env, tl) #endif -DEF_HELPER_1(check_tlb_flush, void, env) +DEF_HELPER_1(check_tlb_flush_local, void, env) +DEF_HELPER_1(check_tlb_flush_global, void, env) #endif DEF_HELPER_3(lmw, void, env, tl, i32) -DEF_HELPER_3(stmw, void, env, tl, i32) +DEF_HELPER_FLAGS_3(stmw, TCG_CALL_NO_WG, void, env, tl, i32) DEF_HELPER_4(lsw, void, env, tl, i32, i32) DEF_HELPER_5(lswx, void, env, tl, i32, i32, i32) -DEF_HELPER_4(stsw, void, env, tl, i32, i32) -DEF_HELPER_3(dcbz, void, env, tl, i32) -DEF_HELPER_2(icbi, void, env, tl) +DEF_HELPER_FLAGS_4(stsw, TCG_CALL_NO_WG, void, env, tl, i32, i32) +DEF_HELPER_FLAGS_3(dcbz, TCG_CALL_NO_WG, void, env, tl, i32) +DEF_HELPER_FLAGS_2(icbi, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) #if defined(TARGET_PPC64) @@ -38,15 +39,20 @@ DEF_HELPER_4(divweu, tl, env, tl, tl, i32) DEF_HELPER_4(divwe, tl, env, tl, tl, i32) DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(cnttzw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) #if defined(TARGET_PPC64) +DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_1(cnttzd, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) +DEF_HELPER_0(darn32, tl) +DEF_HELPER_0(darn64, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) @@ -115,6 +121,9 @@ DEF_HELPER_3(vsubudm, void, avr, avr, avr) DEF_HELPER_3(vavgub, void, avr, avr, avr) DEF_HELPER_3(vavguh, void, avr, avr, avr) DEF_HELPER_3(vavguw, void, avr, avr, avr) +DEF_HELPER_3(vabsdub, void, avr, avr, avr) +DEF_HELPER_3(vabsduh, void, avr, avr, avr) +DEF_HELPER_3(vabsduw, void, avr, avr, avr) DEF_HELPER_3(vavgsb, void, avr, avr, avr) DEF_HELPER_3(vavgsh, void, avr, avr, avr) DEF_HELPER_3(vavgsw, void, avr, avr, avr) @@ -138,6 +147,12 @@ DEF_HELPER_4(vcmpequb, void, env, avr, avr, avr) DEF_HELPER_4(vcmpequh, void, env, avr, avr, avr) DEF_HELPER_4(vcmpequw, void, env, avr, avr, avr) 
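
check_tlb_flush is split here into _local and _global helpers; the helper_regs.h hunk further down keeps a per-CPU pending-flush mask and walks the other CPUs only for broadcast invalidations. The toy model below illustrates that deferred bookkeeping with invented structures (toy_cpu, a fixed cpus[] array) and a print statement standing in for the real tlb_flush(); it is not QEMU's API.

/* Toy model of deferred TLB flushing: instructions only set pending
 * flags; the flush happens at the next context-synchronizing point, and
 * a "global" flush is propagated to every other CPU. */
#include <stdio.h>

#define TLB_NEED_LOCAL_FLUSH   0x1
#define TLB_NEED_GLOBAL_FLUSH  0x2
#define NR_CPUS 4

struct toy_cpu { int id; unsigned tlb_need_flush; };
static struct toy_cpu cpus[NR_CPUS];

static void tlb_flush(struct toy_cpu *cpu)
{
    printf("cpu%d: TLB flushed\n", cpu->id);
}

static void check_tlb_flush(struct toy_cpu *cpu, int global)
{
    if (cpu->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) {
        tlb_flush(cpu);
        cpu->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
    }
    if (global && (cpu->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) {
        for (int i = 0; i < NR_CPUS; i++) {
            if (&cpus[i] != cpu) {
                cpus[i].tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH;
                tlb_flush(&cpus[i]);
            }
        }
        cpu->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH;
    }
}

int main(void)
{
    for (int i = 0; i < NR_CPUS; i++) {
        cpus[i].id = i;
    }
    /* A broadcast invalidation marks both flags ... */
    cpus[0].tlb_need_flush = TLB_NEED_LOCAL_FLUSH | TLB_NEED_GLOBAL_FLUSH;
    /* ... and the flush is performed lazily at the synchronizing point. */
    check_tlb_flush(&cpus[0], 1);
    return 0;
}
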
DEF_HELPER_4(vcmpequd, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpneb, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpneh, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnew, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnezb, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnezh, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnezw, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtub, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtuh, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtuw, void, env, avr, avr, avr) @@ -154,6 +169,12 @@ DEF_HELPER_4(vcmpequb_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpequh_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpequw_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpequd_dot, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpneb_dot, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpneh_dot, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnew_dot, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnezb_dot, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnezh_dot, void, env, avr, avr, avr) +DEF_HELPER_4(vcmpnezw_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtub_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtuh_dot, void, env, avr, avr, avr) DEF_HELPER_4(vcmpgtuw_dot, void, env, avr, avr, avr) @@ -199,7 +220,12 @@ DEF_HELPER_3(vslw, void, avr, avr, avr) DEF_HELPER_3(vsld, void, avr, avr, avr) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) +DEF_HELPER_3(vsrv, void, avr, avr, avr) +DEF_HELPER_3(vslv, void, avr, avr, avr) DEF_HELPER_3(vaddcuw, void, avr, avr, avr) +DEF_HELPER_2(vprtybw, void, avr, avr) +DEF_HELPER_2(vprtybd, void, avr, avr) +DEF_HELPER_2(vprtybq, void, avr, avr) DEF_HELPER_3(vsubcuw, void, avr, avr, avr) DEF_HELPER_2(lvsl, void, avr, tl) DEF_HELPER_2(lvsr, void, avr, tl) @@ -236,6 +262,21 @@ DEF_HELPER_2(vspltisw, void, avr, i32) DEF_HELPER_3(vspltb, void, avr, avr, i32) DEF_HELPER_3(vsplth, void, avr, avr, i32) DEF_HELPER_3(vspltw, void, avr, avr, i32) +DEF_HELPER_3(vextractub, void, avr, avr, i32) +DEF_HELPER_3(vextractuh, void, avr, avr, i32) +DEF_HELPER_3(vextractuw, void, avr, avr, i32) +DEF_HELPER_3(vextractd, void, avr, avr, i32) +DEF_HELPER_3(vinsertb, void, avr, avr, i32) +DEF_HELPER_3(vinserth, void, avr, avr, i32) +DEF_HELPER_3(vinsertw, void, avr, avr, i32) +DEF_HELPER_3(vinsertd, void, avr, avr, i32) +DEF_HELPER_2(vextsb2w, void, avr, avr) +DEF_HELPER_2(vextsh2w, void, avr, avr) +DEF_HELPER_2(vextsb2d, void, avr, avr) +DEF_HELPER_2(vextsh2d, void, avr, avr) +DEF_HELPER_2(vextsw2d, void, avr, avr) +DEF_HELPER_2(vnegw, void, avr, avr) +DEF_HELPER_2(vnegd, void, avr, avr) DEF_HELPER_2(vupkhpx, void, avr, avr) DEF_HELPER_2(vupklpx, void, avr, avr) DEF_HELPER_2(vupkhsb, void, avr, avr) @@ -248,6 +289,7 @@ DEF_HELPER_5(vmsumubm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vmsummbm, void, env, avr, avr, avr, avr) DEF_HELPER_5(vsel, void, env, avr, avr, avr, avr) DEF_HELPER_5(vperm, void, env, avr, avr, avr, avr) +DEF_HELPER_5(vpermr, void, env, avr, avr, avr, avr) DEF_HELPER_4(vpkshss, void, env, avr, avr, avr) DEF_HELPER_4(vpkshus, void, env, avr, avr, avr) DEF_HELPER_4(vpkswss, void, env, avr, avr, avr) @@ -286,6 +328,10 @@ DEF_HELPER_4(vmaxfp, void, env, avr, avr, avr) DEF_HELPER_4(vminfp, void, env, avr, avr, avr) DEF_HELPER_3(vrefp, void, env, avr, avr) DEF_HELPER_3(vrsqrtefp, void, env, avr, avr) +DEF_HELPER_3(vrlwmi, void, avr, avr, avr) +DEF_HELPER_3(vrldmi, void, avr, avr, avr) +DEF_HELPER_3(vrldnm, void, avr, avr, avr) +DEF_HELPER_3(vrlwnm, void, avr, avr, avr) DEF_HELPER_5(vmaddfp, void, env, avr, avr, avr, avr) 
DEF_HELPER_5(vnmsubfp, void, env, avr, avr, avr, avr) DEF_HELPER_3(vexptefp, void, env, avr, avr) @@ -303,10 +349,17 @@ DEF_HELPER_2(vclzb, void, avr, avr) DEF_HELPER_2(vclzh, void, avr, avr) DEF_HELPER_2(vclzw, void, avr, avr) DEF_HELPER_2(vclzd, void, avr, avr) +DEF_HELPER_2(vctzb, void, avr, avr) +DEF_HELPER_2(vctzh, void, avr, avr) +DEF_HELPER_2(vctzw, void, avr, avr) +DEF_HELPER_2(vctzd, void, avr, avr) DEF_HELPER_2(vpopcntb, void, avr, avr) DEF_HELPER_2(vpopcnth, void, avr, avr) DEF_HELPER_2(vpopcntw, void, avr, avr) DEF_HELPER_2(vpopcntd, void, avr, avr) +DEF_HELPER_1(vclzlsbb, tl, avr) +DEF_HELPER_1(vctzlsbb, tl, avr) +DEF_HELPER_3(vbpermd, void, avr, avr, avr) DEF_HELPER_3(vbpermq, void, avr, avr, avr) DEF_HELPER_2(vgbbd, void, avr, avr) DEF_HELPER_3(vpmsumb, void, avr, avr, avr) @@ -325,6 +378,10 @@ DEF_HELPER_4(vpermxor, void, avr, avr, avr, avr) DEF_HELPER_4(bcdadd, i32, avr, avr, avr, i32) DEF_HELPER_4(bcdsub, i32, avr, avr, avr, i32) +DEF_HELPER_3(bcdcfn, i32, avr, avr, i32) +DEF_HELPER_3(bcdctn, i32, avr, avr, i32) +DEF_HELPER_3(bcdcfz, i32, avr, avr, i32) +DEF_HELPER_3(bcdctz, i32, avr, avr, i32) DEF_HELPER_2(xsadddp, void, env, i32) DEF_HELPER_2(xssubdp, void, env, i32) @@ -343,6 +400,10 @@ DEF_HELPER_2(xsnmaddadp, void, env, i32) DEF_HELPER_2(xsnmaddmdp, void, env, i32) DEF_HELPER_2(xsnmsubadp, void, env, i32) DEF_HELPER_2(xsnmsubmdp, void, env, i32) +DEF_HELPER_2(xscmpeqdp, void, env, i32) +DEF_HELPER_2(xscmpgtdp, void, env, i32) +DEF_HELPER_2(xscmpgedp, void, env, i32) +DEF_HELPER_2(xscmpnedp, void, env, i32) DEF_HELPER_2(xscmpodp, void, env, i32) DEF_HELPER_2(xscmpudp, void, env, i32) DEF_HELPER_2(xsmaxdp, void, env, i32) @@ -404,6 +465,7 @@ DEF_HELPER_2(xvmindp, void, env, i32) DEF_HELPER_2(xvcmpeqdp, void, env, i32) DEF_HELPER_2(xvcmpgedp, void, env, i32) DEF_HELPER_2(xvcmpgtdp, void, env, i32) +DEF_HELPER_2(xvcmpnedp, void, env, i32) DEF_HELPER_2(xvcvdpsp, void, env, i32) DEF_HELPER_2(xvcvdpsxds, void, env, i32) DEF_HELPER_2(xvcvdpsxws, void, env, i32) @@ -441,6 +503,7 @@ DEF_HELPER_2(xvminsp, void, env, i32) DEF_HELPER_2(xvcmpeqsp, void, env, i32) DEF_HELPER_2(xvcmpgesp, void, env, i32) DEF_HELPER_2(xvcmpgtsp, void, env, i32) +DEF_HELPER_2(xvcmpnesp, void, env, i32) DEF_HELPER_2(xvcvspdp, void, env, i32) DEF_HELPER_2(xvcvspsxds, void, env, i32) DEF_HELPER_2(xvcvspsxws, void, env, i32) @@ -581,35 +644,35 @@ DEF_HELPER_2(load_dump_spr, void, env, i32) DEF_HELPER_2(store_dump_spr, void, env, i32) DEF_HELPER_4(fscr_facility_check, void, env, i32, i32, i32) DEF_HELPER_4(msr_facility_check, void, env, i32, i32, i32) -DEF_HELPER_1(load_tbl, tl, env) -DEF_HELPER_1(load_tbu, tl, env) -DEF_HELPER_1(load_atbl, tl, env) -DEF_HELPER_1(load_atbu, tl, env) -DEF_HELPER_1(load_601_rtcl, tl, env) -DEF_HELPER_1(load_601_rtcu, tl, env) +DEF_HELPER_FLAGS_1(load_tbl, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_1(load_tbu, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_1(load_atbl, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_1(load_atbu, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_1(load_601_rtcl, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_1(load_601_rtcu, TCG_CALL_NO_RWG, tl, env) #if !defined(CONFIG_USER_ONLY) #if defined(TARGET_PPC64) -DEF_HELPER_1(load_purr, tl, env) +DEF_HELPER_FLAGS_1(load_purr, TCG_CALL_NO_RWG, tl, env) #endif DEF_HELPER_2(store_sdr1, void, env, tl) -DEF_HELPER_2(store_tbl, void, env, tl) -DEF_HELPER_2(store_tbu, void, env, tl) -DEF_HELPER_2(store_atbl, void, env, tl) -DEF_HELPER_2(store_atbu, void, env, tl) -DEF_HELPER_2(store_601_rtcl, void, env, tl) 
-DEF_HELPER_2(store_601_rtcu, void, env, tl) -DEF_HELPER_1(load_decr, tl, env) -DEF_HELPER_2(store_decr, void, env, tl) -DEF_HELPER_1(load_hdecr, tl, env) -DEF_HELPER_2(store_hdecr, void, env, tl) +DEF_HELPER_FLAGS_2(store_tbl, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(store_tbu, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(store_atbl, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(store_atbu, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(store_601_rtcl, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(store_601_rtcu, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_1(load_decr, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_2(store_decr, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_1(load_hdecr, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_2(store_hdecr, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_2(store_hid0_601, void, env, tl) DEF_HELPER_3(store_403_pbr, void, env, i32, tl) -DEF_HELPER_1(load_40x_pit, tl, env) -DEF_HELPER_2(store_40x_pit, void, env, tl) +DEF_HELPER_FLAGS_1(load_40x_pit, TCG_CALL_NO_RWG, tl, env) +DEF_HELPER_FLAGS_2(store_40x_pit, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_2(store_40x_dbcr0, void, env, tl) DEF_HELPER_2(store_40x_sler, void, env, tl) -DEF_HELPER_2(store_booke_tcr, void, env, tl) -DEF_HELPER_2(store_booke_tsr, void, env, tl) +DEF_HELPER_FLAGS_2(store_booke_tcr, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(store_booke_tsr, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_3(store_ibatl, void, env, i32, tl) DEF_HELPER_3(store_ibatu, void, env, i32, tl) DEF_HELPER_3(store_dbatl, void, env, i32, tl) @@ -642,6 +705,8 @@ DEF_HELPER_3(dtstex, i32, env, fprp, fprp) DEF_HELPER_3(dtstexq, i32, env, fprp, fprp) DEF_HELPER_3(dtstsf, i32, env, fprp, fprp) DEF_HELPER_3(dtstsfq, i32, env, fprp, fprp) +DEF_HELPER_3(dtstsfi, i32, env, i32, fprp) +DEF_HELPER_3(dtstsfiq, i32, env, i32, fprp) DEF_HELPER_5(dquai, void, env, fprp, fprp, i32, i32) DEF_HELPER_5(dquaiq, void, env, fprp, fprp, i32, i32) DEF_HELPER_5(dqua, void, env, fprp, fprp, fprp, i32) @@ -674,4 +739,4 @@ DEF_HELPER_4(dscli, void, env, fprp, fprp, i32) DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) -DEF_HELPER_1(fixup_thrm, void, env) +DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 3d279f1d8a..62138163a5 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -131,11 +131,14 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, } /* If PR=1 then EE, IR and DR must be 1 * - * Note: We only enforce this on 64-bit processors. It appears that - * 32-bit implementations supports PR=1 and EE/DR/IR=0 and MacOS - * exploits it. + * Note: We only enforce this on 64-bit server processors. + * It appears that: + * - 32-bit implementations supports PR=1 and EE/DR/IR=0 and MacOS + * exploits it. + * - 64-bit embedded implementations do not need any operation to be + * performed when PR is set. 
*/ - if ((env->insns_flags & PPC_64B) && ((value >> MSR_PR) & 1)) { + if ((env->insns_flags & PPC_SEGMENT_64B) && ((value >> MSR_PR) & 1)) { value |= (1 << MSR_EE) | (1 << MSR_DR) | (1 << MSR_IR); } #endif @@ -154,16 +157,33 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, } #if !defined(CONFIG_USER_ONLY) -static inline void check_tlb_flush(CPUPPCState *env) +static inline void check_tlb_flush(CPUPPCState *env, bool global) { CPUState *cs = CPU(ppc_env_get_cpu(env)); - if (env->tlb_need_flush) { - env->tlb_need_flush = 0; + if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) { tlb_flush(cs, 1); + env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; + } + + /* Propagate TLB invalidations to other CPUs when the guest uses broadcast + * TLB invalidation instructions. + */ + if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) { + CPUState *other_cs; + CPU_FOREACH(other_cs) { + if (other_cs != cs) { + PowerPCCPU *cpu = POWERPC_CPU(other_cs); + CPUPPCState *other_env = &cpu->env; + + other_env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; + tlb_flush(other_cs, 1); + } + } + env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH; } } #else -static inline void check_tlb_flush(CPUPPCState *env) { } +static inline void check_tlb_flush(CPUPPCState *env, bool global) { } #endif #endif /* HELPER_REGS_H */ diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 74453763d6..2d57c9a1c2 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" #include "cpu.h" +#include "internal.h" #include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" @@ -145,11 +146,59 @@ target_ulong helper_cntlzw(target_ulong t) return clz32(t); } +target_ulong helper_cnttzw(target_ulong t) +{ + return ctz32(t); +} + #if defined(TARGET_PPC64) +/* if x = 0xab, returns 0xababababababababa */ +#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff)) + +/* substract 1 from each byte, and with inverse, check if MSB is set at each + * byte. + * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80 + * (0xFF & 0xFF) & 0x80 = 0x80 (zero found) + */ +#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80)) + +/* When you XOR the pattern and there is a match, that byte will be zero */ +#define hasvalue(x, n) (haszero((x) ^ pattern(n))) + +uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +{ + return hasvalue(rb, ra) ? 1 << CRF_GT : 0; +} + +#undef pattern +#undef haszero +#undef hasvalue + target_ulong helper_cntlzd(target_ulong t) { return clz64(t); } + +target_ulong helper_cnttzd(target_ulong t) +{ + return ctz64(t); +} + +/* Return invalid random number. 
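
The pattern()/haszero()/hasvalue() macros above are the classic SWAR byte search used by helper_cmpeqb: XOR with the replicated byte turns a matching byte into zero, and (v - 0x01...01) & ~v & 0x80...80 is non-zero exactly when some byte of v is zero. A self-contained check of the idea, with the macros renamed:

/* Standalone version of the byte-match bit trick: subtract 1 from each
 * byte, AND with the inverted value, and keep only the byte MSBs; a set
 * MSB marks a byte that became zero after the XOR, i.e. a match. */
#include <stdint.h>
#include <stdio.h>

#define PATTERN(x)      (((uint64_t)(x) & 0xff) * (~(uint64_t)0 / 0xff))
#define HASZERO(v)      (((v) - PATTERN(0x01)) & ~(v) & PATTERN(0x80))
#define HASVALUE(v, n)  HASZERO((v) ^ PATTERN(n))

int main(void)
{
    uint64_t rb = 0x1122334455667788ull;

    printf("contains 0x55: %s\n", HASVALUE(rb, 0x55) ? "yes" : "no");
    printf("contains 0x99: %s\n", HASVALUE(rb, 0x99) ? "yes" : "no");
    return 0;
}
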
+ * + * FIXME: Add rng backend or other mechanism to get cryptographically suitable + * random number + */ +target_ulong helper_darn32(void) +{ + return -1; +} + +target_ulong helper_darn64(void) +{ + return -1; +} + #endif #if defined(TARGET_PPC64) @@ -479,6 +528,40 @@ void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) } } +/* vprtybw */ +void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { + uint64_t res = b->u32[i] ^ (b->u32[i] >> 16); + res ^= res >> 8; + r->u32[i] = res & 1; + } +} + +/* vprtybd */ +void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->u64); i++) { + uint64_t res = b->u64[i] ^ (b->u64[i] >> 32); + res ^= res >> 16; + res ^= res >> 8; + r->u64[i] = res & 1; + } +} + +/* vprtybq */ +void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b) +{ + uint64_t res = b->u64[0] ^ b->u64[1]; + res ^= res >> 32; + res ^= res >> 16; + res ^= res >> 8; + r->u64[LO_IDX] = res & 1; + r->u64[HI_IDX] = 0; +} + #define VARITH_DO(name, op, element) \ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ @@ -597,6 +680,30 @@ VAVG(w, s32, int64_t, u32, uint64_t) #undef VAVG_DO #undef VAVG +#define VABSDU_DO(name, element) \ +void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = (a->element[i] > b->element[i]) ? \ + (a->element[i] - b->element[i]) : \ + (b->element[i] - a->element[i]); \ + } \ +} + +/* VABSDU - Vector absolute difference unsigned + * name - instruction mnemonic suffix (b: byte, h: halfword, w: word) + * element - element type to access from vector + */ +#define VABSDU(type, element) \ + VABSDU_DO(absdu##type, element) +VABSDU(b, u8) +VABSDU(h, u16) +VABSDU(w, u32) +#undef VABSDU_DO +#undef VABSDU + #define VCF(suffix, cvt, element) \ void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \ ppc_avr_t *b, uint32_t uim) \ @@ -663,6 +770,49 @@ VCMP(gtsd, >, s64) #undef VCMP_DO #undef VCMP +#define VCMPNE_DO(suffix, element, etype, cmpzero, record) \ +void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \ + ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + etype ones = (etype)-1; \ + etype all = ones; \ + etype result, none = 0; \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + if (cmpzero) { \ + result = ((a->element[i] == 0) \ + || (b->element[i] == 0) \ + || (a->element[i] != b->element[i]) ? \ + ones : 0x0); \ + } else { \ + result = (a->element[i] != b->element[i]) ? 
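
helper_vprtybw/d/q fold each element with XOR shifts so that bit 0 ends up holding the parity of the least-significant bit of every byte in the element. The standalone check below compares that fold against a naive per-byte loop; parity_fold64 and parity_naive64 are invented names.

/* XOR-shift fold versus a naive loop over the LSB of each byte. */
#include <stdint.h>
#include <stdio.h>

static uint64_t parity_fold64(uint64_t v)
{
    v ^= v >> 32;
    v ^= v >> 16;
    v ^= v >> 8;
    return v & 1;
}

static uint64_t parity_naive64(uint64_t v)
{
    uint64_t p = 0;
    for (int i = 0; i < 64; i += 8) {      /* bit 0 of each byte */
        p ^= (v >> i) & 1;
    }
    return p;
}

int main(void)
{
    uint64_t samples[] = { 0, 1, 0x0101010101010101ull, 0x1234567890abcdefull };

    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        printf("%016llx: fold=%llu naive=%llu\n",
               (unsigned long long)samples[i],
               (unsigned long long)parity_fold64(samples[i]),
               (unsigned long long)parity_naive64(samples[i]));
    }
    return 0;
}
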
ones : 0x0; \ + } \ + r->element[i] = result; \ + all &= result; \ + none |= result; \ + } \ + if (record) { \ + env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ + } \ +} + +/* VCMPNEZ - Vector compare not equal to zero + * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word) + * element - element type to access from vector + */ +#define VCMPNE(suffix, element, etype, cmpzero) \ + VCMPNE_DO(suffix, element, etype, cmpzero, 0) \ + VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1) +VCMPNE(zb, u8, uint8_t, 1) +VCMPNE(zh, u16, uint16_t, 1) +VCMPNE(zw, u32, uint32_t, 1) +VCMPNE(b, u8, uint8_t, 0) +VCMPNE(h, u16, uint16_t, 0) +VCMPNE(w, u32, uint32_t, 0) +#undef VCMPNE_DO +#undef VCMPNE + #define VCMPFP_DO(suffix, compare, order, record) \ void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \ ppc_avr_t *a, ppc_avr_t *b) \ @@ -766,6 +916,36 @@ VCT(uxs, cvtsduw, u32) VCT(sxs, cvtsdsw, s32) #undef VCT +target_ulong helper_vclzlsbb(ppc_avr_t *r) +{ + target_ulong count = 0; + int i; + VECTOR_FOR_INORDER_I(i, u8) { + if (r->u8[i] & 0x01) { + break; + } + count++; + } + return count; +} + +target_ulong helper_vctzlsbb(ppc_avr_t *r) +{ + target_ulong count = 0; + int i; +#if defined(HOST_WORDS_BIGENDIAN) + for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { +#else + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { +#endif + if (r->u8[i] & 0x01) { + break; + } + count++; + } + return count; +} + void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { @@ -1034,14 +1214,57 @@ void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, *r = result; } +void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, + ppc_avr_t *c) +{ + ppc_avr_t result; + int i; + + VECTOR_FOR_INORDER_I(i, u8) { + int s = c->u8[i] & 0x1f; +#if defined(HOST_WORDS_BIGENDIAN) + int index = 15 - (s & 0xf); +#else + int index = s & 0xf; +#endif + + if (s & 0x10) { + result.u8[i] = a->u8[index]; + } else { + result.u8[i] = b->u8[index]; + } + } + *r = result; +} + #if defined(HOST_WORDS_BIGENDIAN) #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)]) +#define VBPERMD_INDEX(i) (i) #define VBPERMQ_DW(index) (((index) & 0x40) != 0) +#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1)) #else #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)]) +#define VBPERMD_INDEX(i) (1 - i) #define VBPERMQ_DW(index) (((index) & 0x40) == 0) +#define EXTRACT_BIT(avr, i, index) \ + (extract64((avr)->u64[1 - i], 63 - index, 1)) #endif +void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + ppc_avr_t result = { .u64 = { 0, 0 } }; + VECTOR_FOR_INORDER_I(i, u64) { + for (j = 0; j < 8; j++) { + int index = VBPERMQ_INDEX(b, (i * 8) + j); + if (index < 64 && EXTRACT_BIT(a, i, index)) { + result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j); + } + } + } + *r = result; +} + void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int i; @@ -1529,6 +1752,34 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) } } +#define VRLMI(name, size, element, insert) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + uint##size##_t src1 = a->element[i]; \ + uint##size##_t src2 = b->element[i]; \ + uint##size##_t src3 = r->element[i]; \ + uint##size##_t begin, end, shift, mask, rot_val; \ + \ + shift = extract##size(src2, 0, 6); \ + end = extract##size(src2, 8, 6); \ + begin = extract##size(src2, 16, 6); \ + rot_val = rol##size(src1, shift); \ + mask 
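
vclzlsbb and vctzlsbb count, from either end of the vector, how many bytes have their least-significant bit clear before the first byte with bit 0 set. The same scan on a flat 16-byte array, with element 0 standing in for the leading end and no host-endian handling:

/* Count leading or trailing bytes whose least-significant bit is zero,
 * stopping at the first byte with bit 0 set. */
#include <stdint.h>
#include <stdio.h>

static int count_lsbb(const uint8_t *v, int n, int from_end)
{
    int count = 0;

    for (int k = 0; k < n; k++) {
        int i = from_end ? n - 1 - k : k;
        if (v[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

int main(void)
{
    uint8_t v[16] = { 2, 4, 6, 1, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 };

    printf("leading  bytes with LSB clear: %d\n", count_lsbb(v, 16, 0));
    printf("trailing bytes with LSB clear: %d\n", count_lsbb(v, 16, 1));
    return 0;
}
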
= mask_u##size(begin, end); \ + if (insert) { \ + r->element[i] = (rot_val & mask) | (src3 & ~mask); \ + } else { \ + r->element[i] = (rot_val & mask); \ + } \ + } \ +} + +VRLMI(vrldmi, 64, u64, 1); +VRLMI(vrlwmi, 32, u32, 1); +VRLMI(vrldnm, 64, u64, 0); +VRLMI(vrlwnm, 32, u32, 0); + void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { @@ -1604,6 +1855,37 @@ VSL(w, u32, 0x1F) VSL(d, u64, 0x3F) #undef VSL +void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + unsigned int shift, bytes, size; + + size = ARRAY_SIZE(r->u8); + for (i = 0; i < size; i++) { + shift = b->u8[i] & 0x7; /* extract shift value */ + bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ + (((i + 1) < size) ? a->u8[i + 1] : 0); + r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ + } +} + +void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + unsigned int shift, bytes; + + /* Use reverse order, as destination and source register can be same. Its + * being modified in place saving temporary, reverse order will guarantee + * that computed result is not fed back. + */ + for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { + shift = b->u8[i] & 0x7; /* extract shift value */ + bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; + /* extract adjacent bytes */ + r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ + } +} + void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) { int sh = shift & 0xf; @@ -1669,6 +1951,78 @@ VSPLT(w, u32) #undef VSPLT #undef SPLAT_ELEMENT #undef _SPLAT_MASKED +#if defined(HOST_WORDS_BIGENDIAN) +#define VINSERT(suffix, element) \ + void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ + { \ + memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)], \ + sizeof(r->element[0])); \ + } +#else +#define VINSERT(suffix, element) \ + void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ + { \ + uint32_t d = (16 - index) - sizeof(r->element[0]); \ + memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \ + } +#endif +VINSERT(b, u8) +VINSERT(h, u16) +VINSERT(w, u32) +VINSERT(d, u64) +#undef VINSERT +#if defined(HOST_WORDS_BIGENDIAN) +#define VEXTRACT(suffix, element) \ + void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ + { \ + uint32_t es = sizeof(r->element[0]); \ + memmove(&r->u8[8 - es], &b->u8[index], es); \ + memset(&r->u8[8], 0, 8); \ + memset(&r->u8[0], 0, 8 - es); \ + } +#else +#define VEXTRACT(suffix, element) \ + void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \ + { \ + uint32_t es = sizeof(r->element[0]); \ + uint32_t s = (16 - index) - es; \ + memmove(&r->u8[8], &b->u8[s], es); \ + memset(&r->u8[0], 0, 8); \ + memset(&r->u8[8 + es], 0, 8 - es); \ + } +#endif +VEXTRACT(ub, u8) +VEXTRACT(uh, u16) +VEXTRACT(uw, u32) +VEXTRACT(d, u64) +#undef VEXTRACT + +#define VEXT_SIGNED(name, element, mask, cast, recast) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ +{ \ + int i; \ + VECTOR_FOR_INORDER_I(i, element) { \ + r->element[i] = (recast)((cast)(b->element[i] & mask)); \ + } \ +} +VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t) +VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t) +VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t) +VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t) +VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t) +#undef VEXT_SIGNED + +#define VNEG(name, element) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \ +{ \ + int i; \ + 
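
helper_vslv/vsrv shift every byte by the amount held in the corresponding byte of the other operand and pull the vacated bits from the neighbouring byte; vsrv iterates backwards so an in-place destination is safe. The sketch below mirrors that logic on plain byte arrays rather than ppc_avr_t; vslv and vsrv here are standalone reimplementations, not the QEMU helpers.

/* Per-byte variable shifts: the low 3 bits of each control byte give the
 * shift amount, and the spill-in bits come from the adjacent source byte. */
#include <stdint.h>
#include <stdio.h>

#define N 16

static void vslv(uint8_t *r, const uint8_t *a, const uint8_t *b)
{
    for (int i = 0; i < N; i++) {
        unsigned shift = b[i] & 0x7;
        unsigned bytes = (a[i] << 8) + ((i + 1 < N) ? a[i + 1] : 0);
        r[i] = (bytes << shift) >> 8;
    }
}

static void vsrv(uint8_t *r, const uint8_t *a, const uint8_t *b)
{
    for (int i = N - 1; i >= 0; i--) {     /* reverse order: r may alias a */
        unsigned shift = b[i] & 0x7;
        unsigned bytes = ((i ? a[i - 1] : 0) << 8) + a[i];
        r[i] = (bytes >> shift) & 0xFF;
    }
}

int main(void)
{
    uint8_t a[N] = { 0x80, 0x01 }, b[N] = { 1, 1 }, r[N];

    vslv(r, a, b);
    printf("vslv: %02x %02x\n", r[0], r[1]);
    vsrv(r, a, b);
    printf("vsrv: %02x %02x\n", r[0], r[1]);
    return 0;
}
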
VECTOR_FOR_INORDER_I(i, element) { \ + r->element[i] = -b->element[i]; \ + } \ +} +VNEG(vnegw, s32) +VNEG(vnegd, s64) +#undef VNEG #define VSPLTI(suffix, element, splat_type) \ void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \ @@ -1915,6 +2269,21 @@ VGENERIC_DO(clzd, u64) #undef clzw #undef clzd +#define ctzb(v) ((v) ? ctz32(v) : 8) +#define ctzh(v) ((v) ? ctz32(v) : 16) +#define ctzw(v) ctz32((v)) +#define ctzd(v) ctz64((v)) + +VGENERIC_DO(ctzb, u8) +VGENERIC_DO(ctzh, u16) +VGENERIC_DO(ctzw, u32) +VGENERIC_DO(ctzd, u64) + +#undef ctzb +#undef ctzh +#undef ctzw +#undef ctzd + #define popcntb(v) ctpop8(v) #define popcnth(v) ctpop16(v) #define popcntw(v) ctpop32(v) @@ -2123,6 +2492,8 @@ void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) #define BCD_NEG_PREF 0xD #define BCD_NEG_ALT 0xB #define BCD_PLUS_ALT_2 0xE +#define NATIONAL_PLUS 0x2B +#define NATIONAL_NEG 0x2D #if defined(HOST_WORDS_BIGENDIAN) #define BCD_DIG_BYTE(n) (15 - (n/2)) @@ -2189,6 +2560,33 @@ static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n) } } +static int bcd_cmp_zero(ppc_avr_t *bcd) +{ + if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) { + return 1 << CRF_EQ; + } else { + return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT; + } +} + +static uint16_t get_national_digit(ppc_avr_t *reg, int n) +{ +#if defined(HOST_WORDS_BIGENDIAN) + return reg->u16[7 - n]; +#else + return reg->u16[n]; +#endif +} + +static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n) +{ +#if defined(HOST_WORDS_BIGENDIAN) + reg->u16[7 - n] = val; +#else + reg->u16[n] = val; +#endif +} + static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b) { int i; @@ -2319,6 +2717,163 @@ uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps) return helper_bcdadd(r, a, &bcopy, ps); } +uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) +{ + int i; + int cr = 0; + uint16_t national = 0; + uint16_t sgnb = get_national_digit(b, 0); + ppc_avr_t ret = { .u64 = { 0, 0 } }; + int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG); + + for (i = 1; i < 8; i++) { + national = get_national_digit(b, i); + if (unlikely(national < 0x30 || national > 0x39)) { + invalid = 1; + break; + } + + bcd_put_digit(&ret, national & 0xf, i); + } + + if (sgnb == NATIONAL_PLUS) { + bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0); + } else { + bcd_put_digit(&ret, BCD_NEG_PREF, 0); + } + + cr = bcd_cmp_zero(&ret); + + if (unlikely(invalid)) { + cr = 1 << CRF_SO; + } + + *r = ret; + + return cr; +} + +uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) +{ + int i; + int cr = 0; + int sgnb = bcd_get_sgn(b); + int invalid = (sgnb == 0); + ppc_avr_t ret = { .u64 = { 0, 0 } }; + + int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0); + + for (i = 1; i < 8; i++) { + set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i); + + if (unlikely(invalid)) { + break; + } + } + set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0); + + cr = bcd_cmp_zero(b); + + if (ox_flag) { + cr |= 1 << CRF_SO; + } + + if (unlikely(invalid)) { + cr = 1 << CRF_SO; + } + + *r = ret; + + return cr; +} + +uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps) +{ + int i; + int cr = 0; + int invalid = 0; + int zone_digit = 0; + int zone_lead = ps ? 
0xF : 0x3;
+    int digit = 0;
+    ppc_avr_t ret = { .u64 = { 0, 0 } };
+    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
+
+    if (unlikely((sgnb < 0xA) && ps)) {
+        invalid = 1;
+    }
+
+    for (i = 0; i < 16; i++) {
+        zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
+        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
+        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
+            invalid = 1;
+            break;
+        }
+
+        bcd_put_digit(&ret, digit, i + 1);
+    }
+
+    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
+        (!ps && (sgnb & 0x4))) {
+        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
+    } else {
+        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
+    }
+
+    cr = bcd_cmp_zero(&ret);
+
+    if (unlikely(invalid)) {
+        cr = 1 << CRF_SO;
+    }
+
+    *r = ret;
+
+    return cr;
+}
+
+uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
+{
+    int i;
+    int cr = 0;
+    uint8_t digit = 0;
+    int sgnb = bcd_get_sgn(b);
+    int zone_lead = (ps) ? 0xF0 : 0x30;
+    int invalid = (sgnb == 0);
+    ppc_avr_t ret = { .u64 = { 0, 0 } };
+
+    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
+
+    for (i = 0; i < 16; i++) {
+        digit = bcd_get_digit(b, i + 1, &invalid);
+
+        if (unlikely(invalid)) {
+            break;
+        }
+
+        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
+    }
+
+    if (ps) {
+        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
+    } else {
+        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
+    }
+
+    cr = bcd_cmp_zero(b);
+
+    if (ox_flag) {
+        cr |= 1 << CRF_SO;
+    }
+
+    if (unlikely(invalid)) {
+        cr = 1 << CRF_SO;
+    }
+
+    *r = ret;
+
+    return cr;
+}
+
 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
 {
     int i;
diff --git a/target-ppc/internal.h b/target-ppc/internal.h
new file mode 100644
index 0000000000..1ff4896c45
--- /dev/null
+++ b/target-ppc/internal.h
@@ -0,0 +1,50 @@
+/*
+ * PowerPC internal definitions for qemu.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */ + +#ifndef PPC_INTERNAL_H +#define PPC_INTERNAL_H + +#define FUNC_MASK(name, ret_type, size, max_val) \ +static inline ret_type name(uint##size##_t start, \ + uint##size##_t end) \ +{ \ + ret_type ret, max_bit = size - 1; \ + \ + if (likely(start == 0)) { \ + ret = max_val << (max_bit - end); \ + } else if (likely(end == max_bit)) { \ + ret = max_val >> start; \ + } else { \ + ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \ + (((uint##size##_t)(-1ULL) >> (end)) >> 1); \ + if (unlikely(start > end)) { \ + return ~ret; \ + } \ + } \ + \ + return ret; \ +} + +#if defined(TARGET_PPC64) +FUNC_MASK(MASK, target_ulong, 64, UINT64_MAX); +#else +FUNC_MASK(MASK, target_ulong, 32, UINT32_MAX); +#endif +FUNC_MASK(mask_u32, uint32_t, 32, UINT32_MAX); +FUNC_MASK(mask_u64, uint64_t, 64, UINT64_MAX); + +#endif /* PPC_INTERNAL_H */ diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index dcb68b9081..9c4834c4fc 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -36,6 +36,7 @@ #include "hw/sysbus.h" #include "hw/ppc/spapr.h" #include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_cpu_core.h" #include "hw/ppc/ppc.h" #include "sysemu/watchdog.h" #include "trace.h" @@ -79,6 +80,7 @@ static int cap_ppc_watchdog; static int cap_papr; static int cap_htab_fd; static int cap_fixup_hcalls; +static int cap_htm; /* Hardware transactional memory support */ static uint32_t debug_inst_opcode; @@ -100,6 +102,16 @@ static void kvm_kick_cpu(void *opaque) qemu_cpu_kick(CPU(cpu)); } +/* Check whether we are running with KVM-PR (instead of KVM-HV). This + * should only be used for fallback tests - generally we should use + * explicit capabilities for the features we want, rather than + * assuming what is/isn't available depending on the KVM variant. */ +static bool kvmppc_is_pr(KVMState *ks) +{ + /* Assume KVM-PR if the GET_PVINFO capability is available */ + return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0; +} + static int kvm_ppc_register_host_cpu_type(void); int kvm_arch_init(MachineState *ms, KVMState *s) @@ -121,6 +133,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) * only activated after this by kvmppc_set_papr() */ cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD); cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL); + cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM); if (!cap_interrupt_level) { fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the " @@ -220,10 +233,9 @@ static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, * * For that to work we make a few assumptions: * - * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR" - * KVM which only supports 4K and 16M pages, but supports them - * regardless of the backing store characteritics. We also don't - * support 1T segments. + * - Check whether we are running "PR" KVM which only supports 4K + * and 16M pages, but supports them regardless of the backing + * store characteritics. We also don't support 1T segments. * * This is safe as if HV KVM ever supports that capability or PR * KVM grows supports for more page/segment sizes, those versions @@ -238,7 +250,7 @@ static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu, * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit * this fallback. 
*/ - if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) { + if (kvmppc_is_pr(cs->kvm_state)) { /* No flags */ info->flags = 0; info->slb_size = 64; @@ -427,6 +439,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; long rampagesize; int iq, ik, jq, jk; + bool has_64k_pages = false; /* We only handle page sizes for 64-bit server guests for now */ if (!(env->mmu_model & POWERPC_MMU_64)) { @@ -470,6 +483,9 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) ksps->enc[jk].page_shift)) { continue; } + if (ksps->enc[jk].page_shift == 16) { + has_64k_pages = true; + } qsps->enc[jq].page_shift = ksps->enc[jk].page_shift; qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc; if (++jq >= PPC_PAGE_SIZES_MAX_SZ) { @@ -484,6 +500,9 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) { env->mmu_model &= ~POWERPC_MMU_1TSEG; } + if (!has_64k_pages) { + env->mmu_model &= ~POWERPC_MMU_64K; + } } #else /* defined (TARGET_PPC64) */ @@ -551,11 +570,18 @@ int kvm_arch_init_vcpu(CPUState *cs) idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu); - /* Some targets support access to KVM's guest TLB. */ switch (cenv->mmu_model) { case POWERPC_MMU_BOOKE206: + /* This target supports access to KVM's guest TLB */ ret = kvm_booke206_tlb_init(cpu); break; + case POWERPC_MMU_2_07: + if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { + /* KVM-HV has transactional memory on POWER8 also without the + * KVM_CAP_PPC_HTM extension, so enable it here instead. */ + cap_htm = true; + } + break; default: break; } @@ -2055,6 +2081,12 @@ void kvmppc_enable_set_mode_hcall(void) kvmppc_enable_hcall(kvm_state, H_SET_MODE); } +void kvmppc_enable_clear_ref_mod_hcalls(void) +{ + kvmppc_enable_hcall(kvm_state, H_CLEAR_REF); + kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD); +} + void kvmppc_set_papr(PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); @@ -2254,11 +2286,8 @@ int kvmppc_reset_htab(int shift_hint) /* We have a kernel that predates the htab reset calls. For PR * KVM, we need to allocate the htab ourselves, for an HV KVM of - * this era, it has allocated a 16MB fixed size hash table - * already. Kernels of this era have the GET_PVINFO capability - * only on PR, so we use this hack to determine the right - * answer */ - if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) { + * this era, it has allocated a 16MB fixed size hash table already. 
*/ + if (kvmppc_is_pr(kvm_state)) { /* PR - tell caller to allocate htab */ return 0; } else { @@ -2339,6 +2368,11 @@ bool kvmppc_has_cap_fixup_hcalls(void) return cap_fixup_hcalls; } +bool kvmppc_has_cap_htm(void) +{ + return cap_htm; +} + static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc) { ObjectClass *oc = OBJECT_CLASS(pcc); @@ -2364,19 +2398,6 @@ PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) return pvr_pcc; } -#if defined(TARGET_PPC64) -static void spapr_cpu_core_host_initfn(Object *obj) -{ - sPAPRCPUCore *core = SPAPR_CPU_CORE(obj); - char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host"); - ObjectClass *oc = object_class_by_name(name); - - g_assert(oc); - g_free((void *)name); - core->cpu_class = oc; -} -#endif - static int kvm_ppc_register_host_cpu_type(void) { TypeInfo type_info = { @@ -2404,14 +2425,16 @@ static int kvm_ppc_register_host_cpu_type(void) #if defined(TARGET_PPC64) type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host"); type_info.parent = TYPE_SPAPR_CPU_CORE, - type_info.instance_size = sizeof(sPAPRCPUCore), - type_info.instance_init = spapr_cpu_core_host_initfn, - type_info.class_init = NULL; + type_info.instance_size = sizeof(sPAPRCPUCore); + type_info.instance_init = NULL; + type_info.class_init = spapr_cpu_core_class_init; + type_info.class_data = (void *) "host"; type_register(&type_info); g_free((void *)type_info.name); /* Register generic spapr CPU family class for current host CPU type */ type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc); + type_info.class_data = (void *) dc->desc; type_register(&type_info); g_free((void *)type_info.name); #endif diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h index 5461d1082c..bd1d78bfbe 100644 --- a/target-ppc/kvm_ppc.h +++ b/target-ppc/kvm_ppc.h @@ -24,6 +24,7 @@ int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len); int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level); void kvmppc_enable_logical_ci_hcalls(void); void kvmppc_enable_set_mode_hcall(void); +void kvmppc_enable_clear_ref_mod_hcalls(void); void kvmppc_set_papr(PowerPCCPU *cpu); int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version); void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); @@ -54,6 +55,7 @@ void kvmppc_hash64_free_pteg(uint64_t token); void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index, target_ulong pte0, target_ulong pte1); bool kvmppc_has_cap_fixup_hcalls(void); +bool kvmppc_has_cap_htm(void); int kvmppc_enable_hwrng(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); @@ -113,6 +115,10 @@ static inline void kvmppc_enable_set_mode_hcall(void) { } +static inline void kvmppc_enable_clear_ref_mod_hcalls(void) +{ +} + static inline void kvmppc_set_papr(PowerPCCPU *cpu) { } @@ -244,6 +250,11 @@ static inline bool kvmppc_has_cap_fixup_hcalls(void) abort(); } +static inline bool kvmppc_has_cap_htm(void) +{ + return false; +} + static inline int kvmppc_enable_hwrng(void) { return -1; diff --git a/target-ppc/machine.c b/target-ppc/machine.c index 4820f22377..18c16d2512 100644 --- a/target-ppc/machine.c +++ b/target-ppc/machine.c @@ -8,7 +8,6 @@ #include "helper_regs.h" #include "mmu-hash64.h" #include "migration/cpu.h" -#include "exec/exec-all.h" static int cpu_load_old(QEMUFile *f, void *opaque, int version_id) { @@ -136,11 +135,33 @@ static const VMStateInfo vmstate_info_avr = { #define VMSTATE_AVR_ARRAY(_f, _s, _n) \ VMSTATE_AVR_ARRAY_V(_f, _s, _n, 0) +static bool 
cpu_pre_2_8_migration(void *opaque, int version_id) +{ + PowerPCCPU *cpu = opaque; + + return cpu->pre_2_8_migration; +} + static void cpu_pre_save(void *opaque) { PowerPCCPU *cpu = opaque; CPUPPCState *env = &cpu->env; int i; + uint64_t insns_compat_mask = + PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB + | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES + | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES + | PPC_FLOAT_STFIWX | PPC_FLOAT_EXT + | PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ + | PPC_MEM_SYNC | PPC_MEM_EIEIO | PPC_MEM_TLBIE | PPC_MEM_TLBSYNC + | PPC_64B | PPC_64BX | PPC_ALTIVEC + | PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD; + uint64_t insns_compat_mask2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX + | PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 + | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 + | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 + | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 + | PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | PPC2_TM; env->spr[SPR_LR] = env->lr; env->spr[SPR_CTR] = env->ctr; @@ -162,6 +183,14 @@ static void cpu_pre_save(void *opaque) env->spr[SPR_IBAT4U + 2*i] = env->IBAT[0][i+4]; env->spr[SPR_IBAT4U + 2*i + 1] = env->IBAT[1][i+4]; } + + /* Hacks for migration compatibility between 2.6, 2.7 & 2.8 */ + if (cpu->pre_2_8_migration) { + cpu->mig_msr_mask = env->msr_mask; + cpu->mig_insns_flags = env->insns_flags & insns_compat_mask; + cpu->mig_insns_flags2 = env->insns_flags2 & insns_compat_mask2; + cpu->mig_nb_BATs = env->nb_BATs; + } } static int cpu_post_load(void *opaque, int version_id) @@ -562,10 +591,11 @@ const VMStateDescription vmstate_ppc_cpu = { /* FIXME: access_type? */ /* Sanity checking */ - VMSTATE_UINTTL_EQUAL(env.msr_mask, PowerPCCPU), - VMSTATE_UINT64_EQUAL(env.insns_flags, PowerPCCPU), - VMSTATE_UINT64_EQUAL(env.insns_flags2, PowerPCCPU), - VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU), + VMSTATE_UINTTL_TEST(mig_msr_mask, PowerPCCPU, cpu_pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_insns_flags, PowerPCCPU, cpu_pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_insns_flags2, PowerPCCPU, + cpu_pre_2_8_migration), + VMSTATE_UINT32_TEST(mig_nb_BATs, PowerPCCPU, cpu_pre_2_8_migration), VMSTATE_END_OF_LIST() }, .subsections = (const VMStateDescription*[]) { diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c index e4ed3773e8..1ab8a6eab4 100644 --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -23,7 +23,6 @@ #include "exec/helper-proto.h" #include "helper_regs.h" -#include "exec/exec-all.h" #include "exec/cpu_ldst.h" //#define DEBUG_OP @@ -57,9 +56,9 @@ void helper_lmw(CPUPPCState *env, target_ulong addr, uint32_t reg) { for (; reg < 32; reg++) { if (needs_byteswap(env)) { - env->gpr[reg] = bswap32(cpu_ldl_data(env, addr)); + env->gpr[reg] = bswap32(cpu_ldl_data_ra(env, addr, GETPC())); } else { - env->gpr[reg] = cpu_ldl_data(env, addr); + env->gpr[reg] = cpu_ldl_data_ra(env, addr, GETPC()); } addr = addr_add(env, addr, 4); } @@ -69,31 +68,39 @@ void helper_stmw(CPUPPCState *env, target_ulong addr, uint32_t reg) { for (; reg < 32; reg++) { if (needs_byteswap(env)) { - cpu_stl_data(env, addr, bswap32((uint32_t)env->gpr[reg])); + cpu_stl_data_ra(env, addr, bswap32((uint32_t)env->gpr[reg]), + GETPC()); } else { - cpu_stl_data(env, addr, (uint32_t)env->gpr[reg]); + cpu_stl_data_ra(env, addr, (uint32_t)env->gpr[reg], GETPC()); } addr = addr_add(env, addr, 4); } } -void helper_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg) +static void do_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, + uint32_t 
reg, uintptr_t raddr) { int sh; for (; nb > 3; nb -= 4) { - env->gpr[reg] = cpu_ldl_data(env, addr); + env->gpr[reg] = cpu_ldl_data_ra(env, addr, raddr); reg = (reg + 1) % 32; addr = addr_add(env, addr, 4); } if (unlikely(nb > 0)) { env->gpr[reg] = 0; for (sh = 24; nb > 0; nb--, sh -= 8) { - env->gpr[reg] |= cpu_ldub_data(env, addr) << sh; + env->gpr[reg] |= cpu_ldub_data_ra(env, addr, raddr) << sh; addr = addr_add(env, addr, 1); } } } + +void helper_lsw(CPUPPCState *env, target_ulong addr, uint32_t nb, uint32_t reg) +{ + do_lsw(env, addr, nb, reg, GETPC()); +} + /* PPC32 specification says we must generate an exception if * rA is in the range of registers to be loaded. * In an other hand, IBM says this is valid, but rA won't be loaded. @@ -106,12 +113,11 @@ void helper_lswx(CPUPPCState *env, target_ulong addr, uint32_t reg, int num_used_regs = (xer_bc + 3) / 4; if (unlikely((ra != 0 && lsw_reg_in_range(reg, num_used_regs, ra)) || lsw_reg_in_range(reg, num_used_regs, rb))) { - env->nip += 4; /* Compensate the "nip - 4" from gen_lswx() */ - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | - POWERPC_EXCP_INVAL_LSWX); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_INVAL_LSWX, GETPC()); } else { - helper_lsw(env, addr, xer_bc, reg); + do_lsw(env, addr, xer_bc, reg, GETPC()); } } } @@ -122,46 +128,51 @@ void helper_stsw(CPUPPCState *env, target_ulong addr, uint32_t nb, int sh; for (; nb > 3; nb -= 4) { - cpu_stl_data(env, addr, env->gpr[reg]); + cpu_stl_data_ra(env, addr, env->gpr[reg], GETPC()); reg = (reg + 1) % 32; addr = addr_add(env, addr, 4); } if (unlikely(nb > 0)) { for (sh = 24; nb > 0; nb--, sh -= 8) { - cpu_stb_data(env, addr, (env->gpr[reg] >> sh) & 0xFF); + cpu_stb_data_ra(env, addr, (env->gpr[reg] >> sh) & 0xFF, GETPC()); addr = addr_add(env, addr, 1); } } } -static void do_dcbz(CPUPPCState *env, target_ulong addr, int dcache_line_size) +void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t opcode) { - int i; - - addr &= ~(dcache_line_size - 1); - for (i = 0; i < dcache_line_size; i += 4) { - cpu_stl_data(env, addr + i, 0); - } - if (env->reserve_addr == addr) { - env->reserve_addr = (target_ulong)-1ULL; - } -} - -void helper_dcbz(CPUPPCState *env, target_ulong addr, uint32_t is_dcbzl) -{ - int dcbz_size = env->dcache_line_size; + target_ulong mask, dcbz_size = env->dcache_line_size; + uint32_t i; + void *haddr; #if defined(TARGET_PPC64) - if (!is_dcbzl && - (env->excp_model == POWERPC_EXCP_970) && - ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { + /* Check for dcbz vs dcbzl on 970 */ + if (env->excp_model == POWERPC_EXCP_970 && + !(opcode & 0x00200000) && ((env->spr[SPR_970_HID5] >> 7) & 0x3) == 1) { dcbz_size = 32; } #endif - /* XXX add e500mc support */ + /* Align address */ + mask = ~(dcbz_size - 1); + addr &= mask; - do_dcbz(env, addr, dcbz_size); + /* Check reservation */ + if ((env->reserve_addr & mask) == (addr & mask)) { + env->reserve_addr = (target_ulong)-1ULL; + } + + /* Try fast path translate */ + haddr = tlb_vaddr_to_host(env, addr, MMU_DATA_STORE, env->dmmu_idx); + if (haddr) { + memset(haddr, 0, dcbz_size); + } else { + /* Slow path */ + for (i = 0; i < dcbz_size; i += 8) { + cpu_stq_data_ra(env, addr + i, 0, GETPC()); + } + } } void helper_icbi(CPUPPCState *env, target_ulong addr) @@ -172,7 +183,7 @@ void helper_icbi(CPUPPCState *env, target_ulong addr) * (not a fetch) by the MMU. To be sure it will be so, * do the load "by hand". 
*/ - cpu_ldl_data(env, addr); + cpu_ldl_data_ra(env, addr, GETPC()); } /* XXX: to be tested */ @@ -183,7 +194,7 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, d = 24; for (i = 0; i < xer_bc; i++) { - c = cpu_ldub_data(env, addr); + c = cpu_ldub_data_ra(env, addr, GETPC()); addr = addr_add(env, addr, 1); /* ra (if not 0) and rb are never modified */ if (likely(reg != rb && (ra == 0 || reg != ra))) { diff --git a/target-ppc/misc_helper.c b/target-ppc/misc_helper.c index cb5ebf56cf..1e6e705a4e 100644 --- a/target-ppc/misc_helper.c +++ b/target-ppc/misc_helper.c @@ -39,7 +39,8 @@ void helper_store_dump_spr(CPUPPCState *env, uint32_t sprn) #ifdef TARGET_PPC64 static void raise_fu_exception(CPUPPCState *env, uint32_t bit, - uint32_t sprn, uint32_t cause) + uint32_t sprn, uint32_t cause, + uintptr_t raddr) { qemu_log("Facility SPR %d is unavailable (SPR FSCR:%d)\n", sprn, bit); @@ -47,7 +48,7 @@ static void raise_fu_exception(CPUPPCState *env, uint32_t bit, cause &= FSCR_IC_MASK; env->spr[SPR_FSCR] |= (target_ulong)cause << FSCR_IC_POS; - helper_raise_exception_err(env, POWERPC_EXCP_FU, 0); + raise_exception_err_ra(env, POWERPC_EXCP_FU, 0, raddr); } #endif @@ -59,7 +60,7 @@ void helper_fscr_facility_check(CPUPPCState *env, uint32_t bit, /* Facility is enabled, continue */ return; } - raise_fu_exception(env, bit, sprn, cause); + raise_fu_exception(env, bit, sprn, cause, GETPC()); #endif } @@ -71,7 +72,7 @@ void helper_msr_facility_check(CPUPPCState *env, uint32_t bit, /* Facility is enabled, continue */ return; } - raise_fu_exception(env, bit, sprn, cause); + raise_fu_exception(env, bit, sprn, cause, GETPC()); #endif } diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 5de1358d1c..fdb7a787bf 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -110,7 +110,7 @@ void helper_slbia(CPUPPCState *env) * and we still don't have a tlb_flush_mask(env, n, mask) * in QEMU, we just invalidate all TLBs */ - env->tlb_need_flush = 1; + env->tlb_need_flush |= TLB_NEED_LOCAL_FLUSH; } } } @@ -132,7 +132,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr) * and we still don't have a tlb_flush_mask(env, n, mask) * in QEMU, we just invalidate all TLBs */ - env->tlb_need_flush = 1; + env->tlb_need_flush |= TLB_NEED_LOCAL_FLUSH; } } @@ -241,8 +241,8 @@ void helper_store_slb(CPUPPCState *env, target_ulong rb, target_ulong rs) PowerPCCPU *cpu = ppc_env_get_cpu(env); if (ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs) < 0) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL, GETPC()); } } @@ -252,8 +252,8 @@ target_ulong helper_load_slb_esid(CPUPPCState *env, target_ulong rb) target_ulong rt = 0; if (ppc_load_slb_esid(cpu, rb, &rt) < 0) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL, GETPC()); } return rt; } @@ -264,8 +264,8 @@ target_ulong helper_find_slb_vsid(CPUPPCState *env, target_ulong rb) target_ulong rt = 0; if (ppc_find_slb_vsid(cpu, rb, &rt) < 0) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL, GETPC()); } return rt; } @@ -276,8 +276,8 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, target_ulong rb) target_ulong rt = 0; if (ppc_load_slb_vsid(cpu, rb, &rt) < 0) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - 
POWERPC_EXCP_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL, GETPC()); } return rt; } @@ -912,7 +912,7 @@ void ppc_hash64_tlb_flush_hpte(PowerPCCPU *cpu, * invalidate, and we still don't have a tlb_flush_mask(env, n, * mask) in QEMU, we just invalidate all TLBs */ - tlb_flush(CPU(cpu), 1); + cpu->env.tlb_need_flush = TLB_NEED_GLOBAL_FLUSH | TLB_NEED_LOCAL_FLUSH; } void ppc_hash64_update_rmls(CPUPPCState *env) diff --git a/target-ppc/mmu-hash64.h b/target-ppc/mmu-hash64.h index db265e30b2..ab5d347a53 100644 --- a/target-ppc/mmu-hash64.h +++ b/target-ppc/mmu-hash64.h @@ -4,7 +4,6 @@ #ifndef CONFIG_USER_ONLY #ifdef TARGET_PPC64 -void ppc_hash64_check_page_sizes(PowerPCCPU *cpu, Error **errp); void dump_slb(FILE *f, fprintf_function cpu_fprintf, PowerPCCPU *cpu); int ppc_store_slb(PowerPCCPU *cpu, target_ulong slot, target_ulong esid, target_ulong vsid); diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index 3eb3cd78e2..d09fc0a85f 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -1941,7 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) break; default: /* XXX: TODO */ - cpu_abort(CPU(cpu), "Unknown MMU model\n"); + cpu_abort(CPU(cpu), "Unknown MMU model %d\n", env->mmu_model); break; } } @@ -1965,7 +1965,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) * we just mark the TLB to be flushed later (context synchronizing * event or sync instruction on 32-bit). */ - env->tlb_need_flush = 1; + env->tlb_need_flush |= TLB_NEED_LOCAL_FLUSH; break; #if defined(TARGET_PPC64) case POWERPC_MMU_64B: @@ -1979,7 +1979,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) * and we still don't have a tlb_flush_mask(env, n, mask) in QEMU, * we just invalidate all TLBs */ - env->tlb_need_flush = 1; + env->tlb_need_flush |= TLB_NEED_LOCAL_FLUSH; break; #endif /* defined(TARGET_PPC64) */ default: @@ -2065,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value) } } #else - env->tlb_need_flush = 1; + env->tlb_need_flush |= TLB_NEED_LOCAL_FLUSH; #endif } } @@ -2598,9 +2598,9 @@ void helper_booke206_tlbwe(CPUPPCState *env) tlb = booke206_cur_tlb(env); if (!tlb) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | - POWERPC_EXCP_INVAL_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_INVAL_INVAL, GETPC()); } /* check that we support the targeted size */ @@ -2608,9 +2608,9 @@ void helper_booke206_tlbwe(CPUPPCState *env) size_ps = booke206_tlbnps(env, tlbn); if ((env->spr[SPR_BOOKE_MAS1] & MAS1_VALID) && (tlbncfg & TLBnCFG_AVAIL) && !(size_ps & (1 << size_tlb))) { - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | - POWERPC_EXCP_INVAL_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_INVAL_INVAL, GETPC()); } if (msr_gs) { @@ -2757,7 +2757,7 @@ static inline void booke206_invalidate_ea_tlb(CPUPPCState *env, int tlbn, void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address) { - PowerPCCPU *cpu = ppc_env_get_cpu(env); + CPUState *cs; if (address & 0x4) { /* flush all entries */ @@ -2774,11 +2774,15 @@ void helper_booke206_tlbivax(CPUPPCState *env, target_ulong address) if (address & 0x8) { /* flush TLB1 entries */ booke206_invalidate_ea_tlb(env, 1, address); - tlb_flush(CPU(cpu), 1); + CPU_FOREACH(cs) { + tlb_flush(cs, 1); + } } else { /* flush TLB0 entries */ booke206_invalidate_ea_tlb(env, 0, address); - 
tlb_flush_page(CPU(cpu), address & MAS2_EPN_MASK); + CPU_FOREACH(cs) { + tlb_flush_page(cs, address & MAS2_EPN_MASK); + } } } @@ -2867,9 +2871,14 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type) } -void helper_check_tlb_flush(CPUPPCState *env) +void helper_check_tlb_flush_local(CPUPPCState *env) { - check_tlb_flush(env); + check_tlb_flush(env, false); +} + +void helper_check_tlb_flush_global(CPUPPCState *env) +{ + check_tlb_flush(env, true); } /*****************************************************************************/ @@ -2892,10 +2901,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type, ret = cpu_ppc_handle_mmu_fault(env, addr, access_type, mmu_idx); } if (unlikely(ret != 0)) { - if (likely(retaddr)) { - /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr); - } - helper_raise_exception_err(env, cs->exception_index, env->error_code); + raise_exception_err_ra(env, cs->exception_index, env->error_code, + retaddr); } } diff --git a/target-ppc/timebase_helper.c b/target-ppc/timebase_helper.c index a07faa42cb..73363e08ae 100644 --- a/target-ppc/timebase_helper.c +++ b/target-ppc/timebase_helper.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "exec/exec-all.h" #include "qemu/log.h" /*****************************************************************************/ @@ -143,15 +144,16 @@ target_ulong helper_load_dcr(CPUPPCState *env, target_ulong dcrn) if (unlikely(env->dcr_env == NULL)) { qemu_log_mask(LOG_GUEST_ERROR, "No DCR environment\n"); - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | - POWERPC_EXCP_INVAL_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_INVAL_INVAL, GETPC()); } else if (unlikely(ppc_dcr_read(env->dcr_env, (uint32_t)dcrn, &val) != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "DCR read error %d %03x\n", (uint32_t)dcrn, (uint32_t)dcrn); - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | POWERPC_EXCP_PRIV_REG); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_PRIV_REG, GETPC()); } return val; } @@ -160,14 +162,15 @@ void helper_store_dcr(CPUPPCState *env, target_ulong dcrn, target_ulong val) { if (unlikely(env->dcr_env == NULL)) { qemu_log_mask(LOG_GUEST_ERROR, "No DCR environment\n"); - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | - POWERPC_EXCP_INVAL_INVAL); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_INVAL_INVAL, GETPC()); } else if (unlikely(ppc_dcr_write(env->dcr_env, (uint32_t)dcrn, (uint32_t)val) != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "DCR write error %d %03x\n", (uint32_t)dcrn, (uint32_t)dcrn); - helper_raise_exception_err(env, POWERPC_EXCP_PROGRAM, - POWERPC_EXCP_INVAL | POWERPC_EXCP_PRIV_REG); + raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, + POWERPC_EXCP_INVAL | + POWERPC_EXCP_PRIV_REG, GETPC()); } } diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 92030b66a5..59e9552d2b 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" +#include "internal.h" #include "disas/disas.h" #include "exec/exec-all.h" #include "tcg-op.h" @@ -195,6 +196,7 @@ struct DisasContext { /* Routine used to access memory */ bool pr, hv, dr, le_mode; bool lazy_tlb_flush; + bool need_access_type; int mem_idx; int access_type; /* Translation flags */ @@ -250,20 +252,9 @@ struct 
opc_handler_t { #endif }; -static inline void gen_reset_fpstatus(void) -{ - gen_helper_reset_fpstatus(cpu_env); -} - -static inline void gen_compute_fprf(TCGv_i64 arg) -{ - gen_helper_compute_fprf(cpu_env, arg); - gen_helper_float_check_status(cpu_env); -} - static inline void gen_set_access_type(DisasContext *ctx, int access_type) { - if (ctx->access_type != access_type) { + if (ctx->need_access_type && ctx->access_type != access_type) { tcg_gen_movi_i32(cpu_access_type, access_type); ctx->access_type = access_type; } @@ -277,18 +268,15 @@ static inline void gen_update_nip(DisasContext *ctx, target_ulong nip) tcg_gen_movi_tl(cpu_nip, nip); } -void gen_update_current_nip(void *opaque) -{ - DisasContext *ctx = opaque; - - tcg_gen_movi_tl(cpu_nip, ctx->nip); -} - static void gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error) { TCGv_i32 t0, t1; + + /* These are all synchronous exceptions, we set the PC back to + * the faulting instruction + */ if (ctx->exception == POWERPC_EXCP_NONE) { - gen_update_nip(ctx, ctx->nip); + gen_update_nip(ctx, ctx->nip - 4); } t0 = tcg_const_i32(excp); t1 = tcg_const_i32(error); @@ -301,8 +289,12 @@ static void gen_exception_err(DisasContext *ctx, uint32_t excp, uint32_t error) static void gen_exception(DisasContext *ctx, uint32_t excp) { TCGv_i32 t0; + + /* These are all synchronous exceptions, we set the PC back to + * the faulting instruction + */ if (ctx->exception == POWERPC_EXCP_NONE) { - gen_update_nip(ctx, ctx->nip); + gen_update_nip(ctx, ctx->nip - 4); } t0 = tcg_const_i32(excp); gen_helper_raise_exception(cpu_env, t0); @@ -310,10 +302,25 @@ static void gen_exception(DisasContext *ctx, uint32_t excp) ctx->exception = (excp); } +static void gen_exception_nip(DisasContext *ctx, uint32_t excp, + target_ulong nip) +{ + TCGv_i32 t0; + + gen_update_nip(ctx, nip); + t0 = tcg_const_i32(excp); + gen_helper_raise_exception(cpu_env, t0); + tcg_temp_free_i32(t0); + ctx->exception = (excp); +} + static void gen_debug_exception(DisasContext *ctx) { TCGv_i32 t0; + /* These are all synchronous exceptions, we set the PC back to + * the faulting instruction + */ if ((ctx->exception != POWERPC_EXCP_BRANCH) && (ctx->exception != POWERPC_EXCP_SYNC)) { gen_update_nip(ctx, ctx->nip); @@ -367,12 +374,16 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, PPC_NONE) #define GEN_HANDLER2_E(name, onam, opc1, opc2, opc3, inval, type, type2) \ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2) +#define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2) \ +GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2) + +#define GEN_HANDLER2_E_2(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2) \ +GEN_OPCODE4(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2) + typedef struct opcode_t { - unsigned char opc1, opc2, opc3; + unsigned char opc1, opc2, opc3, opc4; #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */ - unsigned char pad[5]; -#else - unsigned char pad[1]; + unsigned char pad[4]; #endif opc_handler_t handler; const char *oname; @@ -432,12 +443,28 @@ static inline uint32_t name(uint32_t opcode) \ return (((opcode >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) | \ ((opcode >> (shift2)) & ((1 << (nb2)) - 1)); \ } + +#define EXTRACT_HELPER_DXFORM(name, \ + d0_bits, shift_op_d0, shift_d0, \ + d1_bits, shift_op_d1, shift_d1, \ + d2_bits, shift_op_d2, shift_d2) \ +static inline int16_t name(uint32_t opcode) \ +{ \ + return \ + (((opcode >> (shift_op_d0)) & ((1 << (d0_bits)) - 1)) << (shift_d0)) | \ + (((opcode >> 
(shift_op_d1)) & ((1 << (d1_bits)) - 1)) << (shift_d1)) | \ + (((opcode >> (shift_op_d2)) & ((1 << (d2_bits)) - 1)) << (shift_d2)); \ +} + + /* Opcode part 1 */ EXTRACT_HELPER(opc1, 26, 6); /* Opcode part 2 */ EXTRACT_HELPER(opc2, 1, 5); /* Opcode part 3 */ EXTRACT_HELPER(opc3, 6, 5); +/* Opcode part 4 */ +EXTRACT_HELPER(opc4, 16, 5); /* Update Cr0 flags */ EXTRACT_HELPER(Rc, 0, 1); /* Update Cr6 flags (Altivec) */ @@ -475,6 +502,8 @@ EXTRACT_HELPER(UIMM, 0, 16); EXTRACT_HELPER(SIMM5, 16, 5); /* 5 bits signed immediate value */ EXTRACT_HELPER(UIMM5, 16, 5); +/* 4 bits unsigned immediate value */ +EXTRACT_HELPER(UIMM4, 16, 4); /* Bit count */ EXTRACT_HELPER(NB, 11, 5); /* Shift count */ @@ -501,6 +530,13 @@ EXTRACT_HELPER(FPL, 25, 1); EXTRACT_HELPER(FPFLM, 17, 8); EXTRACT_HELPER(FPW, 16, 1); +/* addpcis */ +EXTRACT_HELPER_DXFORM(DX, 10, 6, 6, 5, 16, 1, 1, 0, 0) +#if defined(TARGET_PPC64) +/* darn */ +EXTRACT_HELPER(L, 16, 2); +#endif + /*** Jump target decoding ***/ /* Immediate address */ static inline target_ulong LI(uint32_t opcode) @@ -526,34 +562,6 @@ EXTRACT_HELPER(DCM, 10, 6) /* DFP Z23-form */ EXTRACT_HELPER(RMC, 9, 2) -/* Create a mask between and bits */ -static inline target_ulong MASK(uint32_t start, uint32_t end) -{ - target_ulong ret; - -#if defined(TARGET_PPC64) - if (likely(start == 0)) { - ret = UINT64_MAX << (63 - end); - } else if (likely(end == 63)) { - ret = UINT64_MAX >> start; - } -#else - if (likely(start == 0)) { - ret = UINT32_MAX << (31 - end); - } else if (likely(end == 31)) { - ret = UINT32_MAX >> start; - } -#endif - else { - ret = (((target_ulong)(-1ULL)) >> (start)) ^ - (((target_ulong)(-1ULL) >> (end)) >> 1); - if (unlikely(start > end)) - return ~ret; - } - - return ret; -} - EXTRACT_HELPER_SPLIT(xT, 0, 1, 21, 5); EXTRACT_HELPER_SPLIT(xS, 0, 1, 21, 5); EXTRACT_HELPER_SPLIT(xA, 2, 1, 16, 5); @@ -563,6 +571,8 @@ EXTRACT_HELPER(DM, 8, 2); EXTRACT_HELPER(UIM, 16, 2); EXTRACT_HELPER(SHW, 8, 2); EXTRACT_HELPER(SP, 19, 2); +EXTRACT_HELPER(IMM8, 11, 8); + /*****************************************************************************/ /* PowerPC instructions table */ @@ -572,7 +582,7 @@ EXTRACT_HELPER(SP, 19, 2); .opc1 = op1, \ .opc2 = op2, \ .opc3 = op3, \ - .pad = { 0, }, \ + .opc4 = 0xff, \ .handler = { \ .inval1 = invl, \ .type = _typ, \ @@ -587,7 +597,7 @@ EXTRACT_HELPER(SP, 19, 2); .opc1 = op1, \ .opc2 = op2, \ .opc3 = op3, \ - .pad = { 0, }, \ + .opc4 = 0xff, \ .handler = { \ .inval1 = invl1, \ .inval2 = invl2, \ @@ -603,7 +613,37 @@ EXTRACT_HELPER(SP, 19, 2); .opc1 = op1, \ .opc2 = op2, \ .opc3 = op3, \ - .pad = { 0, }, \ + .opc4 = 0xff, \ + .handler = { \ + .inval1 = invl, \ + .type = _typ, \ + .type2 = _typ2, \ + .handler = &gen_##name, \ + .oname = onam, \ + }, \ + .oname = onam, \ +} +#define GEN_OPCODE3(name, op1, op2, op3, op4, invl, _typ, _typ2) \ +{ \ + .opc1 = op1, \ + .opc2 = op2, \ + .opc3 = op3, \ + .opc4 = op4, \ + .handler = { \ + .inval1 = invl, \ + .type = _typ, \ + .type2 = _typ2, \ + .handler = &gen_##name, \ + .oname = stringify(name), \ + }, \ + .oname = stringify(name), \ +} +#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2) \ +{ \ + .opc1 = op1, \ + .opc2 = op2, \ + .opc3 = op3, \ + .opc4 = op4, \ .handler = { \ .inval1 = invl, \ .type = _typ, \ @@ -619,7 +659,7 @@ EXTRACT_HELPER(SP, 19, 2); .opc1 = op1, \ .opc2 = op2, \ .opc3 = op3, \ - .pad = { 0, }, \ + .opc4 = 0xff, \ .handler = { \ .inval1 = invl, \ .type = _typ, \ @@ -633,7 +673,7 @@ EXTRACT_HELPER(SP, 19, 2); .opc1 = op1, \ .opc2 = op2, \ .opc3 = op3, \ - 
.pad = { 0, }, \ + .opc4 = 0xff, \ .handler = { \ .inval1 = invl1, \ .inval2 = invl2, \ @@ -648,7 +688,35 @@ EXTRACT_HELPER(SP, 19, 2); .opc1 = op1, \ .opc2 = op2, \ .opc3 = op3, \ - .pad = { 0, }, \ + .opc4 = 0xff, \ + .handler = { \ + .inval1 = invl, \ + .type = _typ, \ + .type2 = _typ2, \ + .handler = &gen_##name, \ + }, \ + .oname = onam, \ +} +#define GEN_OPCODE3(name, op1, op2, op3, op4, invl, _typ, _typ2) \ +{ \ + .opc1 = op1, \ + .opc2 = op2, \ + .opc3 = op3, \ + .opc4 = op4, \ + .handler = { \ + .inval1 = invl, \ + .type = _typ, \ + .type2 = _typ2, \ + .handler = &gen_##name, \ + }, \ + .oname = stringify(name), \ +} +#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2) \ +{ \ + .opc1 = op1, \ + .opc2 = op2, \ + .opc3 = op3, \ + .opc4 = op4, \ .handler = { \ .inval1 = invl, \ .type = _typ, \ @@ -800,6 +868,53 @@ static void gen_cmpli(DisasContext *ctx) } } +/* cmprb - range comparison: isupper, isaplha, islower*/ +static void gen_cmprb(DisasContext *ctx) +{ + TCGv_i32 src1 = tcg_temp_new_i32(); + TCGv_i32 src2 = tcg_temp_new_i32(); + TCGv_i32 src2lo = tcg_temp_new_i32(); + TCGv_i32 src2hi = tcg_temp_new_i32(); + TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)]; + + tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]); + tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]); + + tcg_gen_andi_i32(src1, src1, 0xFF); + tcg_gen_ext8u_i32(src2lo, src2); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); + + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(crf, src2lo, src2hi); + + if (ctx->opcode & 0x00200000) { + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2lo, src2); + tcg_gen_shri_i32(src2, src2, 8); + tcg_gen_ext8u_i32(src2hi, src2); + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(src2lo, src2lo, src2hi); + tcg_gen_or_i32(crf, crf, src2lo); + } + tcg_gen_shli_i32(crf, crf, CRF_GT); + tcg_temp_free_i32(src1); + tcg_temp_free_i32(src2); + tcg_temp_free_i32(src2lo); + tcg_temp_free_i32(src2hi); +} + +#if defined(TARGET_PPC64) +/* cmpeqb */ +static void gen_cmpeqb(DisasContext *ctx) +{ + gen_helper_cmpeqb(cpu_crf[crfD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], + cpu_gpr[rB(ctx->opcode)]); +} +#endif + /* isel (PowerPC 2.03 specification) */ static void gen_isel(DisasContext *ctx) { @@ -984,44 +1099,50 @@ static void gen_addis(DisasContext *ctx) } } +/* addpcis */ +static void gen_addpcis(DisasContext *ctx) +{ + target_long d = DX(ctx->opcode); + + tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], ctx->nip + (d << 16)); +} + static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign, int compute_ov) { - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - TCGv_i32 t0 = tcg_temp_local_new_i32(); - TCGv_i32 t1 = tcg_temp_local_new_i32(); + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(t0, arg1); tcg_gen_trunc_tl_i32(t1, arg2); - tcg_gen_brcondi_i32(TCG_COND_EQ, t1, 0, l1); - if (sign) { - TCGLabel *l3 = gen_new_label(); - tcg_gen_brcondi_i32(TCG_COND_NE, t1, -1, l3); - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, INT32_MIN, l1); - gen_set_label(l3); - tcg_gen_div_i32(t0, t0, t1); - } else { - tcg_gen_divu_i32(t0, t0, t1); - } - if (compute_ov) { - tcg_gen_movi_tl(cpu_ov, 0); - } - tcg_gen_br(l2); - gen_set_label(l1); if (sign) { - 
tcg_gen_sari_i32(t0, t0, 31); + tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN); + tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1); + tcg_gen_and_i32(t2, t2, t3); + tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0); + tcg_gen_or_i32(t2, t2, t3); + tcg_gen_movi_i32(t3, 0); + tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1); + tcg_gen_div_i32(t3, t0, t1); + tcg_gen_extu_i32_tl(ret, t3); } else { - tcg_gen_movi_i32(t0, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0); + tcg_gen_movi_i32(t3, 0); + tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1); + tcg_gen_divu_i32(t3, t0, t1); + tcg_gen_extu_i32_tl(ret, t3); } if (compute_ov) { - tcg_gen_movi_tl(cpu_ov, 1); - tcg_gen_movi_tl(cpu_so, 1); + tcg_gen_extu_i32_tl(cpu_ov, t2); + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } - gen_set_label(l2); - tcg_gen_extu_i32_tl(ret, t0); tcg_temp_free_i32(t0); tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + if (unlikely(Rc(ctx->opcode) != 0)) gen_set_Rc0(ctx, ret); } @@ -1062,37 +1183,41 @@ GEN_DIVE(divweo, divwe, 1); static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign, int compute_ov) { - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); - tcg_gen_brcondi_i64(TCG_COND_EQ, arg2, 0, l1); - if (sign) { - TCGLabel *l3 = gen_new_label(); - tcg_gen_brcondi_i64(TCG_COND_NE, arg2, -1, l3); - tcg_gen_brcondi_i64(TCG_COND_EQ, arg1, INT64_MIN, l1); - gen_set_label(l3); - tcg_gen_div_i64(ret, arg1, arg2); - } else { - tcg_gen_divu_i64(ret, arg1, arg2); - } - if (compute_ov) { - tcg_gen_movi_tl(cpu_ov, 0); - } - tcg_gen_br(l2); - gen_set_label(l1); + tcg_gen_mov_i64(t0, arg1); + tcg_gen_mov_i64(t1, arg2); if (sign) { - tcg_gen_sari_i64(ret, arg1, 63); + tcg_gen_setcondi_i64(TCG_COND_EQ, t2, t0, INT64_MIN); + tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, -1); + tcg_gen_and_i64(t2, t2, t3); + tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, 0); + tcg_gen_or_i64(t2, t2, t3); + tcg_gen_movi_i64(t3, 0); + tcg_gen_movcond_i64(TCG_COND_NE, t1, t2, t3, t2, t1); + tcg_gen_div_i64(ret, t0, t1); } else { - tcg_gen_movi_i64(ret, 0); + tcg_gen_setcondi_i64(TCG_COND_EQ, t2, t1, 0); + tcg_gen_movi_i64(t3, 0); + tcg_gen_movcond_i64(TCG_COND_NE, t1, t2, t3, t2, t1); + tcg_gen_divu_i64(ret, t0, t1); } if (compute_ov) { - tcg_gen_movi_tl(cpu_ov, 1); - tcg_gen_movi_tl(cpu_so, 1); + tcg_gen_mov_tl(cpu_ov, t2); + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } - gen_set_label(l2); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); + if (unlikely(Rc(ctx->opcode) != 0)) gen_set_Rc0(ctx, ret); } + #define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov) \ static void glue(gen_, name)(DisasContext *ctx) \ { \ @@ -1113,6 +1238,98 @@ GEN_DIVE(divde, divde, 0); GEN_DIVE(divdeo, divde, 1); #endif +static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, + TCGv arg2, int sign) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t0, arg1); + tcg_gen_trunc_tl_i32(t1, arg2); + if (sign) { + TCGv_i32 t2 = tcg_temp_new_i32(); + TCGv_i32 t3 = tcg_temp_new_i32(); + tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN); + tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1); + tcg_gen_and_i32(t2, t2, t3); + tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0); + tcg_gen_or_i32(t2, t2, t3); + tcg_gen_movi_i32(t3, 0); + tcg_gen_movcond_i32(TCG_COND_NE, 
t1, t2, t3, t2, t1); + tcg_gen_rem_i32(t3, t0, t1); + tcg_gen_ext_i32_tl(ret, t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } else { + TCGv_i32 t2 = tcg_const_i32(1); + TCGv_i32 t3 = tcg_const_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, t1, t1, t3, t2, t1); + tcg_gen_remu_i32(t3, t0, t1); + tcg_gen_extu_i32_tl(ret, t3); + tcg_temp_free_i32(t2); + tcg_temp_free_i32(t3); + } + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); +} + +#define GEN_INT_ARITH_MODW(name, opc3, sign) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)], \ + cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ + sign); \ +} + +GEN_INT_ARITH_MODW(moduw, 0x08, 0); +GEN_INT_ARITH_MODW(modsw, 0x18, 1); + +#if defined(TARGET_PPC64) +static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, + TCGv arg2, int sign) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_mov_i64(t0, arg1); + tcg_gen_mov_i64(t1, arg2); + if (sign) { + TCGv_i64 t2 = tcg_temp_new_i64(); + TCGv_i64 t3 = tcg_temp_new_i64(); + tcg_gen_setcondi_i64(TCG_COND_EQ, t2, t0, INT64_MIN); + tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, -1); + tcg_gen_and_i64(t2, t2, t3); + tcg_gen_setcondi_i64(TCG_COND_EQ, t3, t1, 0); + tcg_gen_or_i64(t2, t2, t3); + tcg_gen_movi_i64(t3, 0); + tcg_gen_movcond_i64(TCG_COND_NE, t1, t2, t3, t2, t1); + tcg_gen_rem_i64(ret, t0, t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); + } else { + TCGv_i64 t2 = tcg_const_i64(1); + TCGv_i64 t3 = tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_EQ, t1, t1, t3, t2, t1); + tcg_gen_remu_i64(ret, t0, t1); + tcg_temp_free_i64(t2); + tcg_temp_free_i64(t3); + } + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + +#define GEN_INT_ARITH_MODD(name, opc3, sign) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + gen_op_arith_modd(ctx, cpu_gpr[rD(ctx->opcode)], \ + cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ + sign); \ +} + +GEN_INT_ARITH_MODD(modud, 0x08, 0); +GEN_INT_ARITH_MODD(modsd, 0x18, 1); +#endif + /* mulhw mulhw. */ static void gen_mulhw(DisasContext *ctx) { @@ -1428,6 +1645,16 @@ static void gen_cntlzw(DisasContext *ctx) if (unlikely(Rc(ctx->opcode) != 0)) gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } + +/* cnttzw */ +static void gen_cnttzw(DisasContext *ctx) +{ + gen_helper_cnttzw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); + } +} + /* eqv & eqv. */ GEN_LOGICAL2(eqv, tcg_gen_eqv_tl, 0x08, PPC_INTEGER); /* extsb & extsb. 
*/ @@ -1448,7 +1675,7 @@ static void gen_pause(DisasContext *ctx) tcg_temp_free_i32(t0); /* Stop translation, this gives other CPUs a chance to run */ - gen_exception_err(ctx, EXCP_HLT, 1); + gen_exception_nip(ctx, EXCP_HLT, ctx->nip); } #endif /* defined(TARGET_PPC64) */ @@ -1668,6 +1895,30 @@ static void gen_cntlzd(DisasContext *ctx) if (unlikely(Rc(ctx->opcode) != 0)) gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); } + +/* cnttzd */ +static void gen_cnttzd(DisasContext *ctx) +{ + gen_helper_cnttzd(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); + } +} + +/* darn */ +static void gen_darn(DisasContext *ctx) +{ + int l = L(ctx->opcode); + + if (l == 0) { + gen_helper_darn32(cpu_gpr[rD(ctx->opcode)]); + } else if (l <= 2) { + /* Return 64-bit random for both CRN and RRN */ + gen_helper_darn64(cpu_gpr[rD(ctx->opcode)]); + } else { + tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], -1); + } +} #endif /*** Integer rotate ***/ @@ -2106,6 +2357,30 @@ static void gen_sradi1(DisasContext *ctx) gen_sradi(ctx, 1); } +/* extswsli & extswsli. */ +static inline void gen_extswsli(DisasContext *ctx, int n) +{ + int sh = SH(ctx->opcode) + (n << 5); + TCGv dst = cpu_gpr[rA(ctx->opcode)]; + TCGv src = cpu_gpr[rS(ctx->opcode)]; + + tcg_gen_ext32s_tl(dst, src); + tcg_gen_shli_tl(dst, dst, sh); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_Rc0(ctx, dst); + } +} + +static void gen_extswsli0(DisasContext *ctx) +{ + gen_extswsli(ctx, 0); +} + +static void gen_extswsli1(DisasContext *ctx) +{ + gen_extswsli(ctx, 1); +} + /* srd & srd. */ static void gen_srd(DisasContext *ctx) { @@ -2126,627 +2401,31 @@ static void gen_srd(DisasContext *ctx) } #endif -#if defined(TARGET_PPC64) -static void gen_set_cr1_from_fpscr(DisasContext *ctx) -{ - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(tmp, cpu_fpscr); - tcg_gen_shri_i32(cpu_crf[1], tmp, 28); - tcg_temp_free_i32(tmp); -} -#else -static void gen_set_cr1_from_fpscr(DisasContext *ctx) +/*** Addressing modes ***/ +/* Register indirect with immediate index : EA = (rA|0) + SIMM */ +static inline void gen_addr_imm_index(DisasContext *ctx, TCGv EA, + target_long maskl) { - tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28); -} -#endif + target_long simm = SIMM(ctx->opcode); -/*** Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - /* NIP cannot be restored if the memory exception comes from an helper */ \ - gen_update_nip(ctx, ctx->nip - 4); \ - gen_reset_fpstatus(); \ - gen_helper_f##op(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rA(ctx->opcode)], \ - cpu_fpr[rC(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); \ - if (isfloat) { \ - gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (set_fprf) { \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ -} - -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - /* NIP cannot be 
restored if the memory exception comes from an helper */ \ - gen_update_nip(ctx, ctx->nip - 4); \ - gen_reset_fpstatus(); \ - gen_helper_f##op(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rA(ctx->opcode)], \ - cpu_fpr[rB(ctx->opcode)]); \ - if (isfloat) { \ - gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (set_fprf) { \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ -} -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ -_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - /* NIP cannot be restored if the memory exception comes from an helper */ \ - gen_update_nip(ctx, ctx->nip - 4); \ - gen_reset_fpstatus(); \ - gen_helper_f##op(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rA(ctx->opcode)], \ - cpu_fpr[rC(ctx->opcode)]); \ - if (isfloat) { \ - gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (set_fprf) { \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ -} -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ -_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); - -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - /* NIP cannot be restored if the memory exception comes from an helper */ \ - gen_update_nip(ctx, ctx->nip - 4); \ - gen_reset_fpstatus(); \ - gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rB(ctx->opcode)]); \ - if (set_fprf) { \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ -} - -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - /* NIP cannot be restored if the memory exception comes from an helper */ \ - gen_update_nip(ctx, ctx->nip - 4); \ - gen_reset_fpstatus(); \ - gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env, \ - cpu_fpr[rB(ctx->opcode)]); \ - if (set_fprf) { \ - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ -} - -/* fadd - fadds */ -GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT); -/* fdiv - fdivs */ -GEN_FLOAT_AB(div, 0x12, 0x000007C0, 1, PPC_FLOAT); -/* fmul - fmuls */ -GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT); - -/* fre */ -GEN_FLOAT_BS(re, 0x3F, 0x18, 1, PPC_FLOAT_EXT); - -/* fres */ -GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES); - -/* frsqrte */ -GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE); - -/* frsqrtes */ -static void gen_frsqrtes(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - 
gen_reset_fpstatus(); - gen_helper_frsqrte(cpu_fpr[rD(ctx->opcode)], cpu_env, - cpu_fpr[rB(ctx->opcode)]); - gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, - cpu_fpr[rD(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/* fsel */ -_GEN_FLOAT_ACB(sel, sel, 0x3F, 0x17, 0, 0, PPC_FLOAT_FSEL); -/* fsub - fsubs */ -GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); -/* Optional: */ - -/* fsqrt */ -static void gen_fsqrt(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - gen_reset_fpstatus(); - gen_helper_fsqrt(cpu_fpr[rD(ctx->opcode)], cpu_env, - cpu_fpr[rB(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_cr1_from_fpscr(ctx); - } -} - -static void gen_fsqrts(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - gen_reset_fpstatus(); - gen_helper_fsqrt(cpu_fpr[rD(ctx->opcode)], cpu_env, - cpu_fpr[rB(ctx->opcode)]); - gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, - cpu_fpr[rD(ctx->opcode)]); - gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/*** Floating-Point multiply-and-add ***/ -/* fmadd - fmadds */ -GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT); -/* fmsub - fmsubs */ -GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT); -/* fnmadd - fnmadds */ -GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT); -/* fnmsub - fnmsubs */ -GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT); - -/*** Floating-Point round & convert ***/ -/* fctiw */ -GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT); -/* fctiwu */ -GEN_FLOAT_B(ctiwu, 0x0E, 0x04, 0, PPC2_FP_CVT_ISA206); -/* fctiwz */ -GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT); -/* fctiwuz */ -GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206); -/* frsp */ -GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT); -/* fcfid */ -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC2_FP_CVT_S64); -/* fcfids */ -GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206); -/* fcfidu */ -GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); -/* fcfidus */ -GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); -/* fctid */ -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64); -/* fctidu */ -GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206); -/* fctidz */ -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64); -/* fctidu */ -GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206); - -/* frin */ -GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT); -/* friz */ -GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT); -/* frip */ -GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT); -/* frim */ -GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); - -static void gen_ftdiv(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], - cpu_fpr[rB(ctx->opcode)]); -} - -static void gen_ftsqrt(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); -} - - - -/*** Floating-Point compare ***/ - -/* fcmpo */ -static void gen_fcmpo(DisasContext 
*ctx) -{ - TCGv_i32 crf; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - gen_reset_fpstatus(); - crf = tcg_const_i32(crfD(ctx->opcode)); - gen_helper_fcmpo(cpu_env, cpu_fpr[rA(ctx->opcode)], - cpu_fpr[rB(ctx->opcode)], crf); - tcg_temp_free_i32(crf); - gen_helper_float_check_status(cpu_env); -} - -/* fcmpu */ -static void gen_fcmpu(DisasContext *ctx) -{ - TCGv_i32 crf; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - gen_reset_fpstatus(); - crf = tcg_const_i32(crfD(ctx->opcode)); - gen_helper_fcmpu(cpu_env, cpu_fpr[rA(ctx->opcode)], - cpu_fpr[rB(ctx->opcode)], crf); - tcg_temp_free_i32(crf); - gen_helper_float_check_status(cpu_env); -} - -/*** Floating-point move ***/ -/* fabs */ -/* XXX: beware that fabs never checks for NaNs nor update FPSCR */ -static void gen_fabs(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - tcg_gen_andi_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], - ~(1ULL << 63)); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/* fmr - fmr. */ -/* XXX: beware that fmr never checks for NaNs nor update FPSCR */ -static void gen_fmr(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - tcg_gen_mov_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/* fnabs */ -/* XXX: beware that fnabs never checks for NaNs nor update FPSCR */ -static void gen_fnabs(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - tcg_gen_ori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], - 1ULL << 63); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/* fneg */ -/* XXX: beware that fneg never checks for NaNs nor update FPSCR */ -static void gen_fneg(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - tcg_gen_xori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)], - 1ULL << 63); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/* fcpsgn: PowerPC 2.05 specification */ -/* XXX: beware that fcpsgn never checks for NaNs nor update FPSCR */ -static void gen_fcpsgn(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], - cpu_fpr[rB(ctx->opcode)], 0, 63); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} - -static void gen_fmrgew(DisasContext *ctx) -{ - TCGv_i64 b0; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - b0 = tcg_temp_new_i64(); - tcg_gen_shri_i64(b0, cpu_fpr[rB(ctx->opcode)], 32); - tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], - b0, 0, 32); - tcg_temp_free_i64(b0); -} - -static void gen_fmrgow(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], - cpu_fpr[rB(ctx->opcode)], - cpu_fpr[rA(ctx->opcode)], - 32, 32); -} - -/*** Floating-Point 
status & ctrl register ***/ - -/* mcrfs */ -static void gen_mcrfs(DisasContext *ctx) -{ - TCGv tmp = tcg_temp_new(); - TCGv_i32 tmask; - TCGv_i64 tnew_fpscr = tcg_temp_new_i64(); - int bfa; - int nibble; - int shift; - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - bfa = crfS(ctx->opcode); - nibble = 7 - bfa; - shift = 4 * nibble; - tcg_gen_shri_tl(tmp, cpu_fpscr, shift); - tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx->opcode)], tmp); - tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 0xf); - tcg_temp_free(tmp); - tcg_gen_extu_tl_i64(tnew_fpscr, cpu_fpscr); - /* Only the exception bits (including FX) should be cleared if read */ - tcg_gen_andi_i64(tnew_fpscr, tnew_fpscr, ~((0xF << shift) & FP_EX_CLEAR_BITS)); - /* FEX and VX need to be updated, so don't set fpscr directly */ - tmask = tcg_const_i32(1 << nibble); - gen_helper_store_fpscr(cpu_env, tnew_fpscr, tmask); - tcg_temp_free_i32(tmask); - tcg_temp_free_i64(tnew_fpscr); -} - -/* mffs */ -static void gen_mffs(DisasContext *ctx) -{ - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - gen_reset_fpstatus(); - tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr); - if (unlikely(Rc(ctx->opcode))) { - gen_set_cr1_from_fpscr(ctx); - } -} - -/* mtfsb0 */ -static void gen_mtfsb0(DisasContext *ctx) -{ - uint8_t crb; - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - crb = 31 - crbD(ctx->opcode); - gen_reset_fpstatus(); - if (likely(crb != FPSCR_FEX && crb != FPSCR_VX)) { - TCGv_i32 t0; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - t0 = tcg_const_i32(crb); - gen_helper_fpscr_clrbit(cpu_env, t0); - tcg_temp_free_i32(t0); - } - if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); - tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); - } -} - -/* mtfsb1 */ -static void gen_mtfsb1(DisasContext *ctx) -{ - uint8_t crb; - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - crb = 31 - crbD(ctx->opcode); - gen_reset_fpstatus(); - /* XXX: we pretend we can only do IEEE floating-point computations */ - if (likely(crb != FPSCR_FEX && crb != FPSCR_VX && crb != FPSCR_NI)) { - TCGv_i32 t0; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - t0 = tcg_const_i32(crb); - gen_helper_fpscr_setbit(cpu_env, t0); - tcg_temp_free_i32(t0); - } - if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); - tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); - } - /* We can raise a differed exception */ - gen_helper_float_check_status(cpu_env); -} - -/* mtfsf */ -static void gen_mtfsf(DisasContext *ctx) -{ - TCGv_i32 t0; - int flm, l, w; - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - flm = FPFLM(ctx->opcode); - l = FPL(ctx->opcode); - w = FPW(ctx->opcode); - if (unlikely(w & !(ctx->insns_flags2 & PPC2_ISA205))) { - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); - return; - } - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - gen_reset_fpstatus(); - if (l) { - t0 = tcg_const_i32((ctx->insns_flags2 & PPC2_ISA205) ? 
0xffff : 0xff); - } else { - t0 = tcg_const_i32(flm << (w * 8)); - } - gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx->opcode)], t0); - tcg_temp_free_i32(t0); - if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); - tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); - } - /* We can raise a differed exception */ - gen_helper_float_check_status(cpu_env); -} - -/* mtfsfi */ -static void gen_mtfsfi(DisasContext *ctx) -{ - int bf, sh, w; - TCGv_i64 t0; - TCGv_i32 t1; - - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - w = FPW(ctx->opcode); - bf = FPBF(ctx->opcode); - if (unlikely(w & !(ctx->insns_flags2 & PPC2_ISA205))) { - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); - return; - } - sh = (8 * w) + 7 - bf; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); - gen_reset_fpstatus(); - t0 = tcg_const_i64(((uint64_t)FPIMM(ctx->opcode)) << (4 * sh)); - t1 = tcg_const_i32(1 << sh); - gen_helper_store_fpscr(cpu_env, t0, t1); - tcg_temp_free_i64(t0); - tcg_temp_free_i32(t1); - if (unlikely(Rc(ctx->opcode) != 0)) { - tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr); - tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX); - } - /* We can raise a differed exception */ - gen_helper_float_check_status(cpu_env); -} - -/*** Addressing modes ***/ -/* Register indirect with immediate index : EA = (rA|0) + SIMM */ -static inline void gen_addr_imm_index(DisasContext *ctx, TCGv EA, - target_long maskl) -{ - target_long simm = SIMM(ctx->opcode); - - simm &= ~maskl; - if (rA(ctx->opcode) == 0) { - if (NARROW_MODE(ctx)) { - simm = (uint32_t)simm; - } - tcg_gen_movi_tl(EA, simm); - } else if (likely(simm != 0)) { - tcg_gen_addi_tl(EA, cpu_gpr[rA(ctx->opcode)], simm); - if (NARROW_MODE(ctx)) { - tcg_gen_ext32u_tl(EA, EA); - } - } else { - if (NARROW_MODE(ctx)) { - tcg_gen_ext32u_tl(EA, cpu_gpr[rA(ctx->opcode)]); - } else { - tcg_gen_mov_tl(EA, cpu_gpr[rA(ctx->opcode)]); - } - } + simm &= ~maskl; + if (rA(ctx->opcode) == 0) { + if (NARROW_MODE(ctx)) { + simm = (uint32_t)simm; + } + tcg_gen_movi_tl(EA, simm); + } else if (likely(simm != 0)) { + tcg_gen_addi_tl(EA, cpu_gpr[rA(ctx->opcode)], simm); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, EA); + } + } else { + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, cpu_gpr[rA(ctx->opcode)]); + } else { + tcg_gen_mov_tl(EA, cpu_gpr[rA(ctx->opcode)]); + } + } } static inline void gen_addr_reg_index(DisasContext *ctx, TCGv EA) @@ -2790,12 +2469,11 @@ static inline void gen_check_align(DisasContext *ctx, TCGv EA, int mask) TCGLabel *l1 = gen_new_label(); TCGv t0 = tcg_temp_new(); TCGv_i32 t1, t2; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); tcg_gen_andi_tl(t0, EA, mask); tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1); t1 = tcg_const_i32(POWERPC_EXCP_ALIGN); - t2 = tcg_const_i32(0); + t2 = tcg_const_i32(ctx->opcode & 0x03FF0000); + gen_update_nip(ctx, ctx->nip - 4); gen_helper_raise_exception_err(cpu_env, t1, t2); tcg_temp_free_i32(t1); tcg_temp_free_i32(t2); @@ -2803,88 +2481,82 @@ static inline void gen_check_align(DisasContext *ctx, TCGv EA, int mask) tcg_temp_free(t0); } -/*** Integer load ***/ -static inline void gen_qemu_ld8u(DisasContext *ctx, TCGv arg1, TCGv arg2) +static inline void gen_align_no_le(DisasContext *ctx) { - tcg_gen_qemu_ld8u(arg1, arg2, ctx->mem_idx); + gen_exception_err(ctx, POWERPC_EXCP_ALIGN, + (ctx->opcode & 0x03FF0000) | POWERPC_EXCP_ALIGN_LE); } 
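A note on the hunk above: the addressing-mode helper kept as context implements the ISA's "register indirect with immediate index" form, EA = (rA|0) + SIMM, truncating the result to 32 bits when the CPU runs in narrow (32-bit) mode. As a rough illustration only -- this is not QEMU code, and the names ea_imm_index, ra_is_zero and narrow_mode are invented for the sketch -- the TCG ops it emits amount to the following plain C:

    /* Standalone sketch (not part of the patch): EA = (rA|0) + SIMM,
     * wrapped to 32 bits in narrow mode.  All names are illustrative. */
    #include <stdint.h>

    static uint64_t ea_imm_index(uint64_t ra_value, int ra_is_zero,
                                 int64_t simm, int narrow_mode)
    {
        uint64_t ea;

        if (ra_is_zero) {
            /* rA == 0 means "use zero", not the contents of GPR0 */
            ea = (uint64_t)simm;
        } else {
            ea = ra_value + (uint64_t)simm;
        }
        if (narrow_mode) {
            ea = (uint32_t)ea;   /* 32-bit mode: the EA wraps at 4 GiB */
        }
        return ea;
    }

The translated version differs only in that the rA == 0 case is resolved at translation time and the truncation is done with tcg_gen_ext32u_tl, as the context lines above show.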
-static inline void gen_qemu_ld16u(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask; - tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op); -} +/*** Integer load ***/ +#define DEF_MEMOP(op) ((op) | ctx->default_tcg_memop_mask) +#define BSWAP_MEMOP(op) ((op) | (ctx->default_tcg_memop_mask ^ MO_BSWAP)) -static inline void gen_qemu_ld16s(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_SW | ctx->default_tcg_memop_mask; - tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op); +#define GEN_QEMU_LOAD_TL(ldop, op) \ +static void glue(gen_qemu_, ldop)(DisasContext *ctx, \ + TCGv val, \ + TCGv addr) \ +{ \ + tcg_gen_qemu_ld_tl(val, addr, ctx->mem_idx, op); \ } -static inline void gen_qemu_ld32u(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UL | ctx->default_tcg_memop_mask; - tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op); -} +GEN_QEMU_LOAD_TL(ld8u, DEF_MEMOP(MO_UB)) +GEN_QEMU_LOAD_TL(ld16u, DEF_MEMOP(MO_UW)) +GEN_QEMU_LOAD_TL(ld16s, DEF_MEMOP(MO_SW)) +GEN_QEMU_LOAD_TL(ld32u, DEF_MEMOP(MO_UL)) +GEN_QEMU_LOAD_TL(ld32s, DEF_MEMOP(MO_SL)) -static void gen_qemu_ld32u_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr) -{ - TCGv tmp = tcg_temp_new(); - gen_qemu_ld32u(ctx, tmp, addr); - tcg_gen_extu_tl_i64(val, tmp); - tcg_temp_free(tmp); -} +GEN_QEMU_LOAD_TL(ld16ur, BSWAP_MEMOP(MO_UW)) +GEN_QEMU_LOAD_TL(ld32ur, BSWAP_MEMOP(MO_UL)) -static inline void gen_qemu_ld32s(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_SL | ctx->default_tcg_memop_mask; - tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op); +#define GEN_QEMU_LOAD_64(ldop, op) \ +static void glue(gen_qemu_, glue(ldop, _i64))(DisasContext *ctx, \ + TCGv_i64 val, \ + TCGv addr) \ +{ \ + tcg_gen_qemu_ld_i64(val, addr, ctx->mem_idx, op); \ } -static void gen_qemu_ld32s_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr) -{ - TCGv tmp = tcg_temp_new(); - gen_qemu_ld32s(ctx, tmp, addr); - tcg_gen_ext_tl_i64(val, tmp); - tcg_temp_free(tmp); -} +GEN_QEMU_LOAD_64(ld8u, DEF_MEMOP(MO_UB)) +GEN_QEMU_LOAD_64(ld16u, DEF_MEMOP(MO_UW)) +GEN_QEMU_LOAD_64(ld32u, DEF_MEMOP(MO_UL)) +GEN_QEMU_LOAD_64(ld32s, DEF_MEMOP(MO_SL)) +GEN_QEMU_LOAD_64(ld64, DEF_MEMOP(MO_Q)) -static inline void gen_qemu_ld64(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) -{ - TCGMemOp op = MO_Q | ctx->default_tcg_memop_mask; - tcg_gen_qemu_ld_i64(arg1, arg2, ctx->mem_idx, op); -} +#if defined(TARGET_PPC64) +GEN_QEMU_LOAD_64(ld64ur, BSWAP_MEMOP(MO_Q)) +#endif -static inline void gen_qemu_st8(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - tcg_gen_qemu_st8(arg1, arg2, ctx->mem_idx); +#define GEN_QEMU_STORE_TL(stop, op) \ +static void glue(gen_qemu_, stop)(DisasContext *ctx, \ + TCGv val, \ + TCGv addr) \ +{ \ + tcg_gen_qemu_st_tl(val, addr, ctx->mem_idx, op); \ } -static inline void gen_qemu_st16(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UW | ctx->default_tcg_memop_mask; - tcg_gen_qemu_st_tl(arg1, arg2, ctx->mem_idx, op); -} +GEN_QEMU_STORE_TL(st8, DEF_MEMOP(MO_UB)) +GEN_QEMU_STORE_TL(st16, DEF_MEMOP(MO_UW)) +GEN_QEMU_STORE_TL(st32, DEF_MEMOP(MO_UL)) -static inline void gen_qemu_st32(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UL | ctx->default_tcg_memop_mask; - tcg_gen_qemu_st_tl(arg1, arg2, ctx->mem_idx, op); -} +GEN_QEMU_STORE_TL(st16r, BSWAP_MEMOP(MO_UW)) +GEN_QEMU_STORE_TL(st32r, BSWAP_MEMOP(MO_UL)) -static void gen_qemu_st32_i64(DisasContext *ctx, TCGv_i64 val, TCGv addr) -{ - TCGv tmp = tcg_temp_new(); - tcg_gen_trunc_i64_tl(tmp, val); - gen_qemu_st32(ctx, tmp, addr); - 
tcg_temp_free(tmp); +#define GEN_QEMU_STORE_64(stop, op) \ +static void glue(gen_qemu_, glue(stop, _i64))(DisasContext *ctx, \ + TCGv_i64 val, \ + TCGv addr) \ +{ \ + tcg_gen_qemu_st_i64(val, addr, ctx->mem_idx, op); \ } -static inline void gen_qemu_st64(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) -{ - TCGMemOp op = MO_Q | ctx->default_tcg_memop_mask; - tcg_gen_qemu_st_i64(arg1, arg2, ctx->mem_idx, op); -} +GEN_QEMU_STORE_64(st8, DEF_MEMOP(MO_UB)) +GEN_QEMU_STORE_64(st16, DEF_MEMOP(MO_UW)) +GEN_QEMU_STORE_64(st32, DEF_MEMOP(MO_UL)) +GEN_QEMU_STORE_64(st64, DEF_MEMOP(MO_Q)) + +#if defined(TARGET_PPC64) +GEN_QEMU_STORE_64(st64r, BSWAP_MEMOP(MO_Q)) +#endif #define GEN_LD(name, ldop, opc, type) \ static void glue(gen_, name)(DisasContext *ctx) \ @@ -2972,12 +2644,12 @@ GEN_LDUX(lwa, ld32s, 0x15, 0x0B, PPC_64B); /* lwax */ GEN_LDX(lwa, ld32s, 0x15, 0x0A, PPC_64B); /* ldux */ -GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B); +GEN_LDUX(ld, ld64_i64, 0x15, 0x01, PPC_64B); /* ldx */ -GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B); +GEN_LDX(ld, ld64_i64, 0x15, 0x00, PPC_64B); /* CI load/store variants */ -GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST) +GEN_LDX_HVRM(ldcix, ld64_i64, 0x15, 0x1b, PPC_CILDST) GEN_LDX_HVRM(lwzcix, ld32u, 0x15, 0x15, PPC_CILDST) GEN_LDX_HVRM(lhzcix, ld16u, 0x15, 0x19, PPC_CILDST) GEN_LDX_HVRM(lbzcix, ld8u, 0x15, 0x1a, PPC_CILDST) @@ -3000,7 +2672,7 @@ static void gen_ld(DisasContext *ctx) gen_qemu_ld32s(ctx, cpu_gpr[rD(ctx->opcode)], EA); } else { /* ld - ldu */ - gen_qemu_ld64(ctx, cpu_gpr[rD(ctx->opcode)], EA); + gen_qemu_ld64_i64(ctx, cpu_gpr[rD(ctx->opcode)], EA); } if (Rc(ctx->opcode)) tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); @@ -3023,10 +2695,9 @@ static void gen_lq(DisasContext *ctx) } if (!le_is_supported && ctx->le_mode) { - gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_LE); + gen_align_no_le(ctx); return; } - ra = rA(ctx->opcode); rd = rD(ctx->opcode); if (unlikely((rd & 1) || rd == ra)) { @@ -3038,16 +2709,16 @@ static void gen_lq(DisasContext *ctx) EA = tcg_temp_new(); gen_addr_imm_index(ctx, EA, 0x0F); - /* We only need to swap high and low halves. gen_qemu_ld64 does necessary - 64-bit byteswap already. */ + /* We only need to swap high and low halves. gen_qemu_ld64_i64 does + necessary 64-bit byteswap already. 
*/ if (unlikely(ctx->le_mode)) { - gen_qemu_ld64(ctx, cpu_gpr[rd+1], EA); + gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA); gen_addr_add(ctx, EA, EA, 8); - gen_qemu_ld64(ctx, cpu_gpr[rd], EA); + gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA); } else { - gen_qemu_ld64(ctx, cpu_gpr[rd], EA); + gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA); gen_addr_add(ctx, EA, EA, 8); - gen_qemu_ld64(ctx, cpu_gpr[rd+1], EA); + gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA); } tcg_temp_free(EA); } @@ -3130,9 +2801,9 @@ GEN_STS(sth, st16, 0x0C, PPC_INTEGER); /* stw stwu stwux stwx */ GEN_STS(stw, st32, 0x04, PPC_INTEGER); #if defined(TARGET_PPC64) -GEN_STUX(std, st64, 0x15, 0x05, PPC_64B); -GEN_STX(std, st64, 0x15, 0x04, PPC_64B); -GEN_STX_HVRM(stdcix, st64, 0x15, 0x1f, PPC_CILDST) +GEN_STUX(std, st64_i64, 0x15, 0x05, PPC_64B); +GEN_STX(std, st64_i64, 0x15, 0x04, PPC_64B); +GEN_STX_HVRM(stdcix, st64_i64, 0x15, 0x1f, PPC_CILDST) GEN_STX_HVRM(stwcix, st32, 0x15, 0x1c, PPC_CILDST) GEN_STX_HVRM(sthcix, st16, 0x15, 0x1d, PPC_CILDST) GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST) @@ -3157,7 +2828,7 @@ static void gen_std(DisasContext *ctx) } if (!le_is_supported && ctx->le_mode) { - gen_exception_err(ctx, POWERPC_EXCP_ALIGN, POWERPC_EXCP_ALIGN_LE); + gen_align_no_le(ctx); return; } @@ -3169,16 +2840,16 @@ static void gen_std(DisasContext *ctx) EA = tcg_temp_new(); gen_addr_imm_index(ctx, EA, 0x03); - /* We only need to swap high and low halves. gen_qemu_st64 does + /* We only need to swap high and low halves. gen_qemu_st64_i64 does necessary 64-bit byteswap already. */ if (unlikely(ctx->le_mode)) { - gen_qemu_st64(ctx, cpu_gpr[rs+1], EA); + gen_qemu_st64_i64(ctx, cpu_gpr[rs + 1], EA); gen_addr_add(ctx, EA, EA, 8); - gen_qemu_st64(ctx, cpu_gpr[rs], EA); + gen_qemu_st64_i64(ctx, cpu_gpr[rs], EA); } else { - gen_qemu_st64(ctx, cpu_gpr[rs], EA); + gen_qemu_st64_i64(ctx, cpu_gpr[rs], EA); gen_addr_add(ctx, EA, EA, 8); - gen_qemu_st64(ctx, cpu_gpr[rs+1], EA); + gen_qemu_st64_i64(ctx, cpu_gpr[rs + 1], EA); } tcg_temp_free(EA); } else { @@ -3192,7 +2863,7 @@ static void gen_std(DisasContext *ctx) gen_set_access_type(ctx, ACCESS_INT); EA = tcg_temp_new(); gen_addr_imm_index(ctx, EA, 0x03); - gen_qemu_st64(ctx, cpu_gpr[rs], EA); + gen_qemu_st64_i64(ctx, cpu_gpr[rs], EA); if (Rc(ctx->opcode)) tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); tcg_temp_free(EA); @@ -3202,57 +2873,23 @@ static void gen_std(DisasContext *ctx) /*** Integer load and store with byte reverse ***/ /* lhbrx */ -static inline void gen_qemu_ld16ur(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UW | (ctx->default_tcg_memop_mask ^ MO_BSWAP); - tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op); -} GEN_LDX(lhbr, ld16ur, 0x16, 0x18, PPC_INTEGER); /* lwbrx */ -static inline void gen_qemu_ld32ur(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UL | (ctx->default_tcg_memop_mask ^ MO_BSWAP); - tcg_gen_qemu_ld_tl(arg1, arg2, ctx->mem_idx, op); -} GEN_LDX(lwbr, ld32ur, 0x16, 0x10, PPC_INTEGER); #if defined(TARGET_PPC64) /* ldbrx */ -static inline void gen_qemu_ld64ur(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_Q | (ctx->default_tcg_memop_mask ^ MO_BSWAP); - tcg_gen_qemu_ld_i64(arg1, arg2, ctx->mem_idx, op); -} -GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE); +GEN_LDX_E(ldbr, ld64ur_i64, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE); +/* stdbrx */ +GEN_STX_E(stdbr, st64r_i64, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE); #endif /* TARGET_PPC64 */ /* sthbrx */ -static inline void gen_qemu_st16r(DisasContext *ctx, TCGv arg1, 
TCGv arg2) -{ - TCGMemOp op = MO_UW | (ctx->default_tcg_memop_mask ^ MO_BSWAP); - tcg_gen_qemu_st_tl(arg1, arg2, ctx->mem_idx, op); -} GEN_STX(sthbr, st16r, 0x16, 0x1C, PPC_INTEGER); - /* stwbrx */ -static inline void gen_qemu_st32r(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_UL | (ctx->default_tcg_memop_mask ^ MO_BSWAP); - tcg_gen_qemu_st_tl(arg1, arg2, ctx->mem_idx, op); -} GEN_STX(stwbr, st32r, 0x16, 0x14, PPC_INTEGER); -#if defined(TARGET_PPC64) -/* stdbrx */ -static inline void gen_qemu_st64r(DisasContext *ctx, TCGv arg1, TCGv arg2) -{ - TCGMemOp op = MO_Q | (ctx->default_tcg_memop_mask ^ MO_BSWAP); - tcg_gen_qemu_st_i64(arg1, arg2, ctx->mem_idx, op); -} -GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE); -#endif /* TARGET_PPC64 */ - /*** Integer load and store multiple ***/ /* lmw */ @@ -3260,9 +2897,12 @@ static void gen_lmw(DisasContext *ctx) { TCGv t0; TCGv_i32 t1; + + if (ctx->le_mode) { + gen_align_no_le(ctx); + return; + } gen_set_access_type(ctx, ACCESS_INT); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); t1 = tcg_const_i32(rD(ctx->opcode)); gen_addr_imm_index(ctx, t0, 0); @@ -3276,9 +2916,12 @@ static void gen_stmw(DisasContext *ctx) { TCGv t0; TCGv_i32 t1; + + if (ctx->le_mode) { + gen_align_no_le(ctx); + return; + } gen_set_access_type(ctx, ACCESS_INT); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); t1 = tcg_const_i32(rS(ctx->opcode)); gen_addr_imm_index(ctx, t0, 0); @@ -3304,6 +2947,10 @@ static void gen_lswi(DisasContext *ctx) int ra = rA(ctx->opcode); int nr; + if (ctx->le_mode) { + gen_align_no_le(ctx); + return; + } if (nb == 0) nb = 32; nr = (nb + 3) / 4; @@ -3312,8 +2959,6 @@ static void gen_lswi(DisasContext *ctx) return; } gen_set_access_type(ctx, ACCESS_INT); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); gen_addr_register(ctx, t0); t1 = tcg_const_i32(nb); @@ -3329,9 +2974,12 @@ static void gen_lswx(DisasContext *ctx) { TCGv t0; TCGv_i32 t1, t2, t3; + + if (ctx->le_mode) { + gen_align_no_le(ctx); + return; + } gen_set_access_type(ctx, ACCESS_INT); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); gen_addr_reg_index(ctx, t0); t1 = tcg_const_i32(rD(ctx->opcode)); @@ -3350,9 +2998,12 @@ static void gen_stswi(DisasContext *ctx) TCGv t0; TCGv_i32 t1, t2; int nb = NB(ctx->opcode); + + if (ctx->le_mode) { + gen_align_no_le(ctx); + return; + } gen_set_access_type(ctx, ACCESS_INT); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); gen_addr_register(ctx, t0); if (nb == 0) @@ -3370,9 +3021,12 @@ static void gen_stswx(DisasContext *ctx) { TCGv t0; TCGv_i32 t1, t2; + + if (ctx->le_mode) { + gen_align_no_le(ctx); + return; + } gen_set_access_type(ctx, ACCESS_INT); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); gen_addr_reg_index(ctx, t0); t1 = tcg_temp_new_i32(); @@ -3392,7 +3046,7 @@ static void gen_eieio(DisasContext *ctx) } #if !defined(CONFIG_USER_ONLY) -static inline void gen_check_tlb_flush(DisasContext *ctx) +static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) { TCGv_i32 t; TCGLabel *l; @@ -3404,12 +3058,16 @@ 
static inline void gen_check_tlb_flush(DisasContext *ctx) t = tcg_temp_new_i32(); tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); - gen_helper_check_tlb_flush(cpu_env); + if (global) { + gen_helper_check_tlb_flush_global(cpu_env); + } else { + gen_helper_check_tlb_flush_local(cpu_env); + } gen_set_label(l); tcg_temp_free_i32(t); } #else -static inline void gen_check_tlb_flush(DisasContext *ctx) { } +static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) { } #endif /* isync */ @@ -3420,53 +3078,51 @@ static void gen_isync(DisasContext *ctx) * kernel mode however so check MSR_PR */ if (!ctx->pr) { - gen_check_tlb_flush(ctx); + gen_check_tlb_flush(ctx, false); } gen_stop_exception(ctx); } -#define LARX(name, len, loadop) \ +#define MEMOP_GET_SIZE(x) (1 << ((x) & MO_SIZE)) + +#define LARX(name, memop) \ static void gen_##name(DisasContext *ctx) \ { \ TCGv t0; \ TCGv gpr = cpu_gpr[rD(ctx->opcode)]; \ + int len = MEMOP_GET_SIZE(memop); \ gen_set_access_type(ctx, ACCESS_RES); \ t0 = tcg_temp_local_new(); \ gen_addr_reg_index(ctx, t0); \ if ((len) > 1) { \ gen_check_align(ctx, t0, (len)-1); \ } \ - gen_qemu_##loadop(ctx, gpr, t0); \ + tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop); \ tcg_gen_mov_tl(cpu_reserve, t0); \ tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val)); \ tcg_temp_free(t0); \ } /* lwarx */ -LARX(lbarx, 1, ld8u); -LARX(lharx, 2, ld16u); -LARX(lwarx, 4, ld32u); - +LARX(lbarx, DEF_MEMOP(MO_UB)) +LARX(lharx, DEF_MEMOP(MO_UW)) +LARX(lwarx, DEF_MEMOP(MO_UL)) #if defined(CONFIG_USER_ONLY) static void gen_conditional_store(DisasContext *ctx, TCGv EA, - int reg, int size) + int reg, int memop) { TCGv t0 = tcg_temp_new(); - uint32_t save_exception = ctx->exception; tcg_gen_st_tl(EA, cpu_env, offsetof(CPUPPCState, reserve_ea)); - tcg_gen_movi_tl(t0, (size << 5) | reg); + tcg_gen_movi_tl(t0, (MEMOP_GET_SIZE(memop) << 5) | reg); tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, reserve_info)); tcg_temp_free(t0); - gen_update_nip(ctx, ctx->nip-4); - ctx->exception = POWERPC_EXCP_BRANCH; - gen_exception(ctx, POWERPC_EXCP_STCX); - ctx->exception = save_exception; + gen_exception_err(ctx, POWERPC_EXCP_STCX, 0); } #else static void gen_conditional_store(DisasContext *ctx, TCGv EA, - int reg, int size) + int reg, int memop) { TCGLabel *l1; @@ -3474,65 +3130,36 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA, l1 = gen_new_label(); tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ); -#if defined(TARGET_PPC64) - if (size == 8) { - gen_qemu_st64(ctx, cpu_gpr[reg], EA); - } else -#endif - if (size == 4) { - gen_qemu_st32(ctx, cpu_gpr[reg], EA); - } else if (size == 2) { - gen_qemu_st16(ctx, cpu_gpr[reg], EA); -#if defined(TARGET_PPC64) - } else if (size == 16) { - TCGv gpr1, gpr2 , EA8; - if (unlikely(ctx->le_mode)) { - gpr1 = cpu_gpr[reg+1]; - gpr2 = cpu_gpr[reg]; - } else { - gpr1 = cpu_gpr[reg]; - gpr2 = cpu_gpr[reg+1]; - } - gen_qemu_st64(ctx, gpr1, EA); - EA8 = tcg_temp_local_new(); - gen_addr_add(ctx, EA8, EA, 8); - gen_qemu_st64(ctx, gpr2, EA8); - tcg_temp_free(EA8); -#endif - } else { - gen_qemu_st8(ctx, cpu_gpr[reg], EA); - } + tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); gen_set_label(l1); tcg_gen_movi_tl(cpu_reserve, -1); } #endif -#define STCX(name, len) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv t0; \ - if (unlikely((len == 16) && (rD(ctx->opcode) & 1))) { \ - gen_inval_exception(ctx, \ - 
POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_RES); \ - t0 = tcg_temp_local_new(); \ - gen_addr_reg_index(ctx, t0); \ - if (len > 1) { \ - gen_check_align(ctx, t0, (len)-1); \ - } \ - gen_conditional_store(ctx, t0, rS(ctx->opcode), len); \ - tcg_temp_free(t0); \ -} - -STCX(stbcx_, 1); -STCX(sthcx_, 2); -STCX(stwcx_, 4); +#define STCX(name, memop) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv t0; \ + int len = MEMOP_GET_SIZE(memop); \ + gen_set_access_type(ctx, ACCESS_RES); \ + t0 = tcg_temp_local_new(); \ + gen_addr_reg_index(ctx, t0); \ + if (len > 1) { \ + gen_check_align(ctx, t0, (len) - 1); \ + } \ + gen_conditional_store(ctx, t0, rS(ctx->opcode), memop); \ + tcg_temp_free(t0); \ +} + +STCX(stbcx_, DEF_MEMOP(MO_UB)) +STCX(sthcx_, DEF_MEMOP(MO_UW)) +STCX(stwcx_, DEF_MEMOP(MO_UL)) #if defined(TARGET_PPC64) /* ldarx */ -LARX(ldarx, 8, ld64); +LARX(ldarx, DEF_MEMOP(MO_Q)) +/* stdcx. */ +STCX(stdcx_, DEF_MEMOP(MO_Q)) /* lqarx */ static void gen_lqarx(DisasContext *ctx) @@ -3558,21 +3185,63 @@ static void gen_lqarx(DisasContext *ctx) gpr1 = cpu_gpr[rd]; gpr2 = cpu_gpr[rd+1]; } - gen_qemu_ld64(ctx, gpr1, EA); + tcg_gen_qemu_ld_i64(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q)); tcg_gen_mov_tl(cpu_reserve, EA); - gen_addr_add(ctx, EA, EA, 8); - gen_qemu_ld64(ctx, gpr2, EA); + tcg_gen_qemu_ld_i64(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q)); tcg_gen_st_tl(gpr1, cpu_env, offsetof(CPUPPCState, reserve_val)); tcg_gen_st_tl(gpr2, cpu_env, offsetof(CPUPPCState, reserve_val2)); + tcg_temp_free(EA); +} + +/* stqcx. */ +static void gen_stqcx_(DisasContext *ctx) +{ + TCGv EA; + int reg = rS(ctx->opcode); + int len = 16; +#if !defined(CONFIG_USER_ONLY) + TCGLabel *l1; + TCGv gpr1, gpr2; +#endif + + if (unlikely((rD(ctx->opcode) & 1))) { + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); + return; + } + gen_set_access_type(ctx, ACCESS_RES); + EA = tcg_temp_local_new(); + gen_addr_reg_index(ctx, EA); + if (len > 1) { + gen_check_align(ctx, EA, (len) - 1); + } + +#if defined(CONFIG_USER_ONLY) + gen_conditional_store(ctx, EA, reg, 16); +#else + tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); + l1 = gen_new_label(); + tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); + tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ); + + if (unlikely(ctx->le_mode)) { + gpr1 = cpu_gpr[reg + 1]; + gpr2 = cpu_gpr[reg]; + } else { + gpr1 = cpu_gpr[reg]; + gpr2 = cpu_gpr[reg + 1]; + } + tcg_gen_qemu_st_tl(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q)); + gen_addr_add(ctx, EA, EA, 8); + tcg_gen_qemu_st_tl(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q)); + gen_set_label(l1); + tcg_gen_movi_tl(cpu_reserve, -1); +#endif tcg_temp_free(EA); } -/* stdcx. */ -STCX(stdcx_, 8); -STCX(stqcx_, 16); #endif /* defined(TARGET_PPC64) */ /* sync */ @@ -3589,7 +3258,7 @@ static void gen_sync(DisasContext *ctx) * check MSR_PR as well. 
*/ if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { - gen_check_tlb_flush(ctx); + gen_check_tlb_flush(ctx, true); } } @@ -3601,7 +3270,7 @@ static void gen_wait(DisasContext *ctx) -offsetof(PowerPCCPU, env) + offsetof(CPUState, halted)); tcg_temp_free_i32(t0); /* Stop translation, as the CPU is supposed to sleep from now */ - gen_exception_err(ctx, EXCP_HLT, 1); + gen_exception_nip(ctx, EXCP_HLT, ctx->nip); } #if defined(TARGET_PPC64) @@ -3666,420 +3335,87 @@ static void gen_rvwinkle(DisasContext *ctx) } #endif /* #if defined(TARGET_PPC64) */ -/*** Floating-point load ***/ -#define GEN_LDF(name, ldop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_LDUF(name, ldop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \ - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_LDUXF(name, ldop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \ - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_LDXF(name, ldop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_LDFS(name, ldop, op, type) \ -GEN_LDF(name, ldop, op | 0x20, type); \ -GEN_LDUF(name, ldop, op | 0x21, type); \ -GEN_LDUXF(name, ldop, op | 0x01, type); \ -GEN_LDXF(name, ldop, 0x17, op | 0x00, type) - -static inline void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) +static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip) { - TCGv t0 = tcg_temp_new(); - TCGv_i32 t1 = tcg_temp_new_i32(); - gen_qemu_ld32u(ctx, t0, arg2); - tcg_gen_trunc_tl_i32(t1, t0); - tcg_temp_free(t0); - gen_helper_float32_to_float64(arg1, cpu_env, t1); - tcg_temp_free_i32(t1); +#if defined(TARGET_PPC64) + if (ctx->has_cfar) + tcg_gen_movi_tl(cpu_cfar, nip); +#endif } - /* lfd lfdu lfdux lfdx */ -GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT); - /* lfs lfsu lfsux lfsx */ -GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT); - -/* lfdp */ -static void gen_lfdp(DisasContext *ctx) +static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) { - TCGv EA; - if (unlikely(!ctx->fpu_enabled)) { - 
gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - gen_set_access_type(ctx, ACCESS_FLOAT); - EA = tcg_temp_new(); - gen_addr_imm_index(ctx, EA, 0); - /* We only need to swap high and low halves. gen_qemu_ld64 does necessary - 64-bit byteswap already. */ - if (unlikely(ctx->le_mode)) { - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - } else { - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + if (unlikely(ctx->singlestep_enabled)) { + return false; } - tcg_temp_free(EA); + +#ifndef CONFIG_USER_ONLY + return (ctx->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); +#else + return true; +#endif } -/* lfdpx */ -static void gen_lfdpx(DisasContext *ctx) +/*** Branch ***/ +static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) { - TCGv EA; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; + if (NARROW_MODE(ctx)) { + dest = (uint32_t) dest; } - gen_set_access_type(ctx, ACCESS_FLOAT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - /* We only need to swap high and low halves. gen_qemu_ld64 does necessary - 64-bit byteswap already. */ - if (unlikely(ctx->le_mode)) { - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + if (use_goto_tb(ctx, dest)) { + tcg_gen_goto_tb(n); + tcg_gen_movi_tl(cpu_nip, dest & ~3); + tcg_gen_exit_tb((uintptr_t)ctx->tb + n); } else { - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_ld64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_movi_tl(cpu_nip, dest & ~3); + if (unlikely(ctx->singlestep_enabled)) { + if ((ctx->singlestep_enabled & + (CPU_BRANCH_STEP | CPU_SINGLE_STEP)) && + (ctx->exception == POWERPC_EXCP_BRANCH || + ctx->exception == POWERPC_EXCP_TRACE)) { + gen_exception_nip(ctx, POWERPC_EXCP_TRACE, dest); + } + if (ctx->singlestep_enabled & GDBSTUB_SINGLE_STEP) { + gen_debug_exception(ctx); + } + } + tcg_gen_exit_tb(0); } - tcg_temp_free(EA); } -/* lfiwax */ -static void gen_lfiwax(DisasContext *ctx) +static inline void gen_setlr(DisasContext *ctx, target_ulong nip) { - TCGv EA; - TCGv t0; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; + if (NARROW_MODE(ctx)) { + nip = (uint32_t)nip; } - gen_set_access_type(ctx, ACCESS_FLOAT); - EA = tcg_temp_new(); - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - gen_qemu_ld32s(ctx, t0, EA); - tcg_gen_ext_tl_i64(cpu_fpr[rD(ctx->opcode)], t0); - tcg_temp_free(EA); - tcg_temp_free(t0); + tcg_gen_movi_tl(cpu_lr, nip); } -/* lfiwzx */ -static void gen_lfiwzx(DisasContext *ctx) +/* b ba bl bla */ +static void gen_b(DisasContext *ctx) { - TCGv EA; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; + target_ulong li, target; + + ctx->exception = POWERPC_EXCP_BRANCH; + /* sign extend LI */ + li = LI(ctx->opcode); + li = (li ^ 0x02000000) - 0x02000000; + if (likely(AA(ctx->opcode) == 0)) { + target = ctx->nip + li - 4; + } else { + target = li; } - gen_set_access_type(ctx, ACCESS_FLOAT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - gen_qemu_ld32u_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - tcg_temp_free(EA); -} -/*** Floating-point store ***/ -#define GEN_STF(name, stop, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if 
(unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ + if (LK(ctx->opcode)) { + gen_setlr(ctx, ctx->nip); + } + gen_update_cfar(ctx, ctx->nip - 4); + gen_goto_tb(ctx, 0, target); } -#define GEN_STUF(name, stop, opc, type) \ -static void glue(gen_, name##u)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_imm_index(ctx, EA, 0); \ - gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_STUXF(name, stop, opc, type) \ -static void glue(gen_, name##ux)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - if (unlikely(rA(ctx->opcode) == 0)) { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ - tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_STXF(name, stop, opc2, opc3, type) \ -static void glue(gen_, name##x)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_FLOAT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ - tcg_temp_free(EA); \ -} - -#define GEN_STFS(name, stop, op, type) \ -GEN_STF(name, stop, op | 0x20, type); \ -GEN_STUF(name, stop, op | 0x21, type); \ -GEN_STUXF(name, stop, op | 0x01, type); \ -GEN_STXF(name, stop, 0x17, op | 0x00, type) - -static inline void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) -{ - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv t1 = tcg_temp_new(); - gen_helper_float64_to_float32(t0, cpu_env, arg1); - tcg_gen_extu_i32_tl(t1, t0); - tcg_temp_free_i32(t0); - gen_qemu_st32(ctx, t1, arg2); - tcg_temp_free(t1); -} - -/* stfd stfdu stfdux stfdx */ -GEN_STFS(stfd, st64, 0x16, PPC_FLOAT); -/* stfs stfsu stfsux stfsx */ -GEN_STFS(stfs, st32fs, 0x14, PPC_FLOAT); - -/* stfdp */ -static void gen_stfdp(DisasContext *ctx) -{ - TCGv EA; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - gen_set_access_type(ctx, ACCESS_FLOAT); - EA = tcg_temp_new(); - gen_addr_imm_index(ctx, EA, 0); - /* We only need to swap high and low halves. gen_qemu_st64 does necessary - 64-bit byteswap already. 
*/ - if (unlikely(ctx->le_mode)) { - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - } else { - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); - } - tcg_temp_free(EA); -} - -/* stfdpx */ -static void gen_stfdpx(DisasContext *ctx) -{ - TCGv EA; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } - gen_set_access_type(ctx, ACCESS_FLOAT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - /* We only need to swap high and low halves. gen_qemu_st64 does necessary - 64-bit byteswap already. */ - if (unlikely(ctx->le_mode)) { - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - } else { - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode)], EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_st64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); - } - tcg_temp_free(EA); -} - -/* Optional: */ -static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) -{ - TCGv t0 = tcg_temp_new(); - tcg_gen_trunc_i64_tl(t0, arg1), - gen_qemu_st32(ctx, t0, arg2); - tcg_temp_free(t0); -} -/* stfiwx */ -GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX); - -static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip) -{ -#if defined(TARGET_PPC64) - if (ctx->has_cfar) - tcg_gen_movi_tl(cpu_cfar, nip); -#endif -} - -static inline bool use_goto_tb(DisasContext *ctx, target_ulong dest) -{ - if (unlikely(ctx->singlestep_enabled)) { - return false; - } - -#ifndef CONFIG_USER_ONLY - return (ctx->tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK); -#else - return true; -#endif -} - -/*** Branch ***/ -static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) -{ - if (NARROW_MODE(ctx)) { - dest = (uint32_t) dest; - } - if (use_goto_tb(ctx, dest)) { - tcg_gen_goto_tb(n); - tcg_gen_movi_tl(cpu_nip, dest & ~3); - tcg_gen_exit_tb((uintptr_t)ctx->tb + n); - } else { - tcg_gen_movi_tl(cpu_nip, dest & ~3); - if (unlikely(ctx->singlestep_enabled)) { - if ((ctx->singlestep_enabled & - (CPU_BRANCH_STEP | CPU_SINGLE_STEP)) && - (ctx->exception == POWERPC_EXCP_BRANCH || - ctx->exception == POWERPC_EXCP_TRACE)) { - target_ulong tmp = ctx->nip; - ctx->nip = dest; - gen_exception(ctx, POWERPC_EXCP_TRACE); - ctx->nip = tmp; - } - if (ctx->singlestep_enabled & GDBSTUB_SINGLE_STEP) { - gen_debug_exception(ctx); - } - } - tcg_gen_exit_tb(0); - } -} - -static inline void gen_setlr(DisasContext *ctx, target_ulong nip) -{ - if (NARROW_MODE(ctx)) { - nip = (uint32_t)nip; - } - tcg_gen_movi_tl(cpu_lr, nip); -} - -/* b ba bl bla */ -static void gen_b(DisasContext *ctx) -{ - target_ulong li, target; - - ctx->exception = POWERPC_EXCP_BRANCH; - /* sign extend LI */ - li = LI(ctx->opcode); - li = (li ^ 0x02000000) - 0x02000000; - if (likely(AA(ctx->opcode) == 0)) { - target = ctx->nip + li - 4; - } else { - target = li; - } - if (LK(ctx->opcode)) { - gen_setlr(ctx, ctx->nip); - } - gen_update_cfar(ctx, ctx->nip); - gen_goto_tb(ctx, 0, target); -} - -#define BCOND_IM 0 -#define BCOND_LR 1 -#define BCOND_CTR 2 -#define BCOND_TAR 3 +#define BCOND_IM 0 +#define BCOND_LR 1 +#define BCOND_CTR 2 +#define BCOND_TAR 3 static inline void gen_bcond(DisasContext *ctx, int type) { @@ -4137,7 +3473,7 @@ static inline void gen_bcond(DisasContext *ctx, int type) } tcg_temp_free_i32(temp); } - gen_update_cfar(ctx, ctx->nip); + 
gen_update_cfar(ctx, ctx->nip - 4); if (type == BCOND_IM) { target_ulong li = (target_long)((int16_t)(BD(ctx->opcode))); if (likely(AA(ctx->opcode) == 0)) { @@ -4145,8 +3481,10 @@ static inline void gen_bcond(DisasContext *ctx, int type) } else { gen_goto_tb(ctx, 0, li); } - gen_set_label(l1); - gen_goto_tb(ctx, 1, ctx->nip); + if ((bo & 0x14) != 0x14) { + gen_set_label(l1); + gen_goto_tb(ctx, 1, ctx->nip); + } } else { if (NARROW_MODE(ctx)) { tcg_gen_andi_tl(cpu_nip, target, (uint32_t)~3); @@ -4154,9 +3492,11 @@ static inline void gen_bcond(DisasContext *ctx, int type) tcg_gen_andi_tl(cpu_nip, target, ~3); } tcg_gen_exit_tb(0); - gen_set_label(l1); - gen_update_nip(ctx, ctx->nip); - tcg_gen_exit_tb(0); + if ((bo & 0x14) != 0x14) { + gen_set_label(l1); + gen_update_nip(ctx, ctx->nip); + tcg_gen_exit_tb(0); + } } if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) { tcg_temp_free(target); @@ -4246,13 +3586,16 @@ static void gen_rfi(DisasContext *ctx) #if defined(CONFIG_USER_ONLY) GEN_PRIV; #else - /* FIXME: This instruction doesn't exist anymore on 64-bit server - * processors compliant with arch 2.x, we should remove it there, - * but we need to fix OpenBIOS not to use it on 970 first + /* This instruction doesn't exist anymore on 64-bit server + * processors compliant with arch 2.x */ + if (ctx->insns_flags & PPC_SEGMENT_64B) { + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); + return; + } /* Restore CPU state */ CHK_SV; - gen_update_cfar(ctx, ctx->nip); + gen_update_cfar(ctx, ctx->nip - 4); gen_helper_rfi(cpu_env); gen_sync_exception(ctx); #endif @@ -4266,7 +3609,7 @@ static void gen_rfid(DisasContext *ctx) #else /* Restore CPU state */ CHK_SV; - gen_update_cfar(ctx, ctx->nip); + gen_update_cfar(ctx, ctx->nip - 4); gen_helper_rfid(cpu_env); gen_sync_exception(ctx); #endif @@ -4301,12 +3644,30 @@ static void gen_sc(DisasContext *ctx) /*** Trap ***/ +/* Check for unconditional traps (always or never) */ +static bool check_unconditional_trap(DisasContext *ctx) +{ + /* Trap never */ + if (TO(ctx->opcode) == 0) { + return true; + } + /* Trap always */ + if (TO(ctx->opcode) == 31) { + gen_exception_err(ctx, POWERPC_EXCP_PROGRAM, POWERPC_EXCP_TRAP); + return true; + } + return false; +} + /* tw */ static void gen_tw(DisasContext *ctx) { - TCGv_i32 t0 = tcg_const_i32(TO(ctx->opcode)); - /* Update the nip since this might generate a trap exception */ - gen_update_nip(ctx, ctx->nip); + TCGv_i32 t0; + + if (check_unconditional_trap(ctx)) { + return; + } + t0 = tcg_const_i32(TO(ctx->opcode)); gen_helper_tw(cpu_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); tcg_temp_free_i32(t0); @@ -4315,10 +3676,14 @@ static void gen_tw(DisasContext *ctx) /* twi */ static void gen_twi(DisasContext *ctx) { - TCGv t0 = tcg_const_tl(SIMM(ctx->opcode)); - TCGv_i32 t1 = tcg_const_i32(TO(ctx->opcode)); - /* Update the nip since this might generate a trap exception */ - gen_update_nip(ctx, ctx->nip); + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx)) { + return; + } + t0 = tcg_const_tl(SIMM(ctx->opcode)); + t1 = tcg_const_i32(TO(ctx->opcode)); gen_helper_tw(cpu_env, cpu_gpr[rA(ctx->opcode)], t0, t1); tcg_temp_free(t0); tcg_temp_free_i32(t1); @@ -4328,9 +3693,12 @@ static void gen_twi(DisasContext *ctx) /* td */ static void gen_td(DisasContext *ctx) { - TCGv_i32 t0 = tcg_const_i32(TO(ctx->opcode)); - /* Update the nip since this might generate a trap exception */ - gen_update_nip(ctx, ctx->nip); + TCGv_i32 t0; + + if (check_unconditional_trap(ctx)) { + return; + } + t0 = 
tcg_const_i32(TO(ctx->opcode)); gen_helper_td(cpu_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); tcg_temp_free_i32(t0); @@ -4339,10 +3707,14 @@ static void gen_td(DisasContext *ctx) /* tdi */ static void gen_tdi(DisasContext *ctx) { - TCGv t0 = tcg_const_tl(SIMM(ctx->opcode)); - TCGv_i32 t1 = tcg_const_i32(TO(ctx->opcode)); - /* Update the nip since this might generate a trap exception */ - gen_update_nip(ctx, ctx->nip); + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx)) { + return; + } + t0 = tcg_const_tl(SIMM(ctx->opcode)); + t1 = tcg_const_i32(TO(ctx->opcode)); gen_helper_td(cpu_env, cpu_gpr[rA(ctx->opcode)], t0, t1); tcg_temp_free(t0); tcg_temp_free_i32(t1); @@ -4684,6 +4056,27 @@ static void gen_mtspr(DisasContext *ctx) } } +#if defined(TARGET_PPC64) +/* setb */ +static void gen_setb(DisasContext *ctx) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t8 = tcg_temp_new_i32(); + TCGv_i32 tm1 = tcg_temp_new_i32(); + int crf = crfS(ctx->opcode); + + tcg_gen_setcondi_i32(TCG_COND_GEU, t0, cpu_crf[crf], 4); + tcg_gen_movi_i32(t8, 8); + tcg_gen_movi_i32(tm1, -1); + tcg_gen_movcond_i32(TCG_COND_GEU, t0, cpu_crf[crf], t8, tm1, t0); + tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); + + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t8); + tcg_temp_free_i32(tm1); +} +#endif + /*** Cache management ***/ /* dcbf */ @@ -4764,27 +4157,22 @@ static void gen_dcbtls(DisasContext *ctx) static void gen_dcbz(DisasContext *ctx) { TCGv tcgv_addr; - TCGv_i32 tcgv_is_dcbzl; - int is_dcbzl = ctx->opcode & 0x00200000 ? 1 : 0; + TCGv_i32 tcgv_op; gen_set_access_type(ctx, ACCESS_CACHE); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); tcgv_addr = tcg_temp_new(); - tcgv_is_dcbzl = tcg_const_i32(is_dcbzl); - + tcgv_op = tcg_const_i32(ctx->opcode & 0x03FF000); gen_addr_reg_index(ctx, tcgv_addr); - gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_is_dcbzl); - + gen_helper_dcbz(cpu_env, tcgv_addr, tcgv_op); tcg_temp_free(tcgv_addr); - tcg_temp_free_i32(tcgv_is_dcbzl); + tcg_temp_free_i32(tcgv_op); } /* dst / dstt */ static void gen_dst(DisasContext *ctx) { if (rA(ctx->opcode) == 0) { - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_LSWX); + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); } else { /* interpreted as no-op */ } @@ -4794,7 +4182,7 @@ static void gen_dst(DisasContext *ctx) static void gen_dstst(DisasContext *ctx) { if (rA(ctx->opcode) == 0) { - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_LSWX); + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); } else { /* interpreted as no-op */ } @@ -4812,8 +4200,6 @@ static void gen_icbi(DisasContext *ctx) { TCGv t0; gen_set_access_type(ctx, ACCESS_CACHE); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); t0 = tcg_temp_new(); gen_addr_reg_index(ctx, t0); gen_helper_icbi(cpu_env, t0); @@ -5060,6 +4446,7 @@ static void gen_tlbie(DisasContext *ctx) #if defined(CONFIG_USER_ONLY) GEN_PRIV; #else + TCGv_i32 t1; CHK_HV; if (NARROW_MODE(ctx)) { @@ -5070,6 +4457,11 @@ static void gen_tlbie(DisasContext *ctx) } else { gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]); } + t1 = tcg_temp_new_i32(); + tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); + tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH); + tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); + tcg_temp_free_i32(t1); #endif /* defined(CONFIG_USER_ONLY) */ } @@ -5081,11 +4473,10 @@ static void gen_tlbsync(DisasContext *ctx) #else CHK_HV; - 
/* tlbsync is a nop for server, ptesync handles delayed tlb flush, - * embedded however needs to deal with tlbsync. We don't try to be - * fancy and swallow the overhead of checking for both. - */ - gen_check_tlb_flush(ctx); + /* BookS does both ptesync and tlbsync make tlbsync a nop for server */ + if (ctx->insns_flags & PPC_BOOKE) { + gen_check_tlb_flush(ctx, true); + } #endif /* defined(CONFIG_USER_ONLY) */ } @@ -5299,8 +4690,6 @@ static void gen_lscbx(DisasContext *ctx) TCGv_i32 t3 = tcg_const_i32(rB(ctx->opcode)); gen_addr_reg_index(ctx, t0); - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); gen_helper_lscbx(t0, cpu_env, t0, t1, t2, t3); tcg_temp_free_i32(t1); tcg_temp_free_i32(t2); @@ -5942,169 +5331,34 @@ static void gen_rfsvc(DisasContext *ctx) /* svc is not implemented for now */ -/* POWER2 specific instructions */ -/* Quad manipulation (load/store two floats at a time) */ - -/* lfq */ -static void gen_lfq(DisasContext *ctx) -{ - int rd = rD(ctx->opcode); - TCGv t0; - gen_set_access_type(ctx, ACCESS_FLOAT); - t0 = tcg_temp_new(); - gen_addr_imm_index(ctx, t0, 0); - gen_qemu_ld64(ctx, cpu_fpr[rd], t0); - gen_addr_add(ctx, t0, t0, 8); - gen_qemu_ld64(ctx, cpu_fpr[(rd + 1) % 32], t0); - tcg_temp_free(t0); -} - -/* lfqu */ -static void gen_lfqu(DisasContext *ctx) -{ - int ra = rA(ctx->opcode); - int rd = rD(ctx->opcode); - TCGv t0, t1; - gen_set_access_type(ctx, ACCESS_FLOAT); - t0 = tcg_temp_new(); - t1 = tcg_temp_new(); - gen_addr_imm_index(ctx, t0, 0); - gen_qemu_ld64(ctx, cpu_fpr[rd], t0); - gen_addr_add(ctx, t1, t0, 8); - gen_qemu_ld64(ctx, cpu_fpr[(rd + 1) % 32], t1); - if (ra != 0) - tcg_gen_mov_tl(cpu_gpr[ra], t0); - tcg_temp_free(t0); - tcg_temp_free(t1); -} +/* BookE specific instructions */ -/* lfqux */ -static void gen_lfqux(DisasContext *ctx) +/* XXX: not implemented on 440 ? */ +static void gen_mfapidi(DisasContext *ctx) { - int ra = rA(ctx->opcode); - int rd = rD(ctx->opcode); - gen_set_access_type(ctx, ACCESS_FLOAT); - TCGv t0, t1; - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, t0); - gen_qemu_ld64(ctx, cpu_fpr[rd], t0); - t1 = tcg_temp_new(); - gen_addr_add(ctx, t1, t0, 8); - gen_qemu_ld64(ctx, cpu_fpr[(rd + 1) % 32], t1); - tcg_temp_free(t1); - if (ra != 0) - tcg_gen_mov_tl(cpu_gpr[ra], t0); - tcg_temp_free(t0); + /* XXX: TODO */ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); } -/* lfqx */ -static void gen_lfqx(DisasContext *ctx) +/* XXX: not implemented on 440 ? 
*/ +static void gen_tlbiva(DisasContext *ctx) { - int rd = rD(ctx->opcode); +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else TCGv t0; - gen_set_access_type(ctx, ACCESS_FLOAT); - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, t0); - gen_qemu_ld64(ctx, cpu_fpr[rd], t0); - gen_addr_add(ctx, t0, t0, 8); - gen_qemu_ld64(ctx, cpu_fpr[(rd + 1) % 32], t0); - tcg_temp_free(t0); -} -/* stfq */ -static void gen_stfq(DisasContext *ctx) -{ - int rd = rD(ctx->opcode); - TCGv t0; - gen_set_access_type(ctx, ACCESS_FLOAT); + CHK_SV; t0 = tcg_temp_new(); - gen_addr_imm_index(ctx, t0, 0); - gen_qemu_st64(ctx, cpu_fpr[rd], t0); - gen_addr_add(ctx, t0, t0, 8); - gen_qemu_st64(ctx, cpu_fpr[(rd + 1) % 32], t0); + gen_addr_reg_index(ctx, t0); + gen_helper_tlbiva(cpu_env, cpu_gpr[rB(ctx->opcode)]); tcg_temp_free(t0); +#endif /* defined(CONFIG_USER_ONLY) */ } -/* stfqu */ -static void gen_stfqu(DisasContext *ctx) -{ - int ra = rA(ctx->opcode); - int rd = rD(ctx->opcode); - TCGv t0, t1; - gen_set_access_type(ctx, ACCESS_FLOAT); - t0 = tcg_temp_new(); - gen_addr_imm_index(ctx, t0, 0); - gen_qemu_st64(ctx, cpu_fpr[rd], t0); - t1 = tcg_temp_new(); - gen_addr_add(ctx, t1, t0, 8); - gen_qemu_st64(ctx, cpu_fpr[(rd + 1) % 32], t1); - tcg_temp_free(t1); - if (ra != 0) - tcg_gen_mov_tl(cpu_gpr[ra], t0); - tcg_temp_free(t0); -} - -/* stfqux */ -static void gen_stfqux(DisasContext *ctx) -{ - int ra = rA(ctx->opcode); - int rd = rD(ctx->opcode); - TCGv t0, t1; - gen_set_access_type(ctx, ACCESS_FLOAT); - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, t0); - gen_qemu_st64(ctx, cpu_fpr[rd], t0); - t1 = tcg_temp_new(); - gen_addr_add(ctx, t1, t0, 8); - gen_qemu_st64(ctx, cpu_fpr[(rd + 1) % 32], t1); - tcg_temp_free(t1); - if (ra != 0) - tcg_gen_mov_tl(cpu_gpr[ra], t0); - tcg_temp_free(t0); -} - -/* stfqx */ -static void gen_stfqx(DisasContext *ctx) -{ - int rd = rD(ctx->opcode); - TCGv t0; - gen_set_access_type(ctx, ACCESS_FLOAT); - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, t0); - gen_qemu_st64(ctx, cpu_fpr[rd], t0); - gen_addr_add(ctx, t0, t0, 8); - gen_qemu_st64(ctx, cpu_fpr[(rd + 1) % 32], t0); - tcg_temp_free(t0); -} - -/* BookE specific instructions */ - -/* XXX: not implemented on 440 ? */ -static void gen_mfapidi(DisasContext *ctx) -{ - /* XXX: TODO */ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); -} - -/* XXX: not implemented on 440 ? 
*/ -static void gen_tlbiva(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - TCGv t0; - - CHK_SV; - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, t0); - gen_helper_tlbiva(cpu_env, cpu_gpr[rB(ctx->opcode)]); - tcg_temp_free(t0); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -/* All 405 MAC instructions are translated here */ -static inline void gen_405_mulladd_insn(DisasContext *ctx, int opc2, int opc3, - int ra, int rb, int rt, int Rc) +/* All 405 MAC instructions are translated here */ +static inline void gen_405_mulladd_insn(DisasContext *ctx, int opc2, int opc3, + int ra, int rb, int rt, int Rc) { TCGv t0, t1; @@ -6326,8 +5580,6 @@ static void gen_mfdcr(DisasContext *ctx) TCGv dcrn; CHK_SV; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); dcrn = tcg_const_tl(SPR(ctx->opcode)); gen_helper_load_dcr(cpu_gpr[rD(ctx->opcode)], cpu_env, dcrn); tcg_temp_free(dcrn); @@ -6343,8 +5595,6 @@ static void gen_mtdcr(DisasContext *ctx) TCGv dcrn; CHK_SV; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); dcrn = tcg_const_tl(SPR(ctx->opcode)); gen_helper_store_dcr(cpu_env, dcrn, cpu_gpr[rS(ctx->opcode)]); tcg_temp_free(dcrn); @@ -6359,8 +5609,6 @@ static void gen_mfdcrx(DisasContext *ctx) GEN_PRIV; #else CHK_SV; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); gen_helper_load_dcr(cpu_gpr[rD(ctx->opcode)], cpu_env, cpu_gpr[rA(ctx->opcode)]); /* Note: Rc update flag set leads to undefined state of Rc0 */ @@ -6375,8 +5623,6 @@ static void gen_mtdcrx(DisasContext *ctx) GEN_PRIV; #else CHK_SV; - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); gen_helper_store_dcr(cpu_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); /* Note: Rc update flag set leads to undefined state of Rc0 */ @@ -6386,8 +5632,6 @@ static void gen_mtdcrx(DisasContext *ctx) /* mfdcrux (PPC 460) : user-mode access to DCR */ static void gen_mfdcrux(DisasContext *ctx) { - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); gen_helper_load_dcr(cpu_gpr[rD(ctx->opcode)], cpu_env, cpu_gpr[rA(ctx->opcode)]); /* Note: Rc update flag set leads to undefined state of Rc0 */ @@ -6396,8 +5640,6 @@ static void gen_mfdcrux(DisasContext *ctx) /* mtdcrux (PPC 460) : user-mode access to DCR */ static void gen_mtdcrux(DisasContext *ctx) { - /* NIP cannot be restored if the memory exception comes from an helper */ - gen_update_nip(ctx, ctx->nip - 4); gen_helper_store_dcr(cpu_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); /* Note: Rc update flag set leads to undefined state of Rc0 */ @@ -6681,3110 +5923,197 @@ static void gen_tlbsx_booke206(DisasContext *ctx) tcg_gen_mov_tl(t0, cpu_gpr[rD(ctx->opcode)]); } else { t0 = tcg_const_tl(0); - } - - tcg_gen_add_tl(t0, t0, cpu_gpr[rB(ctx->opcode)]); - gen_helper_booke206_tlbsx(cpu_env, t0); - tcg_temp_free(t0); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -/* tlbwe */ -static void gen_tlbwe_booke206(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - CHK_SV; - gen_update_nip(ctx, ctx->nip - 4); - gen_helper_booke206_tlbwe(cpu_env); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -static void gen_tlbivax_booke206(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - TCGv t0; - - CHK_SV; - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, 
t0); - gen_helper_booke206_tlbivax(cpu_env, t0); - tcg_temp_free(t0); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -static void gen_tlbilx_booke206(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - TCGv t0; - - CHK_SV; - t0 = tcg_temp_new(); - gen_addr_reg_index(ctx, t0); - - switch((ctx->opcode >> 21) & 0x3) { - case 0: - gen_helper_booke206_tlbilx0(cpu_env, t0); - break; - case 1: - gen_helper_booke206_tlbilx1(cpu_env, t0); - break; - case 3: - gen_helper_booke206_tlbilx3(cpu_env, t0); - break; - default: - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); - break; - } - - tcg_temp_free(t0); -#endif /* defined(CONFIG_USER_ONLY) */ -} - - -/* wrtee */ -static void gen_wrtee(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - TCGv t0; - - CHK_SV; - t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, cpu_gpr[rD(ctx->opcode)], (1 << MSR_EE)); - tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE)); - tcg_gen_or_tl(cpu_msr, cpu_msr, t0); - tcg_temp_free(t0); - /* Stop translation to have a chance to raise an exception - * if we just set msr_ee to 1 - */ - gen_stop_exception(ctx); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -/* wrteei */ -static void gen_wrteei(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - CHK_SV; - if (ctx->opcode & 0x00008000) { - tcg_gen_ori_tl(cpu_msr, cpu_msr, (1 << MSR_EE)); - /* Stop translation to have a chance to raise an exception */ - gen_stop_exception(ctx); - } else { - tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE)); - } -#endif /* defined(CONFIG_USER_ONLY) */ -} - -/* PowerPC 440 specific instructions */ - -/* dlmzb */ -static void gen_dlmzb(DisasContext *ctx) -{ - TCGv_i32 t0 = tcg_const_i32(Rc(ctx->opcode)); - gen_helper_dlmzb(cpu_gpr[rA(ctx->opcode)], cpu_env, - cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); - tcg_temp_free_i32(t0); -} - -/* mbar replaces eieio on 440 */ -static void gen_mbar(DisasContext *ctx) -{ - /* interpreted as no-op */ -} - -/* msync replaces sync on 440 */ -static void gen_msync_4xx(DisasContext *ctx) -{ - /* interpreted as no-op */ -} - -/* icbt */ -static void gen_icbt_440(DisasContext *ctx) -{ - /* interpreted as no-op */ - /* XXX: specification say this is treated as a load by the MMU - * but does not generate any exception - */ -} - -/* Embedded.Processor Control */ - -static void gen_msgclr(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - CHK_SV; - gen_helper_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -static void gen_msgsnd(DisasContext *ctx) -{ -#if defined(CONFIG_USER_ONLY) - GEN_PRIV; -#else - CHK_SV; - gen_helper_msgsnd(cpu_gpr[rB(ctx->opcode)]); -#endif /* defined(CONFIG_USER_ONLY) */ -} - -/*** Altivec vector extension ***/ -/* Altivec registers moves */ - -static inline TCGv_ptr gen_avr_ptr(int reg) -{ - TCGv_ptr r = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(r, cpu_env, offsetof(CPUPPCState, avr[reg])); - return r; -} - -#define GEN_VR_LDX(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - tcg_gen_andi_tl(EA, EA, ~0xf); \ - /* We only need to swap high and low halves. gen_qemu_ld64 does necessary \ - 64-bit byteswap already. 
*/ \ - if (ctx->le_mode) { \ - gen_qemu_ld64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ - tcg_gen_addi_tl(EA, EA, 8); \ - gen_qemu_ld64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ - } else { \ - gen_qemu_ld64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ - tcg_gen_addi_tl(EA, EA, 8); \ - gen_qemu_ld64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ - } \ - tcg_temp_free(EA); \ -} - -#define GEN_VR_STX(name, opc2, opc3) \ -static void gen_st##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - tcg_gen_andi_tl(EA, EA, ~0xf); \ - /* We only need to swap high and low halves. gen_qemu_st64 does necessary \ - 64-bit byteswap already. */ \ - if (ctx->le_mode) { \ - gen_qemu_st64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ - tcg_gen_addi_tl(EA, EA, 8); \ - gen_qemu_st64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ - } else { \ - gen_qemu_st64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ - tcg_gen_addi_tl(EA, EA, 8); \ - gen_qemu_st64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ - } \ - tcg_temp_free(EA); \ -} - -#define GEN_VR_LVE(name, opc2, opc3, size) \ -static void gen_lve##name(DisasContext *ctx) \ - { \ - TCGv EA; \ - TCGv_ptr rs; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - if (size > 1) { \ - tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ - } \ - rs = gen_avr_ptr(rS(ctx->opcode)); \ - gen_helper_lve##name(cpu_env, rs, EA); \ - tcg_temp_free(EA); \ - tcg_temp_free_ptr(rs); \ - } - -#define GEN_VR_STVE(name, opc2, opc3, size) \ -static void gen_stve##name(DisasContext *ctx) \ - { \ - TCGv EA; \ - TCGv_ptr rs; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - if (size > 1) { \ - tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ - } \ - rs = gen_avr_ptr(rS(ctx->opcode)); \ - gen_helper_stve##name(cpu_env, rs, EA); \ - tcg_temp_free(EA); \ - tcg_temp_free_ptr(rs); \ - } - -GEN_VR_LDX(lvx, 0x07, 0x03); -/* As we don't emulate the cache, lvxl is stricly equivalent to lvx */ -GEN_VR_LDX(lvxl, 0x07, 0x0B); - -GEN_VR_LVE(bx, 0x07, 0x00, 1); -GEN_VR_LVE(hx, 0x07, 0x01, 2); -GEN_VR_LVE(wx, 0x07, 0x02, 4); - -GEN_VR_STX(svx, 0x07, 0x07); -/* As we don't emulate the cache, stvxl is stricly equivalent to stvx */ -GEN_VR_STX(svxl, 0x07, 0x0F); - -GEN_VR_STVE(bx, 0x07, 0x04, 1); -GEN_VR_STVE(hx, 0x07, 0x05, 2); -GEN_VR_STVE(wx, 0x07, 0x06, 4); - -static void gen_lvsl(DisasContext *ctx) -{ - TCGv_ptr rd; - TCGv EA; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - rd = gen_avr_ptr(rD(ctx->opcode)); - gen_helper_lvsl(rd, EA); - tcg_temp_free(EA); - tcg_temp_free_ptr(rd); -} - -static void gen_lvsr(DisasContext *ctx) -{ - TCGv_ptr rd; - TCGv EA; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - rd = gen_avr_ptr(rD(ctx->opcode)); - gen_helper_lvsr(rd, EA); - tcg_temp_free(EA); - tcg_temp_free_ptr(rd); -} - -static void gen_mfvscr(DisasContext *ctx) -{ - TCGv_i32 t; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - 
tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); - t = tcg_temp_new_i32(); - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, vscr)); - tcg_gen_extu_i32_i64(cpu_avrl[rD(ctx->opcode)], t); - tcg_temp_free_i32(t); -} - -static void gen_mtvscr(DisasContext *ctx) -{ - TCGv_ptr p; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - p = gen_avr_ptr(rB(ctx->opcode)); - gen_helper_mtvscr(cpu_env, p); - tcg_temp_free_ptr(p); -} - -/* Logical operations */ -#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - tcg_op(cpu_avrh[rD(ctx->opcode)], cpu_avrh[rA(ctx->opcode)], cpu_avrh[rB(ctx->opcode)]); \ - tcg_op(cpu_avrl[rD(ctx->opcode)], cpu_avrl[rA(ctx->opcode)], cpu_avrl[rB(ctx->opcode)]); \ -} - -GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16); -GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17); -GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18); -GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19); -GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20); -GEN_VX_LOGICAL(veqv, tcg_gen_eqv_i64, 2, 26); -GEN_VX_LOGICAL(vnand, tcg_gen_nand_i64, 2, 22); -GEN_VX_LOGICAL(vorc, tcg_gen_orc_i64, 2, 21); - -#define GEN_VXFORM(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv_ptr ra, rb, rd; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name (rd, ra, rb); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ -} - -#define GEN_VXFORM_ENV(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv_ptr ra, rb, rd; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name(cpu_env, rd, ra, rb); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ -} - -#define GEN_VXFORM3(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv_ptr ra, rb, rc, rd; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rc = gen_avr_ptr(rC(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name(rd, ra, rb, rc); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rc); \ - tcg_temp_free_ptr(rd); \ -} - -/* - * Support for Altivec instruction pairs that use bit 31 (Rc) as - * an opcode bit. In general, these pairs come from different - * versions of the ISA, so we must also support a pair of flags for - * each instruction. 
- */ -#define GEN_VXFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \ -static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ -{ \ - if ((Rc(ctx->opcode) == 0) && \ - ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \ - gen_##name0(ctx); \ - } else if ((Rc(ctx->opcode) == 1) && \ - ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \ - gen_##name1(ctx); \ - } else { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - } \ -} - -GEN_VXFORM(vaddubm, 0, 0); -GEN_VXFORM(vadduhm, 0, 1); -GEN_VXFORM(vadduwm, 0, 2); -GEN_VXFORM(vaddudm, 0, 3); -GEN_VXFORM(vsububm, 0, 16); -GEN_VXFORM(vsubuhm, 0, 17); -GEN_VXFORM(vsubuwm, 0, 18); -GEN_VXFORM(vsubudm, 0, 19); -GEN_VXFORM(vmaxub, 1, 0); -GEN_VXFORM(vmaxuh, 1, 1); -GEN_VXFORM(vmaxuw, 1, 2); -GEN_VXFORM(vmaxud, 1, 3); -GEN_VXFORM(vmaxsb, 1, 4); -GEN_VXFORM(vmaxsh, 1, 5); -GEN_VXFORM(vmaxsw, 1, 6); -GEN_VXFORM(vmaxsd, 1, 7); -GEN_VXFORM(vminub, 1, 8); -GEN_VXFORM(vminuh, 1, 9); -GEN_VXFORM(vminuw, 1, 10); -GEN_VXFORM(vminud, 1, 11); -GEN_VXFORM(vminsb, 1, 12); -GEN_VXFORM(vminsh, 1, 13); -GEN_VXFORM(vminsw, 1, 14); -GEN_VXFORM(vminsd, 1, 15); -GEN_VXFORM(vavgub, 1, 16); -GEN_VXFORM(vavguh, 1, 17); -GEN_VXFORM(vavguw, 1, 18); -GEN_VXFORM(vavgsb, 1, 20); -GEN_VXFORM(vavgsh, 1, 21); -GEN_VXFORM(vavgsw, 1, 22); -GEN_VXFORM(vmrghb, 6, 0); -GEN_VXFORM(vmrghh, 6, 1); -GEN_VXFORM(vmrghw, 6, 2); -GEN_VXFORM(vmrglb, 6, 4); -GEN_VXFORM(vmrglh, 6, 5); -GEN_VXFORM(vmrglw, 6, 6); - -static void gen_vmrgew(DisasContext *ctx) -{ - TCGv_i64 tmp; - int VT, VA, VB; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - VT = rD(ctx->opcode); - VA = rA(ctx->opcode); - VB = rB(ctx->opcode); - tmp = tcg_temp_new_i64(); - tcg_gen_shri_i64(tmp, cpu_avrh[VB], 32); - tcg_gen_deposit_i64(cpu_avrh[VT], cpu_avrh[VA], tmp, 0, 32); - tcg_gen_shri_i64(tmp, cpu_avrl[VB], 32); - tcg_gen_deposit_i64(cpu_avrl[VT], cpu_avrl[VA], tmp, 0, 32); - tcg_temp_free_i64(tmp); -} - -static void gen_vmrgow(DisasContext *ctx) -{ - int VT, VA, VB; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - VT = rD(ctx->opcode); - VA = rA(ctx->opcode); - VB = rB(ctx->opcode); - - tcg_gen_deposit_i64(cpu_avrh[VT], cpu_avrh[VB], cpu_avrh[VA], 32, 32); - tcg_gen_deposit_i64(cpu_avrl[VT], cpu_avrl[VB], cpu_avrl[VA], 32, 32); -} - -GEN_VXFORM(vmuloub, 4, 0); -GEN_VXFORM(vmulouh, 4, 1); -GEN_VXFORM(vmulouw, 4, 2); -GEN_VXFORM(vmuluwm, 4, 2); -GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE, - vmuluwm, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vmulosb, 4, 4); -GEN_VXFORM(vmulosh, 4, 5); -GEN_VXFORM(vmulosw, 4, 6); -GEN_VXFORM(vmuleub, 4, 8); -GEN_VXFORM(vmuleuh, 4, 9); -GEN_VXFORM(vmuleuw, 4, 10); -GEN_VXFORM(vmulesb, 4, 12); -GEN_VXFORM(vmulesh, 4, 13); -GEN_VXFORM(vmulesw, 4, 14); -GEN_VXFORM(vslb, 2, 4); -GEN_VXFORM(vslh, 2, 5); -GEN_VXFORM(vslw, 2, 6); -GEN_VXFORM(vsld, 2, 23); -GEN_VXFORM(vsrb, 2, 8); -GEN_VXFORM(vsrh, 2, 9); -GEN_VXFORM(vsrw, 2, 10); -GEN_VXFORM(vsrd, 2, 27); -GEN_VXFORM(vsrab, 2, 12); -GEN_VXFORM(vsrah, 2, 13); -GEN_VXFORM(vsraw, 2, 14); -GEN_VXFORM(vsrad, 2, 15); -GEN_VXFORM(vslo, 6, 16); -GEN_VXFORM(vsro, 6, 17); -GEN_VXFORM(vaddcuw, 0, 6); -GEN_VXFORM(vsubcuw, 0, 22); -GEN_VXFORM_ENV(vaddubs, 0, 8); -GEN_VXFORM_ENV(vadduhs, 0, 9); -GEN_VXFORM_ENV(vadduws, 0, 10); -GEN_VXFORM_ENV(vaddsbs, 0, 12); -GEN_VXFORM_ENV(vaddshs, 0, 13); -GEN_VXFORM_ENV(vaddsws, 0, 14); -GEN_VXFORM_ENV(vsububs, 0, 24); -GEN_VXFORM_ENV(vsubuhs, 0, 25); -GEN_VXFORM_ENV(vsubuws, 0, 26); 
-GEN_VXFORM_ENV(vsubsbs, 0, 28); -GEN_VXFORM_ENV(vsubshs, 0, 29); -GEN_VXFORM_ENV(vsubsws, 0, 30); -GEN_VXFORM(vadduqm, 0, 4); -GEN_VXFORM(vaddcuq, 0, 5); -GEN_VXFORM3(vaddeuqm, 30, 0); -GEN_VXFORM3(vaddecuq, 30, 0); -GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ - vaddecuq, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vsubuqm, 0, 20); -GEN_VXFORM(vsubcuq, 0, 21); -GEN_VXFORM3(vsubeuqm, 31, 0); -GEN_VXFORM3(vsubecuq, 31, 0); -GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ - vsubecuq, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vrlb, 2, 0); -GEN_VXFORM(vrlh, 2, 1); -GEN_VXFORM(vrlw, 2, 2); -GEN_VXFORM(vrld, 2, 3); -GEN_VXFORM(vsl, 2, 7); -GEN_VXFORM(vsr, 2, 11); -GEN_VXFORM_ENV(vpkuhum, 7, 0); -GEN_VXFORM_ENV(vpkuwum, 7, 1); -GEN_VXFORM_ENV(vpkudum, 7, 17); -GEN_VXFORM_ENV(vpkuhus, 7, 2); -GEN_VXFORM_ENV(vpkuwus, 7, 3); -GEN_VXFORM_ENV(vpkudus, 7, 19); -GEN_VXFORM_ENV(vpkshus, 7, 4); -GEN_VXFORM_ENV(vpkswus, 7, 5); -GEN_VXFORM_ENV(vpksdus, 7, 21); -GEN_VXFORM_ENV(vpkshss, 7, 6); -GEN_VXFORM_ENV(vpkswss, 7, 7); -GEN_VXFORM_ENV(vpksdss, 7, 23); -GEN_VXFORM(vpkpx, 7, 12); -GEN_VXFORM_ENV(vsum4ubs, 4, 24); -GEN_VXFORM_ENV(vsum4sbs, 4, 28); -GEN_VXFORM_ENV(vsum4shs, 4, 25); -GEN_VXFORM_ENV(vsum2sws, 4, 26); -GEN_VXFORM_ENV(vsumsws, 4, 30); -GEN_VXFORM_ENV(vaddfp, 5, 0); -GEN_VXFORM_ENV(vsubfp, 5, 1); -GEN_VXFORM_ENV(vmaxfp, 5, 16); -GEN_VXFORM_ENV(vminfp, 5, 17); - -#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr ra, rb, rd; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##opname(cpu_env, rd, ra, rb); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ - } - -#define GEN_VXRFORM(name, opc2, opc3) \ - GEN_VXRFORM1(name, name, #name, opc2, opc3) \ - GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4))) - -/* - * Support for Altivec instructions that use bit 31 (Rc) as an opcode - * bit but also use bit 21 as an actual Rc bit. In general, thse pairs - * come from different versions of the ISA, so we must also support a - * pair of flags for each instruction. 
- */ -#define GEN_VXRFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \ -static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ -{ \ - if ((Rc(ctx->opcode) == 0) && \ - ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \ - if (Rc21(ctx->opcode) == 0) { \ - gen_##name0(ctx); \ - } else { \ - gen_##name0##_(ctx); \ - } \ - } else if ((Rc(ctx->opcode) == 1) && \ - ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \ - if (Rc21(ctx->opcode) == 0) { \ - gen_##name1(ctx); \ - } else { \ - gen_##name1##_(ctx); \ - } \ - } else { \ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ - } \ -} - -GEN_VXRFORM(vcmpequb, 3, 0) -GEN_VXRFORM(vcmpequh, 3, 1) -GEN_VXRFORM(vcmpequw, 3, 2) -GEN_VXRFORM(vcmpequd, 3, 3) -GEN_VXRFORM(vcmpgtsb, 3, 12) -GEN_VXRFORM(vcmpgtsh, 3, 13) -GEN_VXRFORM(vcmpgtsw, 3, 14) -GEN_VXRFORM(vcmpgtsd, 3, 15) -GEN_VXRFORM(vcmpgtub, 3, 8) -GEN_VXRFORM(vcmpgtuh, 3, 9) -GEN_VXRFORM(vcmpgtuw, 3, 10) -GEN_VXRFORM(vcmpgtud, 3, 11) -GEN_VXRFORM(vcmpeqfp, 3, 3) -GEN_VXRFORM(vcmpgefp, 3, 7) -GEN_VXRFORM(vcmpgtfp, 3, 11) -GEN_VXRFORM(vcmpbfp, 3, 15) - -GEN_VXRFORM_DUAL(vcmpeqfp, PPC_ALTIVEC, PPC_NONE, \ - vcmpequd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ - vcmpgtsd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ - vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) - -#define GEN_VXFORM_SIMM(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr rd; \ - TCGv_i32 simm; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - simm = tcg_const_i32(SIMM5(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name (rd, simm); \ - tcg_temp_free_i32(simm); \ - tcg_temp_free_ptr(rd); \ - } - -GEN_VXFORM_SIMM(vspltisb, 6, 12); -GEN_VXFORM_SIMM(vspltish, 6, 13); -GEN_VXFORM_SIMM(vspltisw, 6, 14); - -#define GEN_VXFORM_NOA(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr rb, rd; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name (rd, rb); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ - } - -#define GEN_VXFORM_NOA_ENV(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr rb, rd; \ - \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name(cpu_env, rd, rb); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ - } - -GEN_VXFORM_NOA(vupkhsb, 7, 8); -GEN_VXFORM_NOA(vupkhsh, 7, 9); -GEN_VXFORM_NOA(vupkhsw, 7, 25); -GEN_VXFORM_NOA(vupklsb, 7, 10); -GEN_VXFORM_NOA(vupklsh, 7, 11); -GEN_VXFORM_NOA(vupklsw, 7, 27); -GEN_VXFORM_NOA(vupkhpx, 7, 13); -GEN_VXFORM_NOA(vupklpx, 7, 15); -GEN_VXFORM_NOA_ENV(vrefp, 5, 4); -GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5); -GEN_VXFORM_NOA_ENV(vexptefp, 5, 6); -GEN_VXFORM_NOA_ENV(vlogefp, 5, 7); -GEN_VXFORM_NOA_ENV(vrfim, 5, 11); -GEN_VXFORM_NOA_ENV(vrfin, 5, 8); -GEN_VXFORM_NOA_ENV(vrfip, 5, 10); -GEN_VXFORM_NOA_ENV(vrfiz, 5, 9); - -#define GEN_VXFORM_SIMM(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr rd; \ - TCGv_i32 simm; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - simm = tcg_const_i32(SIMM5(ctx->opcode)); \ - 
rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name (rd, simm); \ - tcg_temp_free_i32(simm); \ - tcg_temp_free_ptr(rd); \ - } - -#define GEN_VXFORM_UIMM(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr rb, rd; \ - TCGv_i32 uimm; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - uimm = tcg_const_i32(UIMM5(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name (rd, rb, uimm); \ - tcg_temp_free_i32(uimm); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ - } - -#define GEN_VXFORM_UIMM_ENV(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ - { \ - TCGv_ptr rb, rd; \ - TCGv_i32 uimm; \ - \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - uimm = tcg_const_i32(UIMM5(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - gen_helper_##name(cpu_env, rd, rb, uimm); \ - tcg_temp_free_i32(uimm); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ - } - -GEN_VXFORM_UIMM(vspltb, 6, 8); -GEN_VXFORM_UIMM(vsplth, 6, 9); -GEN_VXFORM_UIMM(vspltw, 6, 10); -GEN_VXFORM_UIMM_ENV(vcfux, 5, 12); -GEN_VXFORM_UIMM_ENV(vcfsx, 5, 13); -GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14); -GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15); - -static void gen_vsldoi(DisasContext *ctx) -{ - TCGv_ptr ra, rb, rd; - TCGv_i32 sh; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - ra = gen_avr_ptr(rA(ctx->opcode)); - rb = gen_avr_ptr(rB(ctx->opcode)); - rd = gen_avr_ptr(rD(ctx->opcode)); - sh = tcg_const_i32(VSH(ctx->opcode)); - gen_helper_vsldoi (rd, ra, rb, sh); - tcg_temp_free_ptr(ra); - tcg_temp_free_ptr(rb); - tcg_temp_free_ptr(rd); - tcg_temp_free_i32(sh); -} - -#define GEN_VAFORM_PAIRED(name0, name1, opc2) \ -static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ - { \ - TCGv_ptr ra, rb, rc, rd; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rc = gen_avr_ptr(rC(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - if (Rc(ctx->opcode)) { \ - gen_helper_##name1(cpu_env, rd, ra, rb, rc); \ - } else { \ - gen_helper_##name0(cpu_env, rd, ra, rb, rc); \ - } \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rc); \ - tcg_temp_free_ptr(rd); \ - } - -GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16) - -static void gen_vmladduhm(DisasContext *ctx) -{ - TCGv_ptr ra, rb, rc, rd; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - ra = gen_avr_ptr(rA(ctx->opcode)); - rb = gen_avr_ptr(rB(ctx->opcode)); - rc = gen_avr_ptr(rC(ctx->opcode)); - rd = gen_avr_ptr(rD(ctx->opcode)); - gen_helper_vmladduhm(rd, ra, rb, rc); - tcg_temp_free_ptr(ra); - tcg_temp_free_ptr(rb); - tcg_temp_free_ptr(rc); - tcg_temp_free_ptr(rd); -} - -GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18) -GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19) -GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20) -GEN_VAFORM_PAIRED(vsel, vperm, 21) -GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) - -GEN_VXFORM_NOA(vclzb, 1, 28) -GEN_VXFORM_NOA(vclzh, 1, 29) -GEN_VXFORM_NOA(vclzw, 1, 30) -GEN_VXFORM_NOA(vclzd, 1, 31) -GEN_VXFORM_NOA(vpopcntb, 1, 28) -GEN_VXFORM_NOA(vpopcnth, 1, 29) -GEN_VXFORM_NOA(vpopcntw, 1, 30) -GEN_VXFORM_NOA(vpopcntd, 1, 31) -GEN_VXFORM_DUAL(vclzb, PPC_NONE, PPC2_ALTIVEC_207, \ - 
vpopcntb, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vclzh, PPC_NONE, PPC2_ALTIVEC_207, \ - vpopcnth, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \ - vpopcntw, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \ - vpopcntd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM(vbpermq, 6, 21); -GEN_VXFORM_NOA(vgbbd, 6, 20); -GEN_VXFORM(vpmsumb, 4, 16) -GEN_VXFORM(vpmsumh, 4, 17) -GEN_VXFORM(vpmsumw, 4, 18) -GEN_VXFORM(vpmsumd, 4, 19) - -#define GEN_BCD(op) \ -static void gen_##op(DisasContext *ctx) \ -{ \ - TCGv_ptr ra, rb, rd; \ - TCGv_i32 ps; \ - \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rb = gen_avr_ptr(rB(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - \ - ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \ - \ - gen_helper_##op(cpu_crf[6], rd, ra, rb, ps); \ - \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(rd); \ - tcg_temp_free_i32(ps); \ -} - -GEN_BCD(bcdadd) -GEN_BCD(bcdsub) - -GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \ - bcdadd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \ - bcdadd, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \ - bcdsub, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \ - bcdsub, PPC_NONE, PPC2_ALTIVEC_207) - -static void gen_vsbox(DisasContext *ctx) -{ - TCGv_ptr ra, rd; - if (unlikely(!ctx->altivec_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VPU); - return; - } - ra = gen_avr_ptr(rA(ctx->opcode)); - rd = gen_avr_ptr(rD(ctx->opcode)); - gen_helper_vsbox(rd, ra); - tcg_temp_free_ptr(ra); - tcg_temp_free_ptr(rd); -} - -GEN_VXFORM(vcipher, 4, 20) -GEN_VXFORM(vcipherlast, 4, 20) -GEN_VXFORM(vncipher, 4, 21) -GEN_VXFORM(vncipherlast, 4, 21) - -GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207, - vcipherlast, PPC_NONE, PPC2_ALTIVEC_207) -GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207, - vncipherlast, PPC_NONE, PPC2_ALTIVEC_207) - -#define VSHASIGMA(op) \ -static void gen_##op(DisasContext *ctx) \ -{ \ - TCGv_ptr ra, rd; \ - TCGv_i32 st_six; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - ra = gen_avr_ptr(rA(ctx->opcode)); \ - rd = gen_avr_ptr(rD(ctx->opcode)); \ - st_six = tcg_const_i32(rB(ctx->opcode)); \ - gen_helper_##op(rd, ra, st_six); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rd); \ - tcg_temp_free_i32(st_six); \ -} - -VSHASIGMA(vshasigmaw) -VSHASIGMA(vshasigmad) - -GEN_VXFORM3(vpermxor, 22, 0xFF) -GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, - vpermxor, PPC_NONE, PPC2_ALTIVEC_207) - -/*** VSX extension ***/ - -static inline TCGv_i64 cpu_vsrh(int n) -{ - if (n < 32) { - return cpu_fpr[n]; - } else { - return cpu_avrh[n-32]; - } -} - -static inline TCGv_i64 cpu_vsrl(int n) -{ - if (n < 32) { - return cpu_vsr[n]; - } else { - return cpu_avrl[n-32]; - } -} - -#define VSX_LOAD_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##operation(ctx, cpu_vsrh(xT(ctx->opcode)), EA); \ - /* NOTE: cpu_vsrl is undefined */ \ - tcg_temp_free(EA); \ -} - -VSX_LOAD_SCALAR(lxsdx, ld64) -VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) -VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) 
-VSX_LOAD_SCALAR(lxsspx, ld32fs) - -static void gen_lxvd2x(DisasContext *ctx) -{ - TCGv EA; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - gen_qemu_ld64(ctx, cpu_vsrh(xT(ctx->opcode)), EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_ld64(ctx, cpu_vsrl(xT(ctx->opcode)), EA); - tcg_temp_free(EA); -} - -static void gen_lxvdsx(DisasContext *ctx) -{ - TCGv EA; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - gen_qemu_ld64(ctx, cpu_vsrh(xT(ctx->opcode)), EA); - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xT(ctx->opcode))); - tcg_temp_free(EA); -} - -static void gen_lxvw4x(DisasContext *ctx) -{ - TCGv EA; - TCGv_i64 tmp; - TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); - TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - tmp = tcg_temp_new_i64(); - - gen_addr_reg_index(ctx, EA); - gen_qemu_ld32u_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_ld32u_i64(ctx, xth, EA); - tcg_gen_deposit_i64(xth, xth, tmp, 32, 32); - - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_ld32u_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_ld32u_i64(ctx, xtl, EA); - tcg_gen_deposit_i64(xtl, xtl, tmp, 32, 32); - - tcg_temp_free(EA); - tcg_temp_free_i64(tmp); -} - -#define VSX_STORE_SCALAR(name, operation) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - gen_qemu_##operation(ctx, cpu_vsrh(xS(ctx->opcode)), EA); \ - tcg_temp_free(EA); \ -} - -VSX_STORE_SCALAR(stxsdx, st64) -VSX_STORE_SCALAR(stxsiwx, st32_i64) -VSX_STORE_SCALAR(stxsspx, st32fs) - -static void gen_stxvd2x(DisasContext *ctx) -{ - TCGv EA; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - gen_qemu_st64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); - tcg_gen_addi_tl(EA, EA, 8); - gen_qemu_st64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); - tcg_temp_free(EA); -} - -static void gen_stxvw4x(DisasContext *ctx) -{ - TCGv_i64 tmp; - TCGv EA; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - gen_set_access_type(ctx, ACCESS_INT); - EA = tcg_temp_new(); - gen_addr_reg_index(ctx, EA); - tmp = tcg_temp_new_i64(); - - tcg_gen_shri_i64(tmp, cpu_vsrh(xS(ctx->opcode)), 32); - gen_qemu_st32_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); - - tcg_gen_shri_i64(tmp, cpu_vsrl(xS(ctx->opcode)), 32); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); - - tcg_temp_free(EA); - tcg_temp_free_i64(tmp); -} - -#define MV_VSRW(name, tcgop1, tcgop2, target, source) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - if (xS(ctx->opcode) < 32) { \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - } else { \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, 
POWERPC_EXCP_VPU); \ - return; \ - } \ - } \ - TCGv_i64 tmp = tcg_temp_new_i64(); \ - tcg_gen_##tcgop1(tmp, source); \ - tcg_gen_##tcgop2(target, tmp); \ - tcg_temp_free_i64(tmp); \ -} - - -MV_VSRW(mfvsrwz, ext32u_i64, trunc_i64_tl, cpu_gpr[rA(ctx->opcode)], \ - cpu_vsrh(xS(ctx->opcode))) -MV_VSRW(mtvsrwa, extu_tl_i64, ext32s_i64, cpu_vsrh(xT(ctx->opcode)), \ - cpu_gpr[rA(ctx->opcode)]) -MV_VSRW(mtvsrwz, extu_tl_i64, ext32u_i64, cpu_vsrh(xT(ctx->opcode)), \ - cpu_gpr[rA(ctx->opcode)]) - -#if defined(TARGET_PPC64) -#define MV_VSRD(name, target, source) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - if (xS(ctx->opcode) < 32) { \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - } else { \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - } \ - tcg_gen_mov_i64(target, source); \ -} - -MV_VSRD(mfvsrd, cpu_gpr[rA(ctx->opcode)], cpu_vsrh(xS(ctx->opcode))) -MV_VSRD(mtvsrd, cpu_vsrh(xT(ctx->opcode)), cpu_gpr[rA(ctx->opcode)]) - -#endif - -static void gen_xxpermdi(DisasContext *ctx) -{ - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - - if (unlikely((xT(ctx->opcode) == xA(ctx->opcode)) || - (xT(ctx->opcode) == xB(ctx->opcode)))) { - TCGv_i64 xh, xl; - - xh = tcg_temp_new_i64(); - xl = tcg_temp_new_i64(); - - if ((DM(ctx->opcode) & 2) == 0) { - tcg_gen_mov_i64(xh, cpu_vsrh(xA(ctx->opcode))); - } else { - tcg_gen_mov_i64(xh, cpu_vsrl(xA(ctx->opcode))); - } - if ((DM(ctx->opcode) & 1) == 0) { - tcg_gen_mov_i64(xl, cpu_vsrh(xB(ctx->opcode))); - } else { - tcg_gen_mov_i64(xl, cpu_vsrl(xB(ctx->opcode))); - } - - tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xh); - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xl); - - tcg_temp_free_i64(xh); - tcg_temp_free_i64(xl); - } else { - if ((DM(ctx->opcode) & 2) == 0) { - tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode))); - } else { - tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode))); - } - if ((DM(ctx->opcode) & 1) == 0) { - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xB(ctx->opcode))); - } else { - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xB(ctx->opcode))); - } - } -} - -#define OP_ABS 1 -#define OP_NABS 2 -#define OP_NEG 3 -#define OP_CPSGN 4 -#define SGN_MASK_DP 0x8000000000000000ull -#define SGN_MASK_SP 0x8000000080000000ull - -#define VSX_SCALAR_MOVE(name, op, sgn_mask) \ -static void glue(gen_, name)(DisasContext * ctx) \ - { \ - TCGv_i64 xb, sgm; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - xb = tcg_temp_new_i64(); \ - sgm = tcg_temp_new_i64(); \ - tcg_gen_mov_i64(xb, cpu_vsrh(xB(ctx->opcode))); \ - tcg_gen_movi_i64(sgm, sgn_mask); \ - switch (op) { \ - case OP_ABS: { \ - tcg_gen_andc_i64(xb, xb, sgm); \ - break; \ - } \ - case OP_NABS: { \ - tcg_gen_or_i64(xb, xb, sgm); \ - break; \ - } \ - case OP_NEG: { \ - tcg_gen_xor_i64(xb, xb, sgm); \ - break; \ - } \ - case OP_CPSGN: { \ - TCGv_i64 xa = tcg_temp_new_i64(); \ - tcg_gen_mov_i64(xa, cpu_vsrh(xA(ctx->opcode))); \ - tcg_gen_and_i64(xa, xa, sgm); \ - tcg_gen_andc_i64(xb, xb, sgm); \ - tcg_gen_or_i64(xb, xb, xa); \ - tcg_temp_free_i64(xa); \ - break; \ - } \ - } \ - tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xb); \ - tcg_temp_free_i64(xb); \ - tcg_temp_free_i64(sgm); \ - } - -VSX_SCALAR_MOVE(xsabsdp, OP_ABS, SGN_MASK_DP) -VSX_SCALAR_MOVE(xsnabsdp, OP_NABS, SGN_MASK_DP) -VSX_SCALAR_MOVE(xsnegdp, OP_NEG, SGN_MASK_DP) 
-VSX_SCALAR_MOVE(xscpsgndp, OP_CPSGN, SGN_MASK_DP) - -#define VSX_VECTOR_MOVE(name, op, sgn_mask) \ -static void glue(gen_, name)(DisasContext * ctx) \ - { \ - TCGv_i64 xbh, xbl, sgm; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - xbh = tcg_temp_new_i64(); \ - xbl = tcg_temp_new_i64(); \ - sgm = tcg_temp_new_i64(); \ - tcg_gen_mov_i64(xbh, cpu_vsrh(xB(ctx->opcode))); \ - tcg_gen_mov_i64(xbl, cpu_vsrl(xB(ctx->opcode))); \ - tcg_gen_movi_i64(sgm, sgn_mask); \ - switch (op) { \ - case OP_ABS: { \ - tcg_gen_andc_i64(xbh, xbh, sgm); \ - tcg_gen_andc_i64(xbl, xbl, sgm); \ - break; \ - } \ - case OP_NABS: { \ - tcg_gen_or_i64(xbh, xbh, sgm); \ - tcg_gen_or_i64(xbl, xbl, sgm); \ - break; \ - } \ - case OP_NEG: { \ - tcg_gen_xor_i64(xbh, xbh, sgm); \ - tcg_gen_xor_i64(xbl, xbl, sgm); \ - break; \ - } \ - case OP_CPSGN: { \ - TCGv_i64 xah = tcg_temp_new_i64(); \ - TCGv_i64 xal = tcg_temp_new_i64(); \ - tcg_gen_mov_i64(xah, cpu_vsrh(xA(ctx->opcode))); \ - tcg_gen_mov_i64(xal, cpu_vsrl(xA(ctx->opcode))); \ - tcg_gen_and_i64(xah, xah, sgm); \ - tcg_gen_and_i64(xal, xal, sgm); \ - tcg_gen_andc_i64(xbh, xbh, sgm); \ - tcg_gen_andc_i64(xbl, xbl, sgm); \ - tcg_gen_or_i64(xbh, xbh, xah); \ - tcg_gen_or_i64(xbl, xbl, xal); \ - tcg_temp_free_i64(xah); \ - tcg_temp_free_i64(xal); \ - break; \ - } \ - } \ - tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xbh); \ - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xbl); \ - tcg_temp_free_i64(xbh); \ - tcg_temp_free_i64(xbl); \ - tcg_temp_free_i64(sgm); \ - } - -VSX_VECTOR_MOVE(xvabsdp, OP_ABS, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvnabsdp, OP_NABS, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvnegdp, OP_NEG, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvcpsgndp, OP_CPSGN, SGN_MASK_DP) -VSX_VECTOR_MOVE(xvabssp, OP_ABS, SGN_MASK_SP) -VSX_VECTOR_MOVE(xvnabssp, OP_NABS, SGN_MASK_SP) -VSX_VECTOR_MOVE(xvnegsp, OP_NEG, SGN_MASK_SP) -VSX_VECTOR_MOVE(xvcpsgnsp, OP_CPSGN, SGN_MASK_SP) - -#define GEN_VSX_HELPER_2(name, op1, op2, inval, type) \ -static void gen_##name(DisasContext * ctx) \ -{ \ - TCGv_i32 opc; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - /* NIP cannot be restored if the memory exception comes from an helper */ \ - gen_update_nip(ctx, ctx->nip - 4); \ - opc = tcg_const_i32(ctx->opcode); \ - gen_helper_##name(cpu_env, opc); \ - tcg_temp_free_i32(opc); \ -} - -#define GEN_VSX_HELPER_XT_XB_ENV(name, op1, op2, inval, type) \ -static void gen_##name(DisasContext * ctx) \ -{ \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - /* NIP cannot be restored if the exception comes */ \ - /* from a helper. 
*/ \ - gen_update_nip(ctx, ctx->nip - 4); \ - \ - gen_helper_##name(cpu_vsrh(xT(ctx->opcode)), cpu_env, \ - cpu_vsrh(xB(ctx->opcode))); \ -} - -GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmaddadp, 0x04, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmaddmdp, 0x04, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmsubadp, 0x04, 0x06, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmsubmdp, 0x04, 0x07, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsnmaddadp, 0x04, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsnmaddmdp, 0x04, 0x15, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX) -GEN_VSX_HELPER_XT_XB_ENV(xscvdpspn, 0x16, 0x10, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_XT_XB_ENV(xscvspdpn, 0x16, 0x14, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xscvdpsxds, 0x10, 0x15, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscvdpsxws, 0x10, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscvdpuxws, 0x10, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscvsxddp, 0x10, 0x17, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xscvuxddp, 0x10, 0x16, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsrdpi, 0x12, 0x04, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsrdpic, 0x16, 0x06, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsrdpim, 0x12, 0x07, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsrdpip, 0x12, 0x06, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xsrdpiz, 0x12, 0x05, 0, PPC2_VSX) -GEN_VSX_HELPER_XT_XB_ENV(xsrsp, 0x12, 0x11, 0, PPC2_VSX207) - -GEN_VSX_HELPER_2(xsaddsp, 0x00, 0x00, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xssubsp, 0x00, 0x01, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsmulsp, 0x00, 0x02, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsresp, 0x14, 0x01, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsmaddasp, 0x04, 0x00, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsmaddmsp, 0x04, 0x01, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsmsubasp, 0x04, 0x02, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsmsubmsp, 0x04, 0x03, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsnmaddasp, 0x04, 0x10, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsnmaddmsp, 0x04, 0x11, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsnmsubasp, 0x04, 0x12, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xsnmsubmsp, 0x04, 0x13, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xscvsxdsp, 0x10, 0x13, 0, PPC2_VSX207) -GEN_VSX_HELPER_2(xscvuxdsp, 0x10, 0x12, 0, PPC2_VSX207) - -GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX) 
-GEN_VSX_HELPER_2(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmaddadp, 0x04, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmaddmdp, 0x04, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmsubadp, 0x04, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmsubmdp, 0x04, 0x0F, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmaddadp, 0x04, 0x1C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmaddmdp, 0x04, 0x1D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvdpsxds, 0x10, 0x1D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvdpsxws, 0x10, 0x0D, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvdpuxds, 0x10, 0x1C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvdpuxws, 0x10, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvsxddp, 0x10, 0x1F, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvuxddp, 0x10, 0x1E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvsxwdp, 0x10, 0x0F, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvuxwdp, 0x10, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrdpi, 0x12, 0x0C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrdpic, 0x16, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrdpim, 0x12, 0x0F, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX) - -GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmaddasp, 0x04, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmaddmsp, 0x04, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmsubasp, 0x04, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmsubmsp, 0x04, 0x0B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmaddasp, 0x04, 0x18, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmaddmsp, 0x04, 0x19, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvspsxds, 0x10, 0x19, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvspsxws, 0x10, 0x09, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvspuxds, 0x10, 0x18, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvspuxws, 0x10, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvsxdsp, 0x10, 0x1B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvuxdsp, 0x10, 0x1A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvsxwsp, 0x10, 0x0B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvcvuxwsp, 0x10, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrspi, 0x12, 0x08, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX) -GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX) - -#define VSX_LOGICAL(name, tcg_op) \ -static void glue(gen_, name)(DisasContext * ctx) \ - { \ - if 
(unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - tcg_op(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode)), \ - cpu_vsrh(xB(ctx->opcode))); \ - tcg_op(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode)), \ - cpu_vsrl(xB(ctx->opcode))); \ - } - -VSX_LOGICAL(xxland, tcg_gen_and_i64) -VSX_LOGICAL(xxlandc, tcg_gen_andc_i64) -VSX_LOGICAL(xxlor, tcg_gen_or_i64) -VSX_LOGICAL(xxlxor, tcg_gen_xor_i64) -VSX_LOGICAL(xxlnor, tcg_gen_nor_i64) -VSX_LOGICAL(xxleqv, tcg_gen_eqv_i64) -VSX_LOGICAL(xxlnand, tcg_gen_nand_i64) -VSX_LOGICAL(xxlorc, tcg_gen_orc_i64) - -#define VSX_XXMRG(name, high) \ -static void glue(gen_, name)(DisasContext * ctx) \ - { \ - TCGv_i64 a0, a1, b0, b1; \ - if (unlikely(!ctx->vsx_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VSXU); \ - return; \ - } \ - a0 = tcg_temp_new_i64(); \ - a1 = tcg_temp_new_i64(); \ - b0 = tcg_temp_new_i64(); \ - b1 = tcg_temp_new_i64(); \ - if (high) { \ - tcg_gen_mov_i64(a0, cpu_vsrh(xA(ctx->opcode))); \ - tcg_gen_mov_i64(a1, cpu_vsrh(xA(ctx->opcode))); \ - tcg_gen_mov_i64(b0, cpu_vsrh(xB(ctx->opcode))); \ - tcg_gen_mov_i64(b1, cpu_vsrh(xB(ctx->opcode))); \ - } else { \ - tcg_gen_mov_i64(a0, cpu_vsrl(xA(ctx->opcode))); \ - tcg_gen_mov_i64(a1, cpu_vsrl(xA(ctx->opcode))); \ - tcg_gen_mov_i64(b0, cpu_vsrl(xB(ctx->opcode))); \ - tcg_gen_mov_i64(b1, cpu_vsrl(xB(ctx->opcode))); \ - } \ - tcg_gen_shri_i64(a0, a0, 32); \ - tcg_gen_shri_i64(b0, b0, 32); \ - tcg_gen_deposit_i64(cpu_vsrh(xT(ctx->opcode)), \ - b0, a0, 32, 32); \ - tcg_gen_deposit_i64(cpu_vsrl(xT(ctx->opcode)), \ - b1, a1, 32, 32); \ - tcg_temp_free_i64(a0); \ - tcg_temp_free_i64(a1); \ - tcg_temp_free_i64(b0); \ - tcg_temp_free_i64(b1); \ - } - -VSX_XXMRG(xxmrghw, 1) -VSX_XXMRG(xxmrglw, 0) - -static void gen_xxsel(DisasContext * ctx) -{ - TCGv_i64 a, b, c; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - a = tcg_temp_new_i64(); - b = tcg_temp_new_i64(); - c = tcg_temp_new_i64(); - - tcg_gen_mov_i64(a, cpu_vsrh(xA(ctx->opcode))); - tcg_gen_mov_i64(b, cpu_vsrh(xB(ctx->opcode))); - tcg_gen_mov_i64(c, cpu_vsrh(xC(ctx->opcode))); - - tcg_gen_and_i64(b, b, c); - tcg_gen_andc_i64(a, a, c); - tcg_gen_or_i64(cpu_vsrh(xT(ctx->opcode)), a, b); - - tcg_gen_mov_i64(a, cpu_vsrl(xA(ctx->opcode))); - tcg_gen_mov_i64(b, cpu_vsrl(xB(ctx->opcode))); - tcg_gen_mov_i64(c, cpu_vsrl(xC(ctx->opcode))); - - tcg_gen_and_i64(b, b, c); - tcg_gen_andc_i64(a, a, c); - tcg_gen_or_i64(cpu_vsrl(xT(ctx->opcode)), a, b); - - tcg_temp_free_i64(a); - tcg_temp_free_i64(b); - tcg_temp_free_i64(c); -} - -static void gen_xxspltw(DisasContext *ctx) -{ - TCGv_i64 b, b2; - TCGv_i64 vsr = (UIM(ctx->opcode) & 2) ? 
- cpu_vsrl(xB(ctx->opcode)) : - cpu_vsrh(xB(ctx->opcode)); - - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - - b = tcg_temp_new_i64(); - b2 = tcg_temp_new_i64(); - - if (UIM(ctx->opcode) & 1) { - tcg_gen_ext32u_i64(b, vsr); - } else { - tcg_gen_shri_i64(b, vsr, 32); - } - - tcg_gen_shli_i64(b2, b, 32); - tcg_gen_or_i64(cpu_vsrh(xT(ctx->opcode)), b, b2); - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xT(ctx->opcode))); - - tcg_temp_free_i64(b); - tcg_temp_free_i64(b2); -} - -static void gen_xxsldwi(DisasContext *ctx) -{ - TCGv_i64 xth, xtl; - if (unlikely(!ctx->vsx_enabled)) { - gen_exception(ctx, POWERPC_EXCP_VSXU); - return; - } - xth = tcg_temp_new_i64(); - xtl = tcg_temp_new_i64(); - - switch (SHW(ctx->opcode)) { - case 0: { - tcg_gen_mov_i64(xth, cpu_vsrh(xA(ctx->opcode))); - tcg_gen_mov_i64(xtl, cpu_vsrl(xA(ctx->opcode))); - break; - } - case 1: { - TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_mov_i64(xth, cpu_vsrh(xA(ctx->opcode))); - tcg_gen_shli_i64(xth, xth, 32); - tcg_gen_mov_i64(t0, cpu_vsrl(xA(ctx->opcode))); - tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_or_i64(xth, xth, t0); - tcg_gen_mov_i64(xtl, cpu_vsrl(xA(ctx->opcode))); - tcg_gen_shli_i64(xtl, xtl, 32); - tcg_gen_mov_i64(t0, cpu_vsrh(xB(ctx->opcode))); - tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_or_i64(xtl, xtl, t0); - tcg_temp_free_i64(t0); - break; - } - case 2: { - tcg_gen_mov_i64(xth, cpu_vsrl(xA(ctx->opcode))); - tcg_gen_mov_i64(xtl, cpu_vsrh(xB(ctx->opcode))); - break; - } - case 3: { - TCGv_i64 t0 = tcg_temp_new_i64(); - tcg_gen_mov_i64(xth, cpu_vsrl(xA(ctx->opcode))); - tcg_gen_shli_i64(xth, xth, 32); - tcg_gen_mov_i64(t0, cpu_vsrh(xB(ctx->opcode))); - tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_or_i64(xth, xth, t0); - tcg_gen_mov_i64(xtl, cpu_vsrh(xB(ctx->opcode))); - tcg_gen_shli_i64(xtl, xtl, 32); - tcg_gen_mov_i64(t0, cpu_vsrl(xB(ctx->opcode))); - tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_or_i64(xtl, xtl, t0); - tcg_temp_free_i64(t0); - break; - } - } - - tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xth); - tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xtl); - - tcg_temp_free_i64(xth); - tcg_temp_free_i64(xtl); -} - -/*** Decimal Floating Point ***/ - -static inline TCGv_ptr gen_fprp_ptr(int reg) -{ - TCGv_ptr r = tcg_temp_new_ptr(); - tcg_gen_addi_ptr(r, cpu_env, offsetof(CPUPPCState, fpr[reg])); - return r; -} - -#define GEN_DFP_T_A_B_Rc(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr rd, ra, rb; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - rd = gen_fprp_ptr(rD(ctx->opcode)); \ - ra = gen_fprp_ptr(rA(ctx->opcode)); \ - rb = gen_fprp_ptr(rB(ctx->opcode)); \ - gen_helper_##name(cpu_env, rd, ra, rb); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_ptr(rd); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ -} - -#define GEN_DFP_BF_A_B(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr ra, rb; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - ra = gen_fprp_ptr(rA(ctx->opcode)); \ - rb = gen_fprp_ptr(rB(ctx->opcode)); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ - cpu_env, ra, rb); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_ptr(rb); \ -} - -#define GEN_DFP_BF_A_DCM(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr ra; \ - TCGv_i32 dcm; \ - if (unlikely(!ctx->fpu_enabled)) 
{ \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - ra = gen_fprp_ptr(rA(ctx->opcode)); \ - dcm = tcg_const_i32(DCM(ctx->opcode)); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ - cpu_env, ra, dcm); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_i32(dcm); \ -} - -#define GEN_DFP_T_B_U32_U32_Rc(name, u32f1, u32f2) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr rt, rb; \ - TCGv_i32 u32_1, u32_2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - rt = gen_fprp_ptr(rD(ctx->opcode)); \ - rb = gen_fprp_ptr(rB(ctx->opcode)); \ - u32_1 = tcg_const_i32(u32f1(ctx->opcode)); \ - u32_2 = tcg_const_i32(u32f2(ctx->opcode)); \ - gen_helper_##name(cpu_env, rt, rb, u32_1, u32_2); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_ptr(rt); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_i32(u32_1); \ - tcg_temp_free_i32(u32_2); \ -} - -#define GEN_DFP_T_A_B_I32_Rc(name, i32fld) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr rt, ra, rb; \ - TCGv_i32 i32; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - rt = gen_fprp_ptr(rD(ctx->opcode)); \ - ra = gen_fprp_ptr(rA(ctx->opcode)); \ - rb = gen_fprp_ptr(rB(ctx->opcode)); \ - i32 = tcg_const_i32(i32fld(ctx->opcode)); \ - gen_helper_##name(cpu_env, rt, ra, rb, i32); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_ptr(rt); \ - tcg_temp_free_ptr(rb); \ - tcg_temp_free_ptr(ra); \ - tcg_temp_free_i32(i32); \ - } - -#define GEN_DFP_T_B_Rc(name) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr rt, rb; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - rt = gen_fprp_ptr(rD(ctx->opcode)); \ - rb = gen_fprp_ptr(rB(ctx->opcode)); \ - gen_helper_##name(cpu_env, rt, rb); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_ptr(rt); \ - tcg_temp_free_ptr(rb); \ - } - -#define GEN_DFP_T_FPR_I32_Rc(name, fprfld, i32fld) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_ptr rt, rs; \ - TCGv_i32 i32; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - gen_update_nip(ctx, ctx->nip - 4); \ - rt = gen_fprp_ptr(rD(ctx->opcode)); \ - rs = gen_fprp_ptr(fprfld(ctx->opcode)); \ - i32 = tcg_const_i32(i32fld(ctx->opcode)); \ - gen_helper_##name(cpu_env, rt, rs, i32); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ - tcg_temp_free_ptr(rt); \ - tcg_temp_free_ptr(rs); \ - tcg_temp_free_i32(i32); \ -} - -GEN_DFP_T_A_B_Rc(dadd) -GEN_DFP_T_A_B_Rc(daddq) -GEN_DFP_T_A_B_Rc(dsub) -GEN_DFP_T_A_B_Rc(dsubq) -GEN_DFP_T_A_B_Rc(dmul) -GEN_DFP_T_A_B_Rc(dmulq) -GEN_DFP_T_A_B_Rc(ddiv) -GEN_DFP_T_A_B_Rc(ddivq) -GEN_DFP_BF_A_B(dcmpu) -GEN_DFP_BF_A_B(dcmpuq) -GEN_DFP_BF_A_B(dcmpo) -GEN_DFP_BF_A_B(dcmpoq) -GEN_DFP_BF_A_DCM(dtstdc) -GEN_DFP_BF_A_DCM(dtstdcq) -GEN_DFP_BF_A_DCM(dtstdg) -GEN_DFP_BF_A_DCM(dtstdgq) -GEN_DFP_BF_A_B(dtstex) -GEN_DFP_BF_A_B(dtstexq) -GEN_DFP_BF_A_B(dtstsf) -GEN_DFP_BF_A_B(dtstsfq) -GEN_DFP_T_B_U32_U32_Rc(dquai, SIMM5, RMC) -GEN_DFP_T_B_U32_U32_Rc(dquaiq, SIMM5, RMC) -GEN_DFP_T_A_B_I32_Rc(dqua, RMC) -GEN_DFP_T_A_B_I32_Rc(dquaq, RMC) -GEN_DFP_T_A_B_I32_Rc(drrnd, RMC) 
-GEN_DFP_T_A_B_I32_Rc(drrndq, RMC) -GEN_DFP_T_B_U32_U32_Rc(drintx, FPW, RMC) -GEN_DFP_T_B_U32_U32_Rc(drintxq, FPW, RMC) -GEN_DFP_T_B_U32_U32_Rc(drintn, FPW, RMC) -GEN_DFP_T_B_U32_U32_Rc(drintnq, FPW, RMC) -GEN_DFP_T_B_Rc(dctdp) -GEN_DFP_T_B_Rc(dctqpq) -GEN_DFP_T_B_Rc(drsp) -GEN_DFP_T_B_Rc(drdpq) -GEN_DFP_T_B_Rc(dcffix) -GEN_DFP_T_B_Rc(dcffixq) -GEN_DFP_T_B_Rc(dctfix) -GEN_DFP_T_B_Rc(dctfixq) -GEN_DFP_T_FPR_I32_Rc(ddedpd, rB, SP) -GEN_DFP_T_FPR_I32_Rc(ddedpdq, rB, SP) -GEN_DFP_T_FPR_I32_Rc(denbcd, rB, SP) -GEN_DFP_T_FPR_I32_Rc(denbcdq, rB, SP) -GEN_DFP_T_B_Rc(dxex) -GEN_DFP_T_B_Rc(dxexq) -GEN_DFP_T_A_B_Rc(diex) -GEN_DFP_T_A_B_Rc(diexq) -GEN_DFP_T_FPR_I32_Rc(dscli, rA, DCM) -GEN_DFP_T_FPR_I32_Rc(dscliq, rA, DCM) -GEN_DFP_T_FPR_I32_Rc(dscri, rA, DCM) -GEN_DFP_T_FPR_I32_Rc(dscriq, rA, DCM) - -/*** SPE extension ***/ -/* Register moves */ - -static inline void gen_evmra(DisasContext *ctx) -{ - - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - - TCGv_i64 tmp = tcg_temp_new_i64(); - - /* tmp := rA_lo + rA_hi << 32 */ - tcg_gen_concat_tl_i64(tmp, cpu_gpr[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); - - /* spe_acc := tmp */ - tcg_gen_st_i64(tmp, cpu_env, offsetof(CPUPPCState, spe_acc)); - tcg_temp_free_i64(tmp); - - /* rD := rA */ - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); -} - -static inline void gen_load_gpr64(TCGv_i64 t, int reg) -{ - tcg_gen_concat_tl_i64(t, cpu_gpr[reg], cpu_gprh[reg]); -} - -static inline void gen_store_gpr64(int reg, TCGv_i64 t) -{ - tcg_gen_extr_i64_tl(cpu_gpr[reg], cpu_gprh[reg], t); -} - -#define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ -static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ -{ \ - if (Rc(ctx->opcode)) \ - gen_##name1(ctx); \ - else \ - gen_##name0(ctx); \ -} - -/* Handler for undefined SPE opcodes */ -static inline void gen_speundef(DisasContext *ctx) -{ - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); -} - -/* SPE logic */ -#define GEN_SPEOP_LOGIC2(name, tcg_op) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - tcg_op(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], \ - cpu_gpr[rB(ctx->opcode)]); \ - tcg_op(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], \ - cpu_gprh[rB(ctx->opcode)]); \ -} - -GEN_SPEOP_LOGIC2(evand, tcg_gen_and_tl); -GEN_SPEOP_LOGIC2(evandc, tcg_gen_andc_tl); -GEN_SPEOP_LOGIC2(evxor, tcg_gen_xor_tl); -GEN_SPEOP_LOGIC2(evor, tcg_gen_or_tl); -GEN_SPEOP_LOGIC2(evnor, tcg_gen_nor_tl); -GEN_SPEOP_LOGIC2(eveqv, tcg_gen_eqv_tl); -GEN_SPEOP_LOGIC2(evorc, tcg_gen_orc_tl); -GEN_SPEOP_LOGIC2(evnand, tcg_gen_nand_tl); - -/* SPE logic immediate */ -#define GEN_SPEOP_TCG_LOGIC_IMM2(name, tcg_opi) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ - tcg_opi(t0, t0, rB(ctx->opcode)); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ - tcg_opi(t0, t0, rB(ctx->opcode)); \ - tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ - \ - tcg_temp_free_i32(t0); \ -} -GEN_SPEOP_TCG_LOGIC_IMM2(evslwi, tcg_gen_shli_i32); -GEN_SPEOP_TCG_LOGIC_IMM2(evsrwiu, tcg_gen_shri_i32); 
-GEN_SPEOP_TCG_LOGIC_IMM2(evsrwis, tcg_gen_sari_i32); -GEN_SPEOP_TCG_LOGIC_IMM2(evrlwi, tcg_gen_rotli_i32); - -/* SPE arithmetic */ -#define GEN_SPEOP_ARITH1(name, tcg_op) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ - tcg_op(t0, t0); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ - tcg_op(t0, t0); \ - tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ - \ - tcg_temp_free_i32(t0); \ -} - -static inline void gen_op_evabs(TCGv_i32 ret, TCGv_i32 arg1) -{ - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - - tcg_gen_brcondi_i32(TCG_COND_GE, arg1, 0, l1); - tcg_gen_neg_i32(ret, arg1); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_mov_i32(ret, arg1); - gen_set_label(l2); -} -GEN_SPEOP_ARITH1(evabs, gen_op_evabs); -GEN_SPEOP_ARITH1(evneg, tcg_gen_neg_i32); -GEN_SPEOP_ARITH1(evextsb, tcg_gen_ext8s_i32); -GEN_SPEOP_ARITH1(evextsh, tcg_gen_ext16s_i32); -static inline void gen_op_evrndw(TCGv_i32 ret, TCGv_i32 arg1) -{ - tcg_gen_addi_i32(ret, arg1, 0x8000); - tcg_gen_ext16u_i32(ret, ret); -} -GEN_SPEOP_ARITH1(evrndw, gen_op_evrndw); -GEN_SPEOP_ARITH1(evcntlsw, gen_helper_cntlsw32); -GEN_SPEOP_ARITH1(evcntlzw, gen_helper_cntlzw32); - -#define GEN_SPEOP_ARITH2(name, tcg_op) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - t1 = tcg_temp_new_i32(); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ - tcg_op(t0, t0, t1); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ - tcg_gen_trunc_tl_i32(t1, cpu_gprh[rB(ctx->opcode)]); \ - tcg_op(t0, t0, t1); \ - tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ - \ - tcg_temp_free_i32(t0); \ - tcg_temp_free_i32(t1); \ -} - -static inline void gen_op_evsrwu(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - TCGv_i32 t0 = tcg_temp_local_new_i32(); - - /* No error here: 6 bits are used */ - tcg_gen_andi_i32(t0, arg2, 0x3F); - tcg_gen_brcondi_i32(TCG_COND_GE, t0, 32, l1); - tcg_gen_shr_i32(ret, arg1, t0); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_movi_i32(ret, 0); - gen_set_label(l2); - tcg_temp_free_i32(t0); -} -GEN_SPEOP_ARITH2(evsrwu, gen_op_evsrwu); -static inline void gen_op_evsrws(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - TCGv_i32 t0 = tcg_temp_local_new_i32(); - - /* No error here: 6 bits are used */ - tcg_gen_andi_i32(t0, arg2, 0x3F); - tcg_gen_brcondi_i32(TCG_COND_GE, t0, 32, l1); - tcg_gen_sar_i32(ret, arg1, t0); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_movi_i32(ret, 0); - gen_set_label(l2); - tcg_temp_free_i32(t0); -} -GEN_SPEOP_ARITH2(evsrws, gen_op_evsrws); -static inline void gen_op_evslw(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - TCGv_i32 t0 = tcg_temp_local_new_i32(); - - /* No error here: 6 bits are used */ - tcg_gen_andi_i32(t0, arg2, 0x3F); - tcg_gen_brcondi_i32(TCG_COND_GE, t0, 32, l1); - tcg_gen_shl_i32(ret, arg1, t0); - 
tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_movi_i32(ret, 0); - gen_set_label(l2); - tcg_temp_free_i32(t0); -} -GEN_SPEOP_ARITH2(evslw, gen_op_evslw); -static inline void gen_op_evrlw(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - TCGv_i32 t0 = tcg_temp_new_i32(); - tcg_gen_andi_i32(t0, arg2, 0x1F); - tcg_gen_rotl_i32(ret, arg1, t0); - tcg_temp_free_i32(t0); -} -GEN_SPEOP_ARITH2(evrlw, gen_op_evrlw); -static inline void gen_evmergehi(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); -} -GEN_SPEOP_ARITH2(evaddw, tcg_gen_add_i32); -static inline void gen_op_evsubf(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) -{ - tcg_gen_sub_i32(ret, arg2, arg1); -} -GEN_SPEOP_ARITH2(evsubfw, gen_op_evsubf); - -/* SPE arithmetic immediate */ -#define GEN_SPEOP_ARITH_IMM2(name, tcg_op) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]); \ - tcg_op(t0, t0, rA(ctx->opcode)); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gprh[rB(ctx->opcode)]); \ - tcg_op(t0, t0, rA(ctx->opcode)); \ - tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ - \ - tcg_temp_free_i32(t0); \ -} -GEN_SPEOP_ARITH_IMM2(evaddiw, tcg_gen_addi_i32); -GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32); - -/* SPE comparison */ -#define GEN_SPEOP_COMP(name, tcg_cond) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - TCGLabel *l1 = gen_new_label(); \ - TCGLabel *l2 = gen_new_label(); \ - TCGLabel *l3 = gen_new_label(); \ - TCGLabel *l4 = gen_new_label(); \ - \ - tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); \ - tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); \ - tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); \ - tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); \ - \ - tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)], \ - cpu_gpr[rB(ctx->opcode)], l1); \ - tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0); \ - tcg_gen_br(l2); \ - gen_set_label(l1); \ - tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], \ - CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL); \ - gen_set_label(l2); \ - tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)], \ - cpu_gprh[rB(ctx->opcode)], l3); \ - tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \ - ~(CRF_CH | CRF_CH_AND_CL)); \ - tcg_gen_br(l4); \ - gen_set_label(l3); \ - tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \ - CRF_CH | CRF_CH_OR_CL); \ - gen_set_label(l4); \ -} -GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU); -GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT); -GEN_SPEOP_COMP(evcmpltu, TCG_COND_LTU); -GEN_SPEOP_COMP(evcmplts, TCG_COND_LT); -GEN_SPEOP_COMP(evcmpeq, TCG_COND_EQ); - -/* SPE misc */ -static inline void gen_brinc(DisasContext *ctx) -{ - /* Note: brinc is usable even if SPE is disabled */ - gen_helper_brinc(cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -} -static inline void gen_evmergelo(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, 
POWERPC_EXCP_SPEU); - return; - } - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -} -static inline void gen_evmergehilo(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); -} -static inline void gen_evmergelohi(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - if (rD(ctx->opcode) == rA(ctx->opcode)) { - TCGv tmp = tcg_temp_new(); - tcg_gen_mov_tl(tmp, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], tmp); - tcg_temp_free(tmp); - } else { - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - } -} -static inline void gen_evsplati(DisasContext *ctx) -{ - uint64_t imm = ((int32_t)(rA(ctx->opcode) << 27)) >> 27; - - tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm); - tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm); -} -static inline void gen_evsplatfi(DisasContext *ctx) -{ - uint64_t imm = rA(ctx->opcode) << 27; - - tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm); - tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm); -} - -static inline void gen_evsel(DisasContext *ctx) -{ - TCGLabel *l1 = gen_new_label(); - TCGLabel *l2 = gen_new_label(); - TCGLabel *l3 = gen_new_label(); - TCGLabel *l4 = gen_new_label(); - TCGv_i32 t0 = tcg_temp_local_new_i32(); - - tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3); - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); - tcg_gen_br(l2); - gen_set_label(l1); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); - gen_set_label(l2); - tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2); - tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_br(l4); - gen_set_label(l3); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); - gen_set_label(l4); - tcg_temp_free_i32(t0); -} - -static void gen_evsel0(DisasContext *ctx) -{ - gen_evsel(ctx); -} - -static void gen_evsel1(DisasContext *ctx) -{ - gen_evsel(ctx); -} - -static void gen_evsel2(DisasContext *ctx) -{ - gen_evsel(ctx); -} - -static void gen_evsel3(DisasContext *ctx) -{ - gen_evsel(ctx); -} - -/* Multiply */ - -static inline void gen_evmwumi(DisasContext *ctx) -{ - TCGv_i64 t0, t1; - - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - - /* t0 := rA; t1 := rB */ - tcg_gen_extu_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_ext32u_i64(t0, t0); - tcg_gen_extu_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_ext32u_i64(t1, t1); - - tcg_gen_mul_i64(t0, t0, t1); /* t0 := rA * rB */ - - gen_store_gpr64(rD(ctx->opcode), t0); /* rD := t0 */ - - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); -} - -static inline void gen_evmwumia(DisasContext *ctx) -{ - TCGv_i64 tmp; - - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - - gen_evmwumi(ctx); /* rD := rA * rB */ - - tmp = tcg_temp_new_i64(); - - /* acc := rD */ - gen_load_gpr64(tmp, rD(ctx->opcode)); - tcg_gen_st_i64(tmp, 
cpu_env, offsetof(CPUPPCState, spe_acc)); - tcg_temp_free_i64(tmp); -} - -static inline void gen_evmwumiaa(DisasContext *ctx) -{ - TCGv_i64 acc; - TCGv_i64 tmp; - - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - - gen_evmwumi(ctx); /* rD := rA * rB */ - - acc = tcg_temp_new_i64(); - tmp = tcg_temp_new_i64(); - - /* tmp := rD */ - gen_load_gpr64(tmp, rD(ctx->opcode)); - - /* Load acc */ - tcg_gen_ld_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); - - /* acc := tmp + acc */ - tcg_gen_add_i64(acc, acc, tmp); - - /* Store acc */ - tcg_gen_st_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); - - /* rD := acc */ - gen_store_gpr64(rD(ctx->opcode), acc); - - tcg_temp_free_i64(acc); - tcg_temp_free_i64(tmp); -} - -static inline void gen_evmwsmi(DisasContext *ctx) -{ - TCGv_i64 t0, t1; - - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - - /* t0 := rA; t1 := rB */ - tcg_gen_extu_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_ext32s_i64(t0, t0); - tcg_gen_extu_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_ext32s_i64(t1, t1); - - tcg_gen_mul_i64(t0, t0, t1); /* t0 := rA * rB */ - - gen_store_gpr64(rD(ctx->opcode), t0); /* rD := t0 */ - - tcg_temp_free_i64(t0); - tcg_temp_free_i64(t1); -} - -static inline void gen_evmwsmia(DisasContext *ctx) -{ - TCGv_i64 tmp; - - gen_evmwsmi(ctx); /* rD := rA * rB */ - - tmp = tcg_temp_new_i64(); - - /* acc := rD */ - gen_load_gpr64(tmp, rD(ctx->opcode)); - tcg_gen_st_i64(tmp, cpu_env, offsetof(CPUPPCState, spe_acc)); - - tcg_temp_free_i64(tmp); -} - -static inline void gen_evmwsmiaa(DisasContext *ctx) -{ - TCGv_i64 acc = tcg_temp_new_i64(); - TCGv_i64 tmp = tcg_temp_new_i64(); - - gen_evmwsmi(ctx); /* rD := rA * rB */ - - acc = tcg_temp_new_i64(); - tmp = tcg_temp_new_i64(); - - /* tmp := rD */ - gen_load_gpr64(tmp, rD(ctx->opcode)); - - /* Load acc */ - tcg_gen_ld_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); - - /* acc := tmp + acc */ - tcg_gen_add_i64(acc, acc, tmp); - - /* Store acc */ - tcg_gen_st_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); - - /* rD := acc */ - gen_store_gpr64(rD(ctx->opcode), acc); - - tcg_temp_free_i64(acc); - tcg_temp_free_i64(tmp); -} - -GEN_SPE(evaddw, speundef, 0x00, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// -GEN_SPE(evaddiw, speundef, 0x01, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); -GEN_SPE(evsubfw, speundef, 0x02, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// -GEN_SPE(evsubifw, speundef, 0x03, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); -GEN_SPE(evabs, evneg, 0x04, 0x08, 0x0000F800, 0x0000F800, PPC_SPE); //// -GEN_SPE(evextsb, evextsh, 0x05, 0x08, 0x0000F800, 0x0000F800, PPC_SPE); //// -GEN_SPE(evrndw, evcntlzw, 0x06, 0x08, 0x0000F800, 0x0000F800, PPC_SPE); //// -GEN_SPE(evcntlsw, brinc, 0x07, 0x08, 0x0000F800, 0x00000000, PPC_SPE); // -GEN_SPE(evmra, speundef, 0x02, 0x13, 0x0000F800, 0xFFFFFFFF, PPC_SPE); -GEN_SPE(speundef, evand, 0x08, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE); //// -GEN_SPE(evandc, speundef, 0x09, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// -GEN_SPE(evxor, evor, 0x0B, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// -GEN_SPE(evnor, eveqv, 0x0C, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// -GEN_SPE(evmwumi, evmwsmi, 0x0C, 0x11, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(evmwumia, evmwsmia, 0x1C, 0x11, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(evmwumiaa, evmwsmiaa, 0x0C, 0x15, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evorc, 0x0D, 0x08, 0xFFFFFFFF, 
0x00000000, PPC_SPE); //// -GEN_SPE(evnand, speundef, 0x0F, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// -GEN_SPE(evsrwu, evsrws, 0x10, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// -GEN_SPE(evsrwiu, evsrwis, 0x11, 0x08, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(evslw, speundef, 0x12, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// -GEN_SPE(evslwi, speundef, 0x13, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); -GEN_SPE(evrlw, evsplati, 0x14, 0x08, 0x00000000, 0x0000F800, PPC_SPE); // -GEN_SPE(evrlwi, evsplatfi, 0x15, 0x08, 0x00000000, 0x0000F800, PPC_SPE); -GEN_SPE(evmergehi, evmergelo, 0x16, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// -GEN_SPE(evmergehilo, evmergelohi, 0x17, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// -GEN_SPE(evcmpgtu, evcmpgts, 0x18, 0x08, 0x00600000, 0x00600000, PPC_SPE); //// -GEN_SPE(evcmpltu, evcmplts, 0x19, 0x08, 0x00600000, 0x00600000, PPC_SPE); //// -GEN_SPE(evcmpeq, speundef, 0x1A, 0x08, 0x00600000, 0xFFFFFFFF, PPC_SPE); //// - -/* SPE load and stores */ -static inline void gen_addr_spe_imm_index(DisasContext *ctx, TCGv EA, int sh) -{ - target_ulong uimm = rB(ctx->opcode); - - if (rA(ctx->opcode) == 0) { - tcg_gen_movi_tl(EA, uimm << sh); - } else { - tcg_gen_addi_tl(EA, cpu_gpr[rA(ctx->opcode)], uimm << sh); - if (NARROW_MODE(ctx)) { - tcg_gen_ext32u_tl(EA, EA); - } - } -} - -static inline void gen_op_evldd(DisasContext *ctx, TCGv addr) -{ - TCGv_i64 t0 = tcg_temp_new_i64(); - gen_qemu_ld64(ctx, t0, addr); - gen_store_gpr64(rD(ctx->opcode), t0); - tcg_temp_free_i64(t0); -} - -static inline void gen_op_evldw(DisasContext *ctx, TCGv addr) -{ - gen_qemu_ld32u(ctx, cpu_gprh[rD(ctx->opcode)], addr); - gen_addr_add(ctx, addr, addr, 4); - gen_qemu_ld32u(ctx, cpu_gpr[rD(ctx->opcode)], addr); -} + } -static inline void gen_op_evldh(DisasContext *ctx, TCGv addr) -{ - TCGv t0 = tcg_temp_new(); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_or_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rD(ctx->opcode)], t0); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_or_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)], t0); + tcg_gen_add_tl(t0, t0, cpu_gpr[rB(ctx->opcode)]); + gen_helper_booke206_tlbsx(cpu_env, t0); tcg_temp_free(t0); +#endif /* defined(CONFIG_USER_ONLY) */ } -static inline void gen_op_evlhhesplat(DisasContext *ctx, TCGv addr) +/* tlbwe */ +static void gen_tlbwe_booke206(DisasContext *ctx) { - TCGv t0 = tcg_temp_new(); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(t0, t0, 16); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); - tcg_temp_free(t0); +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + gen_helper_booke206_tlbwe(cpu_env); +#endif /* defined(CONFIG_USER_ONLY) */ } -static inline void gen_op_evlhhousplat(DisasContext *ctx, TCGv addr) +static void gen_tlbivax_booke206(DisasContext *ctx) { - TCGv t0 = tcg_temp_new(); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); - tcg_temp_free(t0); -} +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + TCGv t0; -static inline void gen_op_evlhhossplat(DisasContext *ctx, TCGv addr) -{ - TCGv t0 = tcg_temp_new(); - gen_qemu_ld16s(ctx, t0, addr); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); - 
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); + CHK_SV; + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, t0); + gen_helper_booke206_tlbivax(cpu_env, t0); tcg_temp_free(t0); +#endif /* defined(CONFIG_USER_ONLY) */ } -static inline void gen_op_evlwhe(DisasContext *ctx, TCGv addr) +static void gen_tlbilx_booke206(DisasContext *ctx) { - TCGv t0 = tcg_temp_new(); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(cpu_gpr[rD(ctx->opcode)], t0, 16); - tcg_temp_free(t0); -} +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + TCGv t0; -static inline void gen_op_evlwhou(DisasContext *ctx, TCGv addr) -{ - gen_qemu_ld16u(ctx, cpu_gprh[rD(ctx->opcode)], addr); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16u(ctx, cpu_gpr[rD(ctx->opcode)], addr); -} + CHK_SV; + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, t0); -static inline void gen_op_evlwhos(DisasContext *ctx, TCGv addr) -{ - gen_qemu_ld16s(ctx, cpu_gprh[rD(ctx->opcode)], addr); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16s(ctx, cpu_gpr[rD(ctx->opcode)], addr); -} + switch((ctx->opcode >> 21) & 0x3) { + case 0: + gen_helper_booke206_tlbilx0(cpu_env, t0); + break; + case 1: + gen_helper_booke206_tlbilx1(cpu_env, t0); + break; + case 3: + gen_helper_booke206_tlbilx3(cpu_env, t0); + break; + default: + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); + break; + } -static inline void gen_op_evlwwsplat(DisasContext *ctx, TCGv addr) -{ - TCGv t0 = tcg_temp_new(); - gen_qemu_ld32u(ctx, t0, addr); - tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); tcg_temp_free(t0); +#endif /* defined(CONFIG_USER_ONLY) */ } -static inline void gen_op_evlwhsplat(DisasContext *ctx, TCGv addr) + +/* wrtee */ +static void gen_wrtee(DisasContext *ctx) { - TCGv t0 = tcg_temp_new(); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); - tcg_gen_or_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rD(ctx->opcode)], t0); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_ld16u(ctx, t0, addr); - tcg_gen_shli_tl(cpu_gpr[rD(ctx->opcode)], t0, 16); - tcg_gen_or_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rD(ctx->opcode)], t0); +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + TCGv t0; + + CHK_SV; + t0 = tcg_temp_new(); + tcg_gen_andi_tl(t0, cpu_gpr[rD(ctx->opcode)], (1 << MSR_EE)); + tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE)); + tcg_gen_or_tl(cpu_msr, cpu_msr, t0); tcg_temp_free(t0); + /* Stop translation to have a chance to raise an exception + * if we just set msr_ee to 1 + */ + gen_stop_exception(ctx); +#endif /* defined(CONFIG_USER_ONLY) */ } -static inline void gen_op_evstdd(DisasContext *ctx, TCGv addr) +/* wrteei */ +static void gen_wrteei(DisasContext *ctx) { - TCGv_i64 t0 = tcg_temp_new_i64(); - gen_load_gpr64(t0, rS(ctx->opcode)); - gen_qemu_st64(ctx, t0, addr); - tcg_temp_free_i64(t0); +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + if (ctx->opcode & 0x00008000) { + tcg_gen_ori_tl(cpu_msr, cpu_msr, (1 << MSR_EE)); + /* Stop translation to have a chance to raise an exception */ + gen_stop_exception(ctx); + } else { + tcg_gen_andi_tl(cpu_msr, cpu_msr, ~(1 << MSR_EE)); + } +#endif /* defined(CONFIG_USER_ONLY) */ } -static inline void gen_op_evstdw(DisasContext *ctx, TCGv addr) -{ - gen_qemu_st32(ctx, cpu_gprh[rS(ctx->opcode)], addr); - gen_addr_add(ctx, addr, addr, 4); - gen_qemu_st32(ctx, cpu_gpr[rS(ctx->opcode)], addr); -} +/* PowerPC 440 specific instructions 
*/ -static inline void gen_op_evstdh(DisasContext *ctx, TCGv addr) +/* dlmzb */ +static void gen_dlmzb(DisasContext *ctx) { - TCGv t0 = tcg_temp_new(); - tcg_gen_shri_tl(t0, cpu_gprh[rS(ctx->opcode)], 16); - gen_qemu_st16(ctx, t0, addr); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_st16(ctx, cpu_gprh[rS(ctx->opcode)], addr); - gen_addr_add(ctx, addr, addr, 2); - tcg_gen_shri_tl(t0, cpu_gpr[rS(ctx->opcode)], 16); - gen_qemu_st16(ctx, t0, addr); - tcg_temp_free(t0); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_st16(ctx, cpu_gpr[rS(ctx->opcode)], addr); + TCGv_i32 t0 = tcg_const_i32(Rc(ctx->opcode)); + gen_helper_dlmzb(cpu_gpr[rA(ctx->opcode)], cpu_env, + cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); + tcg_temp_free_i32(t0); } -static inline void gen_op_evstwhe(DisasContext *ctx, TCGv addr) +/* mbar replaces eieio on 440 */ +static void gen_mbar(DisasContext *ctx) { - TCGv t0 = tcg_temp_new(); - tcg_gen_shri_tl(t0, cpu_gprh[rS(ctx->opcode)], 16); - gen_qemu_st16(ctx, t0, addr); - gen_addr_add(ctx, addr, addr, 2); - tcg_gen_shri_tl(t0, cpu_gpr[rS(ctx->opcode)], 16); - gen_qemu_st16(ctx, t0, addr); - tcg_temp_free(t0); + /* interpreted as no-op */ } -static inline void gen_op_evstwho(DisasContext *ctx, TCGv addr) +/* msync replaces sync on 440 */ +static void gen_msync_4xx(DisasContext *ctx) { - gen_qemu_st16(ctx, cpu_gprh[rS(ctx->opcode)], addr); - gen_addr_add(ctx, addr, addr, 2); - gen_qemu_st16(ctx, cpu_gpr[rS(ctx->opcode)], addr); + /* interpreted as no-op */ } -static inline void gen_op_evstwwe(DisasContext *ctx, TCGv addr) +/* icbt */ +static void gen_icbt_440(DisasContext *ctx) { - gen_qemu_st32(ctx, cpu_gprh[rS(ctx->opcode)], addr); + /* interpreted as no-op */ + /* XXX: specification say this is treated as a load by the MMU + * but does not generate any exception + */ } -static inline void gen_op_evstwwo(DisasContext *ctx, TCGv addr) +/* Embedded.Processor Control */ + +static void gen_msgclr(DisasContext *ctx) { - gen_qemu_st32(ctx, cpu_gpr[rS(ctx->opcode)], addr); +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + gen_helper_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]); +#endif /* defined(CONFIG_USER_ONLY) */ } -#define GEN_SPEOP_LDST(name, opc2, sh) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv t0; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - t0 = tcg_temp_new(); \ - if (Rc(ctx->opcode)) { \ - gen_addr_spe_imm_index(ctx, t0, sh); \ - } else { \ - gen_addr_reg_index(ctx, t0); \ - } \ - gen_op_##name(ctx, t0); \ - tcg_temp_free(t0); \ +static void gen_msgsnd(DisasContext *ctx) +{ +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + CHK_SV; + gen_helper_msgsnd(cpu_gpr[rB(ctx->opcode)]); +#endif /* defined(CONFIG_USER_ONLY) */ } -GEN_SPEOP_LDST(evldd, 0x00, 3); -GEN_SPEOP_LDST(evldw, 0x01, 3); -GEN_SPEOP_LDST(evldh, 0x02, 3); -GEN_SPEOP_LDST(evlhhesplat, 0x04, 1); -GEN_SPEOP_LDST(evlhhousplat, 0x06, 1); -GEN_SPEOP_LDST(evlhhossplat, 0x07, 1); -GEN_SPEOP_LDST(evlwhe, 0x08, 2); -GEN_SPEOP_LDST(evlwhou, 0x0A, 2); -GEN_SPEOP_LDST(evlwhos, 0x0B, 2); -GEN_SPEOP_LDST(evlwwsplat, 0x0C, 2); -GEN_SPEOP_LDST(evlwhsplat, 0x0E, 2); - -GEN_SPEOP_LDST(evstdd, 0x10, 3); -GEN_SPEOP_LDST(evstdw, 0x11, 3); -GEN_SPEOP_LDST(evstdh, 0x12, 3); -GEN_SPEOP_LDST(evstwhe, 0x18, 2); -GEN_SPEOP_LDST(evstwho, 0x1A, 2); -GEN_SPEOP_LDST(evstwwe, 0x1C, 2); -GEN_SPEOP_LDST(evstwwo, 0x1E, 2); - -/* Multiply and add - TODO */ -#if 0 -GEN_SPE(speundef, evmhessf, 0x01, 0x10, 
0xFFFFFFFF, 0x00000000, PPC_SPE);// -GEN_SPE(speundef, evmhossf, 0x03, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmheumi, evmhesmi, 0x04, 0x10, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhesmf, 0x05, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhoumi, evmhosmi, 0x06, 0x10, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhosmf, 0x07, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhessfa, 0x11, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhossfa, 0x13, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmheumia, evmhesmia, 0x14, 0x10, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhesmfa, 0x15, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhoumia, evmhosmia, 0x16, 0x10, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhosmfa, 0x17, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); - -GEN_SPE(speundef, evmwhssf, 0x03, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmwlumi, speundef, 0x04, 0x11, 0x00000000, 0xFFFFFFFF, PPC_SPE); -GEN_SPE(evmwhumi, evmwhsmi, 0x06, 0x11, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwhsmf, 0x07, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwssf, 0x09, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwsmf, 0x0D, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwhssfa, 0x13, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmwlumia, speundef, 0x14, 0x11, 0x00000000, 0xFFFFFFFF, PPC_SPE); -GEN_SPE(evmwhumia, evmwhsmia, 0x16, 0x11, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwhsmfa, 0x17, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwssfa, 0x19, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwsmfa, 0x1D, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); - -GEN_SPE(evadduiaaw, evaddsiaaw, 0x00, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); -GEN_SPE(evsubfusiaaw, evsubfssiaaw, 0x01, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); -GEN_SPE(evaddumiaaw, evaddsmiaaw, 0x04, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); -GEN_SPE(evsubfumiaaw, evsubfsmiaaw, 0x05, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); -GEN_SPE(evdivws, evdivwu, 0x06, 0x13, 0x00000000, 0x00000000, PPC_SPE); - -GEN_SPE(evmheusiaaw, evmhessiaaw, 0x00, 0x14, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhessfaaw, 0x01, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhousiaaw, evmhossiaaw, 0x02, 0x14, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhossfaaw, 0x03, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmheumiaaw, evmhesmiaaw, 0x04, 0x14, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhesmfaaw, 0x05, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhoumiaaw, evmhosmiaaw, 0x06, 0x14, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhosmfaaw, 0x07, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhegumiaa, evmhegsmiaa, 0x14, 0x14, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhegsmfaa, 0x15, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhogumiaa, evmhogsmiaa, 0x16, 0x14, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhogsmfaa, 0x17, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); - -GEN_SPE(evmwlusiaaw, evmwlssiaaw, 0x00, 0x15, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(evmwlumiaaw, evmwlsmiaaw, 0x04, 0x15, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwssfaa, 0x09, 0x15, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwsmfaa, 0x0D, 0x15, 0xFFFFFFFF, 0x00000000, PPC_SPE); - -GEN_SPE(evmheusianw, evmhessianw, 0x00, 0x16, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, 
evmhessfanw, 0x01, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhousianw, evmhossianw, 0x02, 0x16, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhossfanw, 0x03, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmheumianw, evmhesmianw, 0x04, 0x16, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhesmfanw, 0x05, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhoumianw, evmhosmianw, 0x06, 0x16, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhosmfanw, 0x07, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhegumian, evmhegsmian, 0x14, 0x16, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhegsmfan, 0x15, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmhigumian, evmhigsmian, 0x16, 0x16, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmhogsmfan, 0x17, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); - -GEN_SPE(evmwlusianw, evmwlssianw, 0x00, 0x17, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(evmwlumianw, evmwlsmianw, 0x04, 0x17, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwssfan, 0x09, 0x17, 0xFFFFFFFF, 0x00000000, PPC_SPE); -GEN_SPE(evmwumian, evmwsmian, 0x0C, 0x17, 0x00000000, 0x00000000, PPC_SPE); -GEN_SPE(speundef, evmwsmfan, 0x0D, 0x17, 0xFFFFFFFF, 0x00000000, PPC_SPE); -#endif -/*** SPE floating-point extension ***/ -#define GEN_SPEFPUOP_CONV_32_32(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0 = tcg_temp_new_i32(); \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]); \ - gen_helper_##name(t0, cpu_env, t0); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ - tcg_temp_free_i32(t0); \ -} -#define GEN_SPEFPUOP_CONV_32_64(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0 = tcg_temp_new_i64(); \ - TCGv_i32 t1 = tcg_temp_new_i32(); \ - gen_load_gpr64(t0, rB(ctx->opcode)); \ - gen_helper_##name(t1, cpu_env, t0); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); \ - tcg_temp_free_i64(t0); \ - tcg_temp_free_i32(t1); \ -} -#define GEN_SPEFPUOP_CONV_64_32(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0 = tcg_temp_new_i64(); \ - TCGv_i32 t1 = tcg_temp_new_i32(); \ - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ - gen_helper_##name(t0, cpu_env, t1); \ - gen_store_gpr64(rD(ctx->opcode), t0); \ - tcg_temp_free_i64(t0); \ - tcg_temp_free_i32(t1); \ -} -#define GEN_SPEFPUOP_CONV_64_64(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0 = tcg_temp_new_i64(); \ - gen_load_gpr64(t0, rB(ctx->opcode)); \ - gen_helper_##name(t0, cpu_env, t0); \ - gen_store_gpr64(rD(ctx->opcode), t0); \ - tcg_temp_free_i64(t0); \ -} -#define GEN_SPEFPUOP_ARITH2_32_32(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - t1 = tcg_temp_new_i32(); \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ - gen_helper_##name(t0, cpu_env, t0, t1); \ - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ - \ - tcg_temp_free_i32(t0); \ - tcg_temp_free_i32(t1); \ -} -#define GEN_SPEFPUOP_ARITH2_64_64(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - gen_load_gpr64(t0, rA(ctx->opcode)); \ - gen_load_gpr64(t1, rB(ctx->opcode)); \ - 
gen_helper_##name(t0, cpu_env, t0, t1); \ - gen_store_gpr64(rD(ctx->opcode), t0); \ - tcg_temp_free_i64(t0); \ - tcg_temp_free_i64(t1); \ -} -#define GEN_SPEFPUOP_COMP_32(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i32(); \ - t1 = tcg_temp_new_i32(); \ - \ - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \ - \ - tcg_temp_free_i32(t0); \ - tcg_temp_free_i32(t1); \ -} -#define GEN_SPEFPUOP_COMP_64(name) \ -static inline void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0, t1; \ - if (unlikely(!ctx->spe_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_SPEU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - gen_load_gpr64(t0, rA(ctx->opcode)); \ - gen_load_gpr64(t1, rB(ctx->opcode)); \ - gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \ - tcg_temp_free_i64(t0); \ - tcg_temp_free_i64(t1); \ -} - -/* Single precision floating-point vectors operations */ -/* Arithmetic */ -GEN_SPEFPUOP_ARITH2_64_64(evfsadd); -GEN_SPEFPUOP_ARITH2_64_64(evfssub); -GEN_SPEFPUOP_ARITH2_64_64(evfsmul); -GEN_SPEFPUOP_ARITH2_64_64(evfsdiv); -static inline void gen_evfsabs(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_andi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - ~0x80000000); - tcg_gen_andi_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], - ~0x80000000); -} -static inline void gen_evfsnabs(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_ori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - 0x80000000); - tcg_gen_ori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], - 0x80000000); -} -static inline void gen_evfsneg(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_xori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - 0x80000000); - tcg_gen_xori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], - 0x80000000); -} - -/* Conversion */ -GEN_SPEFPUOP_CONV_64_64(evfscfui); -GEN_SPEFPUOP_CONV_64_64(evfscfsi); -GEN_SPEFPUOP_CONV_64_64(evfscfuf); -GEN_SPEFPUOP_CONV_64_64(evfscfsf); -GEN_SPEFPUOP_CONV_64_64(evfsctui); -GEN_SPEFPUOP_CONV_64_64(evfsctsi); -GEN_SPEFPUOP_CONV_64_64(evfsctuf); -GEN_SPEFPUOP_CONV_64_64(evfsctsf); -GEN_SPEFPUOP_CONV_64_64(evfsctuiz); -GEN_SPEFPUOP_CONV_64_64(evfsctsiz); - -/* Comparison */ -GEN_SPEFPUOP_COMP_64(evfscmpgt); -GEN_SPEFPUOP_COMP_64(evfscmplt); -GEN_SPEFPUOP_COMP_64(evfscmpeq); -GEN_SPEFPUOP_COMP_64(evfststgt); -GEN_SPEFPUOP_COMP_64(evfststlt); -GEN_SPEFPUOP_COMP_64(evfststeq); - -/* Opcodes definitions */ -GEN_SPE(evfsadd, evfssub, 0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // -GEN_SPE(evfsabs, evfsnabs, 0x02, 0x0A, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE); // -GEN_SPE(evfsneg, speundef, 0x03, 0x0A, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(evfsmul, evfsdiv, 0x04, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // -GEN_SPE(evfscmpgt, evfscmplt, 0x06, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // -GEN_SPE(evfscmpeq, speundef, 0x07, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(evfscfui, evfscfsi, 0x08, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); 
// -GEN_SPE(evfscfuf, evfscfsf, 0x09, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(evfsctui, evfsctsi, 0x0A, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(evfsctuf, evfsctsf, 0x0B, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(evfsctuiz, speundef, 0x0C, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(evfsctsiz, speundef, 0x0D, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(evfststgt, evfststlt, 0x0E, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // -GEN_SPE(evfststeq, speundef, 0x0F, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE); // - -/* Single precision floating-point operations */ -/* Arithmetic */ -GEN_SPEFPUOP_ARITH2_32_32(efsadd); -GEN_SPEFPUOP_ARITH2_32_32(efssub); -GEN_SPEFPUOP_ARITH2_32_32(efsmul); -GEN_SPEFPUOP_ARITH2_32_32(efsdiv); -static inline void gen_efsabs(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_andi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], (target_long)~0x80000000LL); -} -static inline void gen_efsnabs(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_ori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0x80000000); -} -static inline void gen_efsneg(DisasContext *ctx) +#if defined(TARGET_PPC64) +static void gen_maddld(DisasContext *ctx) { - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_xori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0x80000000); -} - -/* Conversion */ -GEN_SPEFPUOP_CONV_32_32(efscfui); -GEN_SPEFPUOP_CONV_32_32(efscfsi); -GEN_SPEFPUOP_CONV_32_32(efscfuf); -GEN_SPEFPUOP_CONV_32_32(efscfsf); -GEN_SPEFPUOP_CONV_32_32(efsctui); -GEN_SPEFPUOP_CONV_32_32(efsctsi); -GEN_SPEFPUOP_CONV_32_32(efsctuf); -GEN_SPEFPUOP_CONV_32_32(efsctsf); -GEN_SPEFPUOP_CONV_32_32(efsctuiz); -GEN_SPEFPUOP_CONV_32_32(efsctsiz); -GEN_SPEFPUOP_CONV_32_64(efscfd); - -/* Comparison */ -GEN_SPEFPUOP_COMP_32(efscmpgt); -GEN_SPEFPUOP_COMP_32(efscmplt); -GEN_SPEFPUOP_COMP_32(efscmpeq); -GEN_SPEFPUOP_COMP_32(efststgt); -GEN_SPEFPUOP_COMP_32(efststlt); -GEN_SPEFPUOP_COMP_32(efststeq); - -/* Opcodes definitions */ -GEN_SPE(efsadd, efssub, 0x00, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // -GEN_SPE(efsabs, efsnabs, 0x02, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE); // -GEN_SPE(efsneg, speundef, 0x03, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(efsmul, efsdiv, 0x04, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // -GEN_SPE(efscmpgt, efscmplt, 0x06, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // -GEN_SPE(efscmpeq, efscfd, 0x07, 0x0B, 0x00600000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(efscfui, efscfsi, 0x08, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(efscfuf, efscfsf, 0x09, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(efsctui, efsctsi, 0x0A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(efsctuf, efsctsf, 0x0B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // -GEN_SPE(efsctuiz, speundef, 0x0C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(efsctsiz, speundef, 0x0D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // -GEN_SPE(efststgt, efststlt, 0x0E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // -GEN_SPE(efststeq, speundef, 0x0F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE); // - -/* Double precision floating-point operations */ -/* Arithmetic */ -GEN_SPEFPUOP_ARITH2_64_64(efdadd); 
-GEN_SPEFPUOP_ARITH2_64_64(efdsub); -GEN_SPEFPUOP_ARITH2_64_64(efdmul); -GEN_SPEFPUOP_ARITH2_64_64(efddiv); -static inline void gen_efdabs(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_andi_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], - ~0x80000000); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); + tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]); + tcg_temp_free_i64(t1); } -static inline void gen_efdnabs(DisasContext *ctx) + +/* maddhd maddhdu */ +static void gen_maddhd_maddhdu(DisasContext *ctx) { - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_ori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], - 0x80000000); + TCGv_i64 lo = tcg_temp_new_i64(); + TCGv_i64 hi = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + if (Rc(ctx->opcode)) { + tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], + cpu_gpr[rB(ctx->opcode)]); + tcg_gen_movi_i64(t1, 0); + } else { + tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], + cpu_gpr[rB(ctx->opcode)]); + tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63); + } + tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi, + cpu_gpr[rC(ctx->opcode)], t1); + tcg_temp_free_i64(lo); + tcg_temp_free_i64(hi); + tcg_temp_free_i64(t1); } -static inline void gen_efdneg(DisasContext *ctx) -{ - if (unlikely(!ctx->spe_enabled)) { - gen_exception(ctx, POWERPC_EXCP_SPEU); - return; - } - tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - tcg_gen_xori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], - 0x80000000); -} - -/* Conversion */ -GEN_SPEFPUOP_CONV_64_32(efdcfui); -GEN_SPEFPUOP_CONV_64_32(efdcfsi); -GEN_SPEFPUOP_CONV_64_32(efdcfuf); -GEN_SPEFPUOP_CONV_64_32(efdcfsf); -GEN_SPEFPUOP_CONV_32_64(efdctui); -GEN_SPEFPUOP_CONV_32_64(efdctsi); -GEN_SPEFPUOP_CONV_32_64(efdctuf); -GEN_SPEFPUOP_CONV_32_64(efdctsf); -GEN_SPEFPUOP_CONV_32_64(efdctuiz); -GEN_SPEFPUOP_CONV_32_64(efdctsiz); -GEN_SPEFPUOP_CONV_64_32(efdcfs); -GEN_SPEFPUOP_CONV_64_64(efdcfuid); -GEN_SPEFPUOP_CONV_64_64(efdcfsid); -GEN_SPEFPUOP_CONV_64_64(efdctuidz); -GEN_SPEFPUOP_CONV_64_64(efdctsidz); - -/* Comparison */ -GEN_SPEFPUOP_COMP_64(efdcmpgt); -GEN_SPEFPUOP_COMP_64(efdcmplt); -GEN_SPEFPUOP_COMP_64(efdcmpeq); -GEN_SPEFPUOP_COMP_64(efdtstgt); -GEN_SPEFPUOP_COMP_64(efdtstlt); -GEN_SPEFPUOP_COMP_64(efdtsteq); - -/* Opcodes definitions */ -GEN_SPE(efdadd, efdsub, 0x10, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE); // -GEN_SPE(efdcfuid, efdcfsid, 0x11, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdabs, efdnabs, 0x12, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_DOUBLE); // -GEN_SPE(efdneg, speundef, 0x13, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_DOUBLE); // -GEN_SPE(efdmul, efddiv, 0x14, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE); // -GEN_SPE(efdctuidz, efdctsidz, 0x15, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdcmpgt, efdcmplt, 0x16, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE); // -GEN_SPE(efdcmpeq, efdcfs, 0x17, 0x0B, 0x00600000, 0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdcfui, efdcfsi, 0x18, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdcfuf, efdcfsf, 0x19, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdctui, efdctsi, 0x1A, 0x0B, 0x00180000, 
0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdctuf, efdctsf, 0x1B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // -GEN_SPE(efdctuiz, speundef, 0x1C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // -GEN_SPE(efdctsiz, speundef, 0x1D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // -GEN_SPE(efdtstgt, efdtstlt, 0x1E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE); // -GEN_SPE(efdtsteq, speundef, 0x1F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // +#endif /* defined(TARGET_PPC64) */ static void gen_tbegin(DisasContext *ctx) { @@ -9865,18 +6194,33 @@ static inline void gen_##name(DisasContext *ctx) \ GEN_TM_PRIV_NOOP(treclaim); GEN_TM_PRIV_NOOP(trechkpt); +#include "translate/fp-impl.inc.c" + +#include "translate/vmx-impl.inc.c" + +#include "translate/vsx-impl.inc.c" + +#include "translate/dfp-impl.inc.c" + +#include "translate/spe-impl.inc.c" + static opcode_t opcodes[] = { GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE), GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x00400000, PPC_INTEGER), GEN_HANDLER(cmpi, 0x0B, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), -GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400000, PPC_INTEGER), +GEN_HANDLER(cmpl, 0x1F, 0x00, 0x01, 0x00400001, PPC_INTEGER), GEN_HANDLER(cmpli, 0x0A, 0xFF, 0xFF, 0x00400000, PPC_INTEGER), +#if defined(TARGET_PPC64) +GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300), +#endif GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300), GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), GEN_HANDLER(addi, 0x0E, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(addic, 0x0C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER2(addic_, "addic.", 0x0D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(addis, 0x0F, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), +GEN_HANDLER_E(addpcis, 0x13, 0x2, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), GEN_HANDLER(mulhw, 0x1F, 0x0B, 0x02, 0x00000400, PPC_INTEGER), GEN_HANDLER(mulhwu, 0x1F, 0x0B, 0x00, 0x00000400, PPC_INTEGER), GEN_HANDLER(mullw, 0x1F, 0x0B, 0x07, 0x00000000, PPC_INTEGER), @@ -9891,6 +6235,7 @@ GEN_HANDLER(subfic, 0x08, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER), +GEN_HANDLER_E(cnttzw, 0x1F, 0x1A, 0x10, 0x00000000, PPC_NONE, PPC2_ISA300), GEN_HANDLER(or, 0x1F, 0x1C, 0x0D, 0x00000000, PPC_INTEGER), GEN_HANDLER(xor, 0x1F, 0x1C, 0x09, 0x00000000, PPC_INTEGER), GEN_HANDLER(ori, 0x18, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), @@ -9903,6 +6248,8 @@ GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205), #if defined(TARGET_PPC64) GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD), GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B), +GEN_HANDLER_E(cnttzd, 0x1F, 0x1A, 0x11, 0x00000000, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(darn, 0x1F, 0x13, 0x17, 0x001CF801, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205), GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206), #endif @@ -9919,25 +6266,11 @@ GEN_HANDLER(srad, 0x1F, 0x1A, 0x18, 0x00000000, PPC_64B), GEN_HANDLER2(sradi0, "sradi", 0x1F, 0x1A, 0x19, 0x00000000, PPC_64B), GEN_HANDLER2(sradi1, "sradi", 0x1F, 0x1B, 0x19, 0x00000000, PPC_64B), GEN_HANDLER(srd, 0x1F, 0x1B, 0x10, 0x00000000, PPC_64B), +GEN_HANDLER2_E(extswsli0, "extswsli", 
0x1F, 0x1A, 0x1B, 0x00000000, + PPC_NONE, PPC2_ISA300), +GEN_HANDLER2_E(extswsli1, "extswsli", 0x1F, 0x1B, 0x1B, 0x00000000, + PPC_NONE, PPC2_ISA300), #endif -GEN_HANDLER(frsqrtes, 0x3B, 0x1A, 0xFF, 0x001F07C0, PPC_FLOAT_FRSQRTES), -GEN_HANDLER(fsqrt, 0x3F, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT), -GEN_HANDLER(fsqrts, 0x3B, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT), -GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x00600001, PPC_FLOAT), -GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x00600001, PPC_FLOAT), -GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fmr, 0x3F, 0x08, 0x02, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fnabs, 0x3F, 0x08, 0x04, 0x001F0000, PPC_FLOAT), -GEN_HANDLER(fneg, 0x3F, 0x08, 0x01, 0x001F0000, PPC_FLOAT), -GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x00000001, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x00000001, PPC_NONE, PPC2_VSX207), -GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT), -GEN_HANDLER(mffs, 0x3F, 0x07, 0x12, 0x001FF800, PPC_FLOAT), -GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT), -GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT), -GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x00000000, PPC_FLOAT), -GEN_HANDLER(mtfsfi, 0x3F, 0x06, 0x04, 0x006e0800, PPC_FLOAT), #if defined(TARGET_PPC64) GEN_HANDLER(ld, 0x3A, 0xFF, 0xFF, 0x00000000, PPC_64B), GEN_HANDLER(lq, 0x38, 0xFF, 0xFF, 0x00000000, PPC_64BX), @@ -9969,7 +6302,7 @@ GEN_HANDLER(b, 0x12, 0xFF, 0xFF, 0x00000000, PPC_FLOW), GEN_HANDLER(bc, 0x10, 0xFF, 0xFF, 0x00000000, PPC_FLOW), GEN_HANDLER(bcctr, 0x13, 0x10, 0x10, 0x00000000, PPC_FLOW), GEN_HANDLER(bclr, 0x13, 0x10, 0x00, 0x00000000, PPC_FLOW), -GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0, PPC_NONE, PPC2_BCTAR_ISA207), +GEN_HANDLER_E(bctar, 0x13, 0x10, 0x11, 0x0000E000, PPC_NONE, PPC2_BCTAR_ISA207), GEN_HANDLER(mcrf, 0x13, 0x00, 0xFF, 0x00000001, PPC_INTEGER), GEN_HANDLER(rfi, 0x13, 0x12, 0x01, 0x03FF8001, PPC_FLOW), #if defined(TARGET_PPC64) @@ -9995,6 +6328,7 @@ GEN_HANDLER(mftb, 0x1F, 0x13, 0x0B, 0x00000001, PPC_MFTB), GEN_HANDLER(mtcrf, 0x1F, 0x10, 0x04, 0x00000801, PPC_MISC), #if defined(TARGET_PPC64) GEN_HANDLER(mtmsrd, 0x1F, 0x12, 0x05, 0x001EF801, PPC_64B), +GEN_HANDLER_E(setb, 0x1F, 0x00, 0x04, 0x0003F801, PPC_NONE, PPC2_ISA300), #endif GEN_HANDLER(mtmsr, 0x1F, 0x12, 0x04, 0x001EF801, PPC_MISC), GEN_HANDLER(mtspr, 0x1F, 0x13, 0x0E, 0x00000000, PPC_MISC), @@ -10143,10 +6477,11 @@ GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC), GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC), GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC), GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC), -GEN_HANDLER2(evsel0, "evsel", 0x04, 0x1c, 0x09, 0x00000000, PPC_SPE), -GEN_HANDLER2(evsel1, "evsel", 0x04, 0x1d, 0x09, 0x00000000, PPC_SPE), -GEN_HANDLER2(evsel2, "evsel", 0x04, 0x1e, 0x09, 0x00000000, PPC_SPE), -GEN_HANDLER2(evsel3, "evsel", 0x04, 0x1f, 0x09, 0x00000000, PPC_SPE), +#if defined(TARGET_PPC64) +GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE, + PPC2_ISA300), +GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), +#endif #undef GEN_INT_ARITH_ADD #undef GEN_INT_ARITH_ADD_CONST @@ -10177,6 +6512,8 @@ GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divweuo, 0x1F, 
0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206), +GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300), #if defined(TARGET_PPC64) #undef GEN_INT_ARITH_DIVD @@ -10191,6 +6528,8 @@ GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divde, 0x1F, 0x09, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divdeo, 0x1F, 0x09, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206), +GEN_HANDLER_E(modsd, 0x1F, 0x09, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(modud, 0x1F, 0x09, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300), #undef GEN_INT_ARITH_MUL_HELPER #define GEN_INT_ARITH_MUL_HELPER(name, opc3) \ @@ -10259,66 +6598,6 @@ GEN_PPC64_R2(rldcr, 0x1E, 0x09), GEN_PPC64_R4(rldimi, 0x1E, 0x06), #endif -#undef _GEN_FLOAT_ACB -#undef GEN_FLOAT_ACB -#undef _GEN_FLOAT_AB -#undef GEN_FLOAT_AB -#undef _GEN_FLOAT_AC -#undef GEN_FLOAT_AC -#undef GEN_FLOAT_B -#undef GEN_FLOAT_BS -#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, 0x00000000, type) -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type), \ -_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type) -#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, inval, type) -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type), \ -_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type) -#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, inval, type) -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type), \ -_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type) -#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ -GEN_HANDLER(f##name, 0x3F, op2, op3, 0x001F0000, type) -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, 0x001F07C0, type) - -GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT), -GEN_FLOAT_AB(div, 0x12, 0x000007C0, 1, PPC_FLOAT), -GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT), -GEN_FLOAT_BS(re, 0x3F, 0x18, 1, PPC_FLOAT_EXT), -GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES), -GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE), -_GEN_FLOAT_ACB(sel, sel, 0x3F, 0x17, 0, 0, PPC_FLOAT_FSEL), -GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT), -GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT), -GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT), -GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT), -GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT), -GEN_HANDLER_E(ftdiv, 0x3F, 0x00, 0x04, 1, PPC_NONE, PPC2_FP_TST_ISA206), -GEN_HANDLER_E(ftsqrt, 0x3F, 0x00, 0x05, 1, PPC_NONE, PPC2_FP_TST_ISA206), -GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT), -GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT), -GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT), -GEN_HANDLER_E(fcfid, 0x3F, 0x0E, 0x1A, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, 
PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fctid, 0x3F, 0x0E, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fctidu, 0x3F, 0x0E, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(fctidz, 0x3F, 0x0F, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), -GEN_HANDLER_E(fctiduz, 0x3F, 0x0F, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT), -GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT), - #undef GEN_LD #undef GEN_LDU #undef GEN_LDUX @@ -10345,12 +6624,12 @@ GEN_LDS(lwz, ld32u, 0x00, PPC_INTEGER) #if defined(TARGET_PPC64) GEN_LDUX(lwa, ld32s, 0x15, 0x0B, PPC_64B) GEN_LDX(lwa, ld32s, 0x15, 0x0A, PPC_64B) -GEN_LDUX(ld, ld64, 0x15, 0x01, PPC_64B) -GEN_LDX(ld, ld64, 0x15, 0x00, PPC_64B) -GEN_LDX_E(ldbr, ld64ur, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE) +GEN_LDUX(ld, ld64_i64, 0x15, 0x01, PPC_64B) +GEN_LDX(ld, ld64_i64, 0x15, 0x00, PPC_64B) +GEN_LDX_E(ldbr, ld64ur_i64, 0x14, 0x10, PPC_NONE, PPC2_DBRX, CHK_NONE) /* HV/P7 and later only */ -GEN_LDX_HVRM(ldcix, ld64, 0x15, 0x1b, PPC_CILDST) +GEN_LDX_HVRM(ldcix, ld64_i64, 0x15, 0x1b, PPC_CILDST) GEN_LDX_HVRM(lwzcix, ld32u, 0x15, 0x18, PPC_CILDST) GEN_LDX_HVRM(lhzcix, ld16u, 0x15, 0x19, PPC_CILDST) GEN_LDX_HVRM(lbzcix, ld8u, 0x15, 0x1a, PPC_CILDST) @@ -10381,10 +6660,10 @@ GEN_STS(stb, st8, 0x06, PPC_INTEGER) GEN_STS(sth, st16, 0x0C, PPC_INTEGER) GEN_STS(stw, st32, 0x04, PPC_INTEGER) #if defined(TARGET_PPC64) -GEN_STUX(std, st64, 0x15, 0x05, PPC_64B) -GEN_STX(std, st64, 0x15, 0x04, PPC_64B) -GEN_STX_E(stdbr, st64r, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE) -GEN_STX_HVRM(stdcix, st64, 0x15, 0x1f, PPC_CILDST) +GEN_STUX(std, st64_i64, 0x15, 0x05, PPC_64B) +GEN_STX(std, st64_i64, 0x15, 0x04, PPC_64B) +GEN_STX_E(stdbr, st64r_i64, 0x14, 0x14, PPC_NONE, PPC2_DBRX, CHK_NONE) +GEN_STX_HVRM(stdcix, st64_i64, 0x15, 0x1f, PPC_CILDST) GEN_STX_HVRM(stwcix, st32, 0x15, 0x1c, PPC_CILDST) GEN_STX_HVRM(sthcix, st16, 0x15, 0x1d, PPC_CILDST) GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST) @@ -10392,57 +6671,6 @@ GEN_STX_HVRM(stbcix, st8, 0x15, 0x1e, PPC_CILDST) GEN_STX(sthbr, st16r, 0x16, 0x1C, PPC_INTEGER) GEN_STX(stwbr, st32r, 0x16, 0x14, PPC_INTEGER) -#undef GEN_LDF -#undef GEN_LDUF -#undef GEN_LDUXF -#undef GEN_LDXF -#undef GEN_LDFS -#define GEN_LDF(name, ldop, opc, type) \ -GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type), -#define GEN_LDUF(name, ldop, opc, type) \ -GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x00000000, type), -#define GEN_LDUXF(name, ldop, opc, type) \ -GEN_HANDLER(name##ux, 0x1F, 0x17, opc, 0x00000001, type), -#define GEN_LDXF(name, ldop, opc2, opc3, type) \ -GEN_HANDLER(name##x, 0x1F, opc2, opc3, 0x00000001, type), -#define GEN_LDFS(name, ldop, op, type) \ -GEN_LDF(name, ldop, op | 0x20, type) \ -GEN_LDUF(name, ldop, op | 0x21, type) \ -GEN_LDUXF(name, ldop, op | 0x01, type) \ -GEN_LDXF(name, ldop, 0x17, op | 0x00, type) - -GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT) -GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT) -GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x00000001, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(lfiwzx, 0x1f, 0x17, 0x1b, 0x1, PPC_NONE, PPC2_FP_CVT_ISA206), -GEN_HANDLER_E(lfdp, 0x39, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(lfdpx, 0x1F, 0x17, 0x18, 0x00200001, PPC_NONE, PPC2_ISA205), - -#undef GEN_STF -#undef GEN_STUF -#undef GEN_STUXF -#undef GEN_STXF -#undef GEN_STFS -#define GEN_STF(name, stop, opc, type) \ -GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type), -#define GEN_STUF(name, 
stop, opc, type) \ -GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x00000000, type), -#define GEN_STUXF(name, stop, opc, type) \ -GEN_HANDLER(name##ux, 0x1F, 0x17, opc, 0x00000001, type), -#define GEN_STXF(name, stop, opc2, opc3, type) \ -GEN_HANDLER(name##x, 0x1F, opc2, opc3, 0x00000001, type), -#define GEN_STFS(name, stop, op, type) \ -GEN_STF(name, stop, op | 0x20, type) \ -GEN_STUF(name, stop, op | 0x21, type) \ -GEN_STUXF(name, stop, op | 0x01, type) \ -GEN_STXF(name, stop, 0x17, op | 0x00, type) - -GEN_STFS(stfd, st64, 0x16, PPC_FLOAT) -GEN_STFS(stfs, st32fs, 0x14, PPC_FLOAT) -GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) -GEN_HANDLER_E(stfdp, 0x3D, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x00200001, PPC_NONE, PPC2_ISA205), - #undef GEN_CRLOGIC #define GEN_CRLOGIC(name, tcg_op, opc) \ GEN_HANDLER(name, 0x13, 0x01, opc, 0x00000001, PPC_INTEGER) @@ -10501,807 +6729,6 @@ GEN_MAC_HANDLER(mulhhwu, 0x08, 0x00), GEN_MAC_HANDLER(mullhw, 0x08, 0x0D), GEN_MAC_HANDLER(mullhwu, 0x08, 0x0C), -#undef GEN_VR_LDX -#undef GEN_VR_STX -#undef GEN_VR_LVE -#undef GEN_VR_STVE -#define GEN_VR_LDX(name, opc2, opc3) \ -GEN_HANDLER(name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -#define GEN_VR_STX(name, opc2, opc3) \ -GEN_HANDLER(st##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -#define GEN_VR_LVE(name, opc2, opc3) \ - GEN_HANDLER(lve##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -#define GEN_VR_STVE(name, opc2, opc3) \ - GEN_HANDLER(stve##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -GEN_VR_LDX(lvx, 0x07, 0x03), -GEN_VR_LDX(lvxl, 0x07, 0x0B), -GEN_VR_LVE(bx, 0x07, 0x00), -GEN_VR_LVE(hx, 0x07, 0x01), -GEN_VR_LVE(wx, 0x07, 0x02), -GEN_VR_STX(svx, 0x07, 0x07), -GEN_VR_STX(svxl, 0x07, 0x0F), -GEN_VR_STVE(bx, 0x07, 0x04), -GEN_VR_STVE(hx, 0x07, 0x05), -GEN_VR_STVE(wx, 0x07, 0x06), - -#undef GEN_VX_LOGICAL -#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \ -GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) - -#undef GEN_VX_LOGICAL_207 -#define GEN_VX_LOGICAL_207(name, tcg_op, opc2, opc3) \ -GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207) - -GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16), -GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17), -GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18), -GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19), -GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20), -GEN_VX_LOGICAL_207(veqv, tcg_gen_eqv_i64, 2, 26), -GEN_VX_LOGICAL_207(vnand, tcg_gen_nand_i64, 2, 22), -GEN_VX_LOGICAL_207(vorc, tcg_gen_orc_i64, 2, 21), - -#undef GEN_VXFORM -#define GEN_VXFORM(name, opc2, opc3) \ -GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) - -#undef GEN_VXFORM_207 -#define GEN_VXFORM_207(name, opc2, opc3) \ -GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207) - -#undef GEN_VXFORM_DUAL -#define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \ -GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1) - -#undef GEN_VXRFORM_DUAL -#define GEN_VXRFORM_DUAL(name0, name1, opc2, opc3, tp0, tp1) \ -GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, tp0, tp1), \ -GEN_HANDLER_E(name0##_##name1, 0x4, opc2, (opc3 | 0x10), 0x00000000, tp0, tp1), - -GEN_VXFORM(vaddubm, 0, 0), -GEN_VXFORM(vadduhm, 0, 1), -GEN_VXFORM(vadduwm, 0, 2), -GEN_VXFORM_207(vaddudm, 0, 3), -GEN_VXFORM_DUAL(vsububm, bcdadd, 0, 16, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_DUAL(vsubuhm, bcdsub, 0, 17, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vsubuwm, 0, 18), -GEN_VXFORM_207(vsubudm, 0, 19), 
-GEN_VXFORM(vmaxub, 1, 0), -GEN_VXFORM(vmaxuh, 1, 1), -GEN_VXFORM(vmaxuw, 1, 2), -GEN_VXFORM_207(vmaxud, 1, 3), -GEN_VXFORM(vmaxsb, 1, 4), -GEN_VXFORM(vmaxsh, 1, 5), -GEN_VXFORM(vmaxsw, 1, 6), -GEN_VXFORM_207(vmaxsd, 1, 7), -GEN_VXFORM(vminub, 1, 8), -GEN_VXFORM(vminuh, 1, 9), -GEN_VXFORM(vminuw, 1, 10), -GEN_VXFORM_207(vminud, 1, 11), -GEN_VXFORM(vminsb, 1, 12), -GEN_VXFORM(vminsh, 1, 13), -GEN_VXFORM(vminsw, 1, 14), -GEN_VXFORM_207(vminsd, 1, 15), -GEN_VXFORM(vavgub, 1, 16), -GEN_VXFORM(vavguh, 1, 17), -GEN_VXFORM(vavguw, 1, 18), -GEN_VXFORM(vavgsb, 1, 20), -GEN_VXFORM(vavgsh, 1, 21), -GEN_VXFORM(vavgsw, 1, 22), -GEN_VXFORM(vmrghb, 6, 0), -GEN_VXFORM(vmrghh, 6, 1), -GEN_VXFORM(vmrghw, 6, 2), -GEN_VXFORM(vmrglb, 6, 4), -GEN_VXFORM(vmrglh, 6, 5), -GEN_VXFORM(vmrglw, 6, 6), -GEN_VXFORM_207(vmrgew, 6, 30), -GEN_VXFORM_207(vmrgow, 6, 26), -GEN_VXFORM(vmuloub, 4, 0), -GEN_VXFORM(vmulouh, 4, 1), -GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vmulosb, 4, 4), -GEN_VXFORM(vmulosh, 4, 5), -GEN_VXFORM_207(vmulosw, 4, 6), -GEN_VXFORM(vmuleub, 4, 8), -GEN_VXFORM(vmuleuh, 4, 9), -GEN_VXFORM_207(vmuleuw, 4, 10), -GEN_VXFORM(vmulesb, 4, 12), -GEN_VXFORM(vmulesh, 4, 13), -GEN_VXFORM_207(vmulesw, 4, 14), -GEN_VXFORM(vslb, 2, 4), -GEN_VXFORM(vslh, 2, 5), -GEN_VXFORM(vslw, 2, 6), -GEN_VXFORM_207(vsld, 2, 23), -GEN_VXFORM(vsrb, 2, 8), -GEN_VXFORM(vsrh, 2, 9), -GEN_VXFORM(vsrw, 2, 10), -GEN_VXFORM_207(vsrd, 2, 27), -GEN_VXFORM(vsrab, 2, 12), -GEN_VXFORM(vsrah, 2, 13), -GEN_VXFORM(vsraw, 2, 14), -GEN_VXFORM_207(vsrad, 2, 15), -GEN_VXFORM(vslo, 6, 16), -GEN_VXFORM(vsro, 6, 17), -GEN_VXFORM(vaddcuw, 0, 6), -GEN_VXFORM(vsubcuw, 0, 22), -GEN_VXFORM(vaddubs, 0, 8), -GEN_VXFORM(vadduhs, 0, 9), -GEN_VXFORM(vadduws, 0, 10), -GEN_VXFORM(vaddsbs, 0, 12), -GEN_VXFORM(vaddshs, 0, 13), -GEN_VXFORM(vaddsws, 0, 14), -GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE), -GEN_VXFORM(vsubuws, 0, 26), -GEN_VXFORM(vsubsbs, 0, 28), -GEN_VXFORM(vsubshs, 0, 29), -GEN_VXFORM(vsubsws, 0, 30), -GEN_VXFORM_207(vadduqm, 0, 4), -GEN_VXFORM_207(vaddcuq, 0, 5), -GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM_207(vsubuqm, 0, 20), -GEN_VXFORM_207(vsubcuq, 0, 21), -GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM(vrlb, 2, 0), -GEN_VXFORM(vrlh, 2, 1), -GEN_VXFORM(vrlw, 2, 2), -GEN_VXFORM_207(vrld, 2, 3), -GEN_VXFORM(vsl, 2, 7), -GEN_VXFORM(vsr, 2, 11), -GEN_VXFORM(vpkuhum, 7, 0), -GEN_VXFORM(vpkuwum, 7, 1), -GEN_VXFORM_207(vpkudum, 7, 17), -GEN_VXFORM(vpkuhus, 7, 2), -GEN_VXFORM(vpkuwus, 7, 3), -GEN_VXFORM_207(vpkudus, 7, 19), -GEN_VXFORM(vpkshus, 7, 4), -GEN_VXFORM(vpkswus, 7, 5), -GEN_VXFORM_207(vpksdus, 7, 21), -GEN_VXFORM(vpkshss, 7, 6), -GEN_VXFORM(vpkswss, 7, 7), -GEN_VXFORM_207(vpksdss, 7, 23), -GEN_VXFORM(vpkpx, 7, 12), -GEN_VXFORM(vsum4ubs, 4, 24), -GEN_VXFORM(vsum4sbs, 4, 28), -GEN_VXFORM(vsum4shs, 4, 25), -GEN_VXFORM(vsum2sws, 4, 26), -GEN_VXFORM(vsumsws, 4, 30), -GEN_VXFORM(vaddfp, 5, 0), -GEN_VXFORM(vsubfp, 5, 1), -GEN_VXFORM(vmaxfp, 5, 16), -GEN_VXFORM(vminfp, 5, 17), - -#undef GEN_VXRFORM1 -#undef GEN_VXRFORM -#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \ - GEN_HANDLER2(name, str, 0x4, opc2, opc3, 0x00000000, PPC_ALTIVEC), -#define GEN_VXRFORM(name, opc2, opc3) \ - GEN_VXRFORM1(name, name, #name, opc2, opc3) \ - GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4))) -GEN_VXRFORM(vcmpequb, 3, 0) -GEN_VXRFORM(vcmpequh, 3, 1) 
-GEN_VXRFORM(vcmpequw, 3, 2) -GEN_VXRFORM(vcmpgtsb, 3, 12) -GEN_VXRFORM(vcmpgtsh, 3, 13) -GEN_VXRFORM(vcmpgtsw, 3, 14) -GEN_VXRFORM(vcmpgtub, 3, 8) -GEN_VXRFORM(vcmpgtuh, 3, 9) -GEN_VXRFORM(vcmpgtuw, 3, 10) -GEN_VXRFORM_DUAL(vcmpeqfp, vcmpequd, 3, 3, PPC_ALTIVEC, PPC_NONE) -GEN_VXRFORM(vcmpgefp, 3, 7) -GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE) -GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE) - -#undef GEN_VXFORM_SIMM -#define GEN_VXFORM_SIMM(name, opc2, opc3) \ - GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) -GEN_VXFORM_SIMM(vspltisb, 6, 12), -GEN_VXFORM_SIMM(vspltish, 6, 13), -GEN_VXFORM_SIMM(vspltisw, 6, 14), - -#undef GEN_VXFORM_NOA -#define GEN_VXFORM_NOA(name, opc2, opc3) \ - GEN_HANDLER(name, 0x04, opc2, opc3, 0x001f0000, PPC_ALTIVEC) -GEN_VXFORM_NOA(vupkhsb, 7, 8), -GEN_VXFORM_NOA(vupkhsh, 7, 9), -GEN_VXFORM_207(vupkhsw, 7, 25), -GEN_VXFORM_NOA(vupklsb, 7, 10), -GEN_VXFORM_NOA(vupklsh, 7, 11), -GEN_VXFORM_207(vupklsw, 7, 27), -GEN_VXFORM_NOA(vupkhpx, 7, 13), -GEN_VXFORM_NOA(vupklpx, 7, 15), -GEN_VXFORM_NOA(vrefp, 5, 4), -GEN_VXFORM_NOA(vrsqrtefp, 5, 5), -GEN_VXFORM_NOA(vexptefp, 5, 6), -GEN_VXFORM_NOA(vlogefp, 5, 7), -GEN_VXFORM_NOA(vrfim, 5, 11), -GEN_VXFORM_NOA(vrfin, 5, 8), -GEN_VXFORM_NOA(vrfip, 5, 10), -GEN_VXFORM_NOA(vrfiz, 5, 9), - -#undef GEN_VXFORM_UIMM -#define GEN_VXFORM_UIMM(name, opc2, opc3) \ - GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) -GEN_VXFORM_UIMM(vspltb, 6, 8), -GEN_VXFORM_UIMM(vsplth, 6, 9), -GEN_VXFORM_UIMM(vspltw, 6, 10), -GEN_VXFORM_UIMM(vcfux, 5, 12), -GEN_VXFORM_UIMM(vcfsx, 5, 13), -GEN_VXFORM_UIMM(vctuxs, 5, 14), -GEN_VXFORM_UIMM(vctsxs, 5, 15), - -#undef GEN_VAFORM_PAIRED -#define GEN_VAFORM_PAIRED(name0, name1, opc2) \ - GEN_HANDLER(name0##_##name1, 0x04, opc2, 0xFF, 0x00000000, PPC_ALTIVEC) -GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), -GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18), -GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19), -GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20), -GEN_VAFORM_PAIRED(vsel, vperm, 21), -GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), - -GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM_DUAL(vclzh, vpopcnth, 1, 29, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM_DUAL(vclzw, vpopcntw, 1, 30, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207), - -GEN_VXFORM_207(vbpermq, 6, 21), -GEN_VXFORM_207(vgbbd, 6, 20), -GEN_VXFORM_207(vpmsumb, 4, 16), -GEN_VXFORM_207(vpmsumh, 4, 17), -GEN_VXFORM_207(vpmsumw, 4, 18), -GEN_VXFORM_207(vpmsumd, 4, 19), - -GEN_VXFORM_207(vsbox, 4, 23), - -GEN_VXFORM_DUAL(vcipher, vcipherlast, 4, 20, PPC_NONE, PPC2_ALTIVEC_207), -GEN_VXFORM_DUAL(vncipher, vncipherlast, 4, 21, PPC_NONE, PPC2_ALTIVEC_207), - -GEN_VXFORM_207(vshasigmaw, 1, 26), -GEN_VXFORM_207(vshasigmad, 1, 27), - -GEN_VXFORM_DUAL(vsldoi, vpermxor, 22, 0xFF, PPC_ALTIVEC, PPC_NONE), - -GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX), - -GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, 
PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), -GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX), - -GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(mtvsrwz, 0x1F, 0x13, 0x07, 0x0000F800, PPC_NONE, PPC2_VSX207), -#if defined(TARGET_PPC64) -GEN_HANDLER_E(mfvsrd, 0x1F, 0x13, 0x01, 0x0000F800, PPC_NONE, PPC2_VSX207), -GEN_HANDLER_E(mtvsrd, 0x1F, 0x13, 0x05, 0x0000F800, PPC_NONE, PPC2_VSX207), -#endif - -#undef GEN_XX2FORM -#define GEN_XX2FORM(name, opc2, opc3, fl2) \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2) - -#undef GEN_XX3FORM -#define GEN_XX3FORM(name, opc2, opc3, fl2) \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 0, PPC_NONE, fl2) - -#undef GEN_XX2IFORM -#define GEN_XX2IFORM(name, opc2, opc3, fl2) \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 1, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 1, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 1, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 1, PPC_NONE, fl2) - -#undef GEN_XX3_RC_FORM -#define GEN_XX3_RC_FORM(name, opc2, opc3, fl2) \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x00, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x10, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x10, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x10, 0, PPC_NONE, fl2), \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x10, 0, PPC_NONE, fl2) - -#undef GEN_XX3FORM_DM -#define GEN_XX3FORM_DM(name, opc2, opc3) \ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ -GEN_HANDLER2_E(name, #name, 
0x3C, opc2|0x03, opc3|0x0C, 0, PPC_NONE, PPC2_VSX) - -GEN_XX2FORM(xsabsdp, 0x12, 0x15, PPC2_VSX), -GEN_XX2FORM(xsnabsdp, 0x12, 0x16, PPC2_VSX), -GEN_XX2FORM(xsnegdp, 0x12, 0x17, PPC2_VSX), -GEN_XX3FORM(xscpsgndp, 0x00, 0x16, PPC2_VSX), - -GEN_XX2FORM(xvabsdp, 0x12, 0x1D, PPC2_VSX), -GEN_XX2FORM(xvnabsdp, 0x12, 0x1E, PPC2_VSX), -GEN_XX2FORM(xvnegdp, 0x12, 0x1F, PPC2_VSX), -GEN_XX3FORM(xvcpsgndp, 0x00, 0x1E, PPC2_VSX), -GEN_XX2FORM(xvabssp, 0x12, 0x19, PPC2_VSX), -GEN_XX2FORM(xvnabssp, 0x12, 0x1A, PPC2_VSX), -GEN_XX2FORM(xvnegsp, 0x12, 0x1B, PPC2_VSX), -GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX), - -GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX), -GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX), -GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX), -GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX), -GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX), -GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX), -GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX), -GEN_XX3FORM(xstdivdp, 0x14, 0x07, PPC2_VSX), -GEN_XX2FORM(xstsqrtdp, 0x14, 0x06, PPC2_VSX), -GEN_XX3FORM(xsmaddadp, 0x04, 0x04, PPC2_VSX), -GEN_XX3FORM(xsmaddmdp, 0x04, 0x05, PPC2_VSX), -GEN_XX3FORM(xsmsubadp, 0x04, 0x06, PPC2_VSX), -GEN_XX3FORM(xsmsubmdp, 0x04, 0x07, PPC2_VSX), -GEN_XX3FORM(xsnmaddadp, 0x04, 0x14, PPC2_VSX), -GEN_XX3FORM(xsnmaddmdp, 0x04, 0x15, PPC2_VSX), -GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX), -GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX), -GEN_XX2IFORM(xscmpodp, 0x0C, 0x05, PPC2_VSX), -GEN_XX2IFORM(xscmpudp, 0x0C, 0x04, PPC2_VSX), -GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX), -GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX), -GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX), -GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207), -GEN_XX2FORM(xscvspdp, 0x12, 0x14, PPC2_VSX), -GEN_XX2FORM(xscvspdpn, 0x16, 0x14, PPC2_VSX207), -GEN_XX2FORM(xscvdpsxds, 0x10, 0x15, PPC2_VSX), -GEN_XX2FORM(xscvdpsxws, 0x10, 0x05, PPC2_VSX), -GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX), -GEN_XX2FORM(xscvdpuxws, 0x10, 0x04, PPC2_VSX), -GEN_XX2FORM(xscvsxddp, 0x10, 0x17, PPC2_VSX), -GEN_XX2FORM(xscvuxddp, 0x10, 0x16, PPC2_VSX), -GEN_XX2FORM(xsrdpi, 0x12, 0x04, PPC2_VSX), -GEN_XX2FORM(xsrdpic, 0x16, 0x06, PPC2_VSX), -GEN_XX2FORM(xsrdpim, 0x12, 0x07, PPC2_VSX), -GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX), -GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX), - -GEN_XX3FORM(xsaddsp, 0x00, 0x00, PPC2_VSX207), -GEN_XX3FORM(xssubsp, 0x00, 0x01, PPC2_VSX207), -GEN_XX3FORM(xsmulsp, 0x00, 0x02, PPC2_VSX207), -GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207), -GEN_XX2FORM(xsresp, 0x14, 0x01, PPC2_VSX207), -GEN_XX2FORM(xsrsp, 0x12, 0x11, PPC2_VSX207), -GEN_XX2FORM(xssqrtsp, 0x16, 0x00, PPC2_VSX207), -GEN_XX2FORM(xsrsqrtesp, 0x14, 0x00, PPC2_VSX207), -GEN_XX3FORM(xsmaddasp, 0x04, 0x00, PPC2_VSX207), -GEN_XX3FORM(xsmaddmsp, 0x04, 0x01, PPC2_VSX207), -GEN_XX3FORM(xsmsubasp, 0x04, 0x02, PPC2_VSX207), -GEN_XX3FORM(xsmsubmsp, 0x04, 0x03, PPC2_VSX207), -GEN_XX3FORM(xsnmaddasp, 0x04, 0x10, PPC2_VSX207), -GEN_XX3FORM(xsnmaddmsp, 0x04, 0x11, PPC2_VSX207), -GEN_XX3FORM(xsnmsubasp, 0x04, 0x12, PPC2_VSX207), -GEN_XX3FORM(xsnmsubmsp, 0x04, 0x13, PPC2_VSX207), -GEN_XX2FORM(xscvsxdsp, 0x10, 0x13, PPC2_VSX207), -GEN_XX2FORM(xscvuxdsp, 0x10, 0x12, PPC2_VSX207), - -GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX), -GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX), -GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX), -GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX), -GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX), -GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX), -GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX), -GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX), 
-GEN_XX2FORM(xvtsqrtdp, 0x14, 0x0E, PPC2_VSX), -GEN_XX3FORM(xvmaddadp, 0x04, 0x0C, PPC2_VSX), -GEN_XX3FORM(xvmaddmdp, 0x04, 0x0D, PPC2_VSX), -GEN_XX3FORM(xvmsubadp, 0x04, 0x0E, PPC2_VSX), -GEN_XX3FORM(xvmsubmdp, 0x04, 0x0F, PPC2_VSX), -GEN_XX3FORM(xvnmaddadp, 0x04, 0x1C, PPC2_VSX), -GEN_XX3FORM(xvnmaddmdp, 0x04, 0x1D, PPC2_VSX), -GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX), -GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX), -GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX), -GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX), -GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX), -GEN_XX2FORM(xvcvdpsxds, 0x10, 0x1D, PPC2_VSX), -GEN_XX2FORM(xvcvdpsxws, 0x10, 0x0D, PPC2_VSX), -GEN_XX2FORM(xvcvdpuxds, 0x10, 0x1C, PPC2_VSX), -GEN_XX2FORM(xvcvdpuxws, 0x10, 0x0C, PPC2_VSX), -GEN_XX2FORM(xvcvsxddp, 0x10, 0x1F, PPC2_VSX), -GEN_XX2FORM(xvcvuxddp, 0x10, 0x1E, PPC2_VSX), -GEN_XX2FORM(xvcvsxwdp, 0x10, 0x0F, PPC2_VSX), -GEN_XX2FORM(xvcvuxwdp, 0x10, 0x0E, PPC2_VSX), -GEN_XX2FORM(xvrdpi, 0x12, 0x0C, PPC2_VSX), -GEN_XX2FORM(xvrdpic, 0x16, 0x0E, PPC2_VSX), -GEN_XX2FORM(xvrdpim, 0x12, 0x0F, PPC2_VSX), -GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX), -GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX), - -GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX), -GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX), -GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX), -GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX), -GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX), -GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX), -GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX), -GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX), -GEN_XX2FORM(xvtsqrtsp, 0x14, 0x0A, PPC2_VSX), -GEN_XX3FORM(xvmaddasp, 0x04, 0x08, PPC2_VSX), -GEN_XX3FORM(xvmaddmsp, 0x04, 0x09, PPC2_VSX), -GEN_XX3FORM(xvmsubasp, 0x04, 0x0A, PPC2_VSX), -GEN_XX3FORM(xvmsubmsp, 0x04, 0x0B, PPC2_VSX), -GEN_XX3FORM(xvnmaddasp, 0x04, 0x18, PPC2_VSX), -GEN_XX3FORM(xvnmaddmsp, 0x04, 0x19, PPC2_VSX), -GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX), -GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX), -GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX), -GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX), -GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX), -GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX), -GEN_XX2FORM(xvcvspsxds, 0x10, 0x19, PPC2_VSX), -GEN_XX2FORM(xvcvspsxws, 0x10, 0x09, PPC2_VSX), -GEN_XX2FORM(xvcvspuxds, 0x10, 0x18, PPC2_VSX), -GEN_XX2FORM(xvcvspuxws, 0x10, 0x08, PPC2_VSX), -GEN_XX2FORM(xvcvsxdsp, 0x10, 0x1B, PPC2_VSX), -GEN_XX2FORM(xvcvuxdsp, 0x10, 0x1A, PPC2_VSX), -GEN_XX2FORM(xvcvsxwsp, 0x10, 0x0B, PPC2_VSX), -GEN_XX2FORM(xvcvuxwsp, 0x10, 0x0A, PPC2_VSX), -GEN_XX2FORM(xvrspi, 0x12, 0x08, PPC2_VSX), -GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX), -GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX), -GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX), -GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX), - -#undef VSX_LOGICAL -#define VSX_LOGICAL(name, opc2, opc3, fl2) \ -GEN_XX3FORM(name, opc2, opc3, fl2) - -VSX_LOGICAL(xxland, 0x8, 0x10, PPC2_VSX), -VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX), -VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX), -VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX), -VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX), -VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207), -VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), -VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), -GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), -GEN_XX3FORM(xxmrglw, 0x08, 0x06, 
PPC2_VSX), -GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX), -GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00), - -#define GEN_XXSEL_ROW(opc3) \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x19, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1A, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1B, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1C, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1D, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1E, opc3, 0, PPC_NONE, PPC2_VSX), \ -GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1F, opc3, 0, PPC_NONE, PPC2_VSX), \ - -GEN_XXSEL_ROW(0x00) -GEN_XXSEL_ROW(0x01) -GEN_XXSEL_ROW(0x02) -GEN_XXSEL_ROW(0x03) -GEN_XXSEL_ROW(0x04) -GEN_XXSEL_ROW(0x05) -GEN_XXSEL_ROW(0x06) -GEN_XXSEL_ROW(0x07) -GEN_XXSEL_ROW(0x08) -GEN_XXSEL_ROW(0x09) -GEN_XXSEL_ROW(0x0A) -GEN_XXSEL_ROW(0x0B) -GEN_XXSEL_ROW(0x0C) -GEN_XXSEL_ROW(0x0D) -GEN_XXSEL_ROW(0x0E) -GEN_XXSEL_ROW(0x0F) -GEN_XXSEL_ROW(0x10) -GEN_XXSEL_ROW(0x11) -GEN_XXSEL_ROW(0x12) -GEN_XXSEL_ROW(0x13) -GEN_XXSEL_ROW(0x14) -GEN_XXSEL_ROW(0x15) -GEN_XXSEL_ROW(0x16) -GEN_XXSEL_ROW(0x17) -GEN_XXSEL_ROW(0x18) -GEN_XXSEL_ROW(0x19) -GEN_XXSEL_ROW(0x1A) -GEN_XXSEL_ROW(0x1B) -GEN_XXSEL_ROW(0x1C) -GEN_XXSEL_ROW(0x1D) -GEN_XXSEL_ROW(0x1E) -GEN_XXSEL_ROW(0x1F) - -GEN_XX3FORM_DM(xxpermdi, 0x08, 0x01), - -#undef GEN_DFP_T_A_B_Rc -#undef GEN_DFP_BF_A_B -#undef GEN_DFP_BF_A_DCM -#undef GEN_DFP_T_B_U32_U32_Rc -#undef GEN_DFP_T_A_B_I32_Rc -#undef GEN_DFP_T_B_Rc -#undef GEN_DFP_T_FPR_I32_Rc - -#define _GEN_DFP_LONG(name, op1, op2, mask) \ -GEN_HANDLER_E(name, 0x3B, op1, op2, mask, PPC_NONE, PPC2_DFP) - -#define _GEN_DFP_LONGx2(name, op1, op2, mask) \ -GEN_HANDLER_E(name, 0x3B, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3B, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP) - -#define _GEN_DFP_LONGx4(name, op1, op2, mask) \ -GEN_HANDLER_E(name, 0x3B, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3B, op1, 0x08 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3B, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3B, op1, 0x18 | op2, mask, PPC_NONE, PPC2_DFP) - -#define _GEN_DFP_QUAD(name, op1, op2, mask) \ -GEN_HANDLER_E(name, 0x3F, op1, op2, mask, PPC_NONE, PPC2_DFP) - -#define _GEN_DFP_QUADx2(name, op1, op2, mask) \ -GEN_HANDLER_E(name, 0x3F, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3F, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP) - -#define _GEN_DFP_QUADx4(name, op1, op2, mask) \ -GEN_HANDLER_E(name, 0x3F, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3F, op1, 0x08 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3F, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP), \ -GEN_HANDLER_E(name, 0x3F, op1, 0x18 | op2, mask, PPC_NONE, PPC2_DFP) - -#define GEN_DFP_T_A_B_Rc(name, op1, op2) \ -_GEN_DFP_LONG(name, op1, op2, 0x00000000) - -#define GEN_DFP_Tp_Ap_Bp_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x00210800) - -#define GEN_DFP_Tp_A_Bp_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x00200800) - -#define GEN_DFP_T_B_Rc(name, op1, op2) \ -_GEN_DFP_LONG(name, op1, op2, 0x001F0000) - -#define GEN_DFP_Tp_Bp_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x003F0800) - -#define GEN_DFP_Tp_B_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x003F0000) - -#define GEN_DFP_T_Bp_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x001F0800) - 
-#define GEN_DFP_BF_A_B(name, op1, op2) \ -_GEN_DFP_LONG(name, op1, op2, 0x00000001) - -#define GEN_DFP_BF_Ap_Bp(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x00610801) - -#define GEN_DFP_BF_A_Bp(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x00600801) - -#define GEN_DFP_BF_A_DCM(name, op1, op2) \ -_GEN_DFP_LONGx2(name, op1, op2, 0x00600001) - -#define GEN_DFP_BF_Ap_DCM(name, op1, op2) \ -_GEN_DFP_QUADx2(name, op1, op2, 0x00610001) - -#define GEN_DFP_T_A_B_RMC_Rc(name, op1, op2) \ -_GEN_DFP_LONGx4(name, op1, op2, 0x00000000) - -#define GEN_DFP_Tp_Ap_Bp_RMC_Rc(name, op1, op2) \ -_GEN_DFP_QUADx4(name, op1, op2, 0x02010800) - -#define GEN_DFP_Tp_A_Bp_RMC_Rc(name, op1, op2) \ -_GEN_DFP_QUADx4(name, op1, op2, 0x02000800) - -#define GEN_DFP_TE_T_B_RMC_Rc(name, op1, op2) \ -_GEN_DFP_LONGx4(name, op1, op2, 0x00000000) - -#define GEN_DFP_TE_Tp_Bp_RMC_Rc(name, op1, op2) \ -_GEN_DFP_QUADx4(name, op1, op2, 0x00200800) - -#define GEN_DFP_R_T_B_RMC_Rc(name, op1, op2) \ -_GEN_DFP_LONGx4(name, op1, op2, 0x001E0000) - -#define GEN_DFP_R_Tp_Bp_RMC_Rc(name, op1, op2) \ -_GEN_DFP_QUADx4(name, op1, op2, 0x003E0800) - -#define GEN_DFP_SP_T_B_Rc(name, op1, op2) \ -_GEN_DFP_LONG(name, op1, op2, 0x00070000) - -#define GEN_DFP_SP_Tp_Bp_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x00270800) - -#define GEN_DFP_S_T_B_Rc(name, op1, op2) \ -_GEN_DFP_LONG(name, op1, op2, 0x000F0000) - -#define GEN_DFP_S_Tp_Bp_Rc(name, op1, op2) \ -_GEN_DFP_QUAD(name, op1, op2, 0x002F0800) - -#define GEN_DFP_T_A_SH_Rc(name, op1, op2) \ -_GEN_DFP_LONGx2(name, op1, op2, 0x00000000) - -#define GEN_DFP_Tp_Ap_SH_Rc(name, op1, op2) \ -_GEN_DFP_QUADx2(name, op1, op2, 0x00210000) - -GEN_DFP_T_A_B_Rc(dadd, 0x02, 0x00), -GEN_DFP_Tp_Ap_Bp_Rc(daddq, 0x02, 0x00), -GEN_DFP_T_A_B_Rc(dsub, 0x02, 0x10), -GEN_DFP_Tp_Ap_Bp_Rc(dsubq, 0x02, 0x10), -GEN_DFP_T_A_B_Rc(dmul, 0x02, 0x01), -GEN_DFP_Tp_Ap_Bp_Rc(dmulq, 0x02, 0x01), -GEN_DFP_T_A_B_Rc(ddiv, 0x02, 0x11), -GEN_DFP_Tp_Ap_Bp_Rc(ddivq, 0x02, 0x11), -GEN_DFP_BF_A_B(dcmpu, 0x02, 0x14), -GEN_DFP_BF_Ap_Bp(dcmpuq, 0x02, 0x14), -GEN_DFP_BF_A_B(dcmpo, 0x02, 0x04), -GEN_DFP_BF_Ap_Bp(dcmpoq, 0x02, 0x04), -GEN_DFP_BF_A_DCM(dtstdc, 0x02, 0x06), -GEN_DFP_BF_Ap_DCM(dtstdcq, 0x02, 0x06), -GEN_DFP_BF_A_DCM(dtstdg, 0x02, 0x07), -GEN_DFP_BF_Ap_DCM(dtstdgq, 0x02, 0x07), -GEN_DFP_BF_A_B(dtstex, 0x02, 0x05), -GEN_DFP_BF_Ap_Bp(dtstexq, 0x02, 0x05), -GEN_DFP_BF_A_B(dtstsf, 0x02, 0x15), -GEN_DFP_BF_A_Bp(dtstsfq, 0x02, 0x15), -GEN_DFP_TE_T_B_RMC_Rc(dquai, 0x03, 0x02), -GEN_DFP_TE_Tp_Bp_RMC_Rc(dquaiq, 0x03, 0x02), -GEN_DFP_T_A_B_RMC_Rc(dqua, 0x03, 0x00), -GEN_DFP_Tp_Ap_Bp_RMC_Rc(dquaq, 0x03, 0x00), -GEN_DFP_T_A_B_RMC_Rc(drrnd, 0x03, 0x01), -GEN_DFP_Tp_A_Bp_RMC_Rc(drrndq, 0x03, 0x01), -GEN_DFP_R_T_B_RMC_Rc(drintx, 0x03, 0x03), -GEN_DFP_R_Tp_Bp_RMC_Rc(drintxq, 0x03, 0x03), -GEN_DFP_R_T_B_RMC_Rc(drintn, 0x03, 0x07), -GEN_DFP_R_Tp_Bp_RMC_Rc(drintnq, 0x03, 0x07), -GEN_DFP_T_B_Rc(dctdp, 0x02, 0x08), -GEN_DFP_Tp_B_Rc(dctqpq, 0x02, 0x08), -GEN_DFP_T_B_Rc(drsp, 0x02, 0x18), -GEN_DFP_Tp_Bp_Rc(drdpq, 0x02, 0x18), -GEN_DFP_T_B_Rc(dcffix, 0x02, 0x19), -GEN_DFP_Tp_B_Rc(dcffixq, 0x02, 0x19), -GEN_DFP_T_B_Rc(dctfix, 0x02, 0x09), -GEN_DFP_T_Bp_Rc(dctfixq, 0x02, 0x09), -GEN_DFP_SP_T_B_Rc(ddedpd, 0x02, 0x0a), -GEN_DFP_SP_Tp_Bp_Rc(ddedpdq, 0x02, 0x0a), -GEN_DFP_S_T_B_Rc(denbcd, 0x02, 0x1a), -GEN_DFP_S_Tp_Bp_Rc(denbcdq, 0x02, 0x1a), -GEN_DFP_T_B_Rc(dxex, 0x02, 0x0b), -GEN_DFP_T_Bp_Rc(dxexq, 0x02, 0x0b), -GEN_DFP_T_A_B_Rc(diex, 0x02, 0x1b), -GEN_DFP_Tp_A_Bp_Rc(diexq, 0x02, 0x1b), -GEN_DFP_T_A_SH_Rc(dscli, 0x02, 0x02), 
-GEN_DFP_Tp_Ap_SH_Rc(dscliq, 0x02, 0x02), -GEN_DFP_T_A_SH_Rc(dscri, 0x02, 0x03), -GEN_DFP_Tp_Ap_SH_Rc(dscriq, 0x02, 0x03), - -#undef GEN_SPE -#define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ - GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, PPC_NONE) -GEN_SPE(evaddw, speundef, 0x00, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evaddiw, speundef, 0x01, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evsubfw, speundef, 0x02, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evsubifw, speundef, 0x03, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evabs, evneg, 0x04, 0x08, 0x0000F800, 0x0000F800, PPC_SPE), -GEN_SPE(evextsb, evextsh, 0x05, 0x08, 0x0000F800, 0x0000F800, PPC_SPE), -GEN_SPE(evrndw, evcntlzw, 0x06, 0x08, 0x0000F800, 0x0000F800, PPC_SPE), -GEN_SPE(evcntlsw, brinc, 0x07, 0x08, 0x0000F800, 0x00000000, PPC_SPE), -GEN_SPE(evmra, speundef, 0x02, 0x13, 0x0000F800, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(speundef, evand, 0x08, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE), -GEN_SPE(evandc, speundef, 0x09, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evxor, evor, 0x0B, 0x08, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evnor, eveqv, 0x0C, 0x08, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evmwumi, evmwsmi, 0x0C, 0x11, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evmwumia, evmwsmia, 0x1C, 0x11, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evmwumiaa, evmwsmiaa, 0x0C, 0x15, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(speundef, evorc, 0x0D, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE), -GEN_SPE(evnand, speundef, 0x0F, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evsrwu, evsrws, 0x10, 0x08, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evsrwiu, evsrwis, 0x11, 0x08, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evslw, speundef, 0x12, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evslwi, speundef, 0x13, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), -GEN_SPE(evrlw, evsplati, 0x14, 0x08, 0x00000000, 0x0000F800, PPC_SPE), -GEN_SPE(evrlwi, evsplatfi, 0x15, 0x08, 0x00000000, 0x0000F800, PPC_SPE), -GEN_SPE(evmergehi, evmergelo, 0x16, 0x08, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evmergehilo, evmergelohi, 0x17, 0x08, 0x00000000, 0x00000000, PPC_SPE), -GEN_SPE(evcmpgtu, evcmpgts, 0x18, 0x08, 0x00600000, 0x00600000, PPC_SPE), -GEN_SPE(evcmpltu, evcmplts, 0x19, 0x08, 0x00600000, 0x00600000, PPC_SPE), -GEN_SPE(evcmpeq, speundef, 0x1A, 0x08, 0x00600000, 0xFFFFFFFF, PPC_SPE), - -GEN_SPE(evfsadd, evfssub, 0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE), -GEN_SPE(evfsabs, evfsnabs, 0x02, 0x0A, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE), -GEN_SPE(evfsneg, speundef, 0x03, 0x0A, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(evfsmul, evfsdiv, 0x04, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE), -GEN_SPE(evfscmpgt, evfscmplt, 0x06, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE), -GEN_SPE(evfscmpeq, speundef, 0x07, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(evfscfui, evfscfsi, 0x08, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(evfscfuf, evfscfsf, 0x09, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(evfsctui, evfsctsi, 0x0A, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(evfsctuf, evfsctsf, 0x0B, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(evfsctuiz, speundef, 0x0C, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(evfsctsiz, speundef, 0x0D, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(evfststgt, evfststlt, 0x0E, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE), -GEN_SPE(evfststeq, speundef, 0x0F, 0x0A, 0x00600000, 0xFFFFFFFF, 
PPC_SPE_SINGLE), - -GEN_SPE(efsadd, efssub, 0x00, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE), -GEN_SPE(efsabs, efsnabs, 0x02, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE), -GEN_SPE(efsneg, speundef, 0x03, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(efsmul, efsdiv, 0x04, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE), -GEN_SPE(efscmpgt, efscmplt, 0x06, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE), -GEN_SPE(efscmpeq, efscfd, 0x07, 0x0B, 0x00600000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(efscfui, efscfsi, 0x08, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(efscfuf, efscfsf, 0x09, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(efsctui, efsctsi, 0x0A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(efsctuf, efsctsf, 0x0B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), -GEN_SPE(efsctuiz, speundef, 0x0C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(efsctsiz, speundef, 0x0D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), -GEN_SPE(efststgt, efststlt, 0x0E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE), -GEN_SPE(efststeq, speundef, 0x0F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE), - -GEN_SPE(efdadd, efdsub, 0x10, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE), -GEN_SPE(efdcfuid, efdcfsid, 0x11, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdabs, efdnabs, 0x12, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_DOUBLE), -GEN_SPE(efdneg, speundef, 0x13, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_DOUBLE), -GEN_SPE(efdmul, efddiv, 0x14, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE), -GEN_SPE(efdctuidz, efdctsidz, 0x15, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdcmpgt, efdcmplt, 0x16, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE), -GEN_SPE(efdcmpeq, efdcfs, 0x17, 0x0B, 0x00600000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdcfui, efdcfsi, 0x18, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdcfuf, efdcfsf, 0x19, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdctui, efdctsi, 0x1A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdctuf, efdctsf, 0x1B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), -GEN_SPE(efdctuiz, speundef, 0x1C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE), -GEN_SPE(efdctsiz, speundef, 0x1D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE), -GEN_SPE(efdtstgt, efdtstlt, 0x1E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE), -GEN_SPE(efdtsteq, speundef, 0x1F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_DOUBLE), - -#undef GEN_SPEOP_LDST -#define GEN_SPEOP_LDST(name, opc2, sh) \ -GEN_HANDLER(name, 0x04, opc2, 0x0C, 0x00000000, PPC_SPE) -GEN_SPEOP_LDST(evldd, 0x00, 3), -GEN_SPEOP_LDST(evldw, 0x01, 3), -GEN_SPEOP_LDST(evldh, 0x02, 3), -GEN_SPEOP_LDST(evlhhesplat, 0x04, 1), -GEN_SPEOP_LDST(evlhhousplat, 0x06, 1), -GEN_SPEOP_LDST(evlhhossplat, 0x07, 1), -GEN_SPEOP_LDST(evlwhe, 0x08, 2), -GEN_SPEOP_LDST(evlwhou, 0x0A, 2), -GEN_SPEOP_LDST(evlwhos, 0x0B, 2), -GEN_SPEOP_LDST(evlwwsplat, 0x0C, 2), -GEN_SPEOP_LDST(evlwhsplat, 0x0E, 2), - -GEN_SPEOP_LDST(evstdd, 0x10, 3), -GEN_SPEOP_LDST(evstdw, 0x11, 3), -GEN_SPEOP_LDST(evstdh, 0x12, 3), -GEN_SPEOP_LDST(evstwhe, 0x18, 2), -GEN_SPEOP_LDST(evstwho, 0x1A, 2), -GEN_SPEOP_LDST(evstwwe, 0x1C, 2), -GEN_SPEOP_LDST(evstwwo, 0x1E, 2), - GEN_HANDLER2_E(tbegin, "tbegin", 0x1F, 0x0E, 0x14, 0x01DFF800, \ PPC_NONE, PPC2_TM), GEN_HANDLER2_E(tend, "tend", 0x1F, 0x0E, 0x15, 0x01FFF800, \ @@ -11324,6 +6751,16 @@ GEN_HANDLER2_E(treclaim, "treclaim", 0x1F, 0x0E, 0x1D, 0x03E0F800, \ PPC_NONE, PPC2_TM), GEN_HANDLER2_E(trechkpt, "trechkpt", 0x1F, 0x0E, 0x1F, 0x03FFF800, \ PPC_NONE, PPC2_TM), 
+ +#include "translate/fp-ops.inc.c" + +#include "translate/vmx-ops.inc.c" + +#include "translate/vsx-ops.inc.c" + +#include "translate/dfp-ops.inc.c" + +#include "translate/spe-ops.inc.c" }; #include "helper_regs.h" @@ -11569,6 +7006,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) ctx.insns_flags = env->insns_flags; ctx.insns_flags2 = env->insns_flags2; ctx.access_type = -1; + ctx.need_access_type = !(env->mmu_model & POWERPC_MMU_64B); ctx.le_mode = !!(env->hflags & (1 << MSR_LE)); ctx.default_tcg_memop_mask = ctx.le_mode ? MO_LE : MO_BE; #if defined(TARGET_PPC64) @@ -11650,9 +7088,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) } else { ctx.opcode = cpu_ldl_code(env, ctx.nip); } - LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n", - ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode), - opc3(ctx.opcode), ctx.le_mode ? "little" : "big"); + LOG_DISAS("translate opcode %08x (%02x %02x %02x %02x) (%s)\n", + ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode), + opc3(ctx.opcode), opc4(ctx.opcode), + ctx.le_mode ? "little" : "big"); ctx.nip += 4; table = env->opcodes; handler = table[opc1(ctx.opcode)]; @@ -11662,14 +7101,20 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) if (is_indirect_opcode(handler)) { table = ind_table(handler); handler = table[opc3(ctx.opcode)]; + if (is_indirect_opcode(handler)) { + table = ind_table(handler); + handler = table[opc4(ctx.opcode)]; + } } } /* Is opcode *REALLY* valid ? */ if (unlikely(handler->handler == &gen_invalid)) { qemu_log_mask(LOG_GUEST_ERROR, "invalid/unsupported opcode: " - "%02x - %02x - %02x (%08x) " TARGET_FMT_lx " %d\n", + "%02x - %02x - %02x - %02x (%08x) " + TARGET_FMT_lx " %d\n", opc1(ctx.opcode), opc2(ctx.opcode), - opc3(ctx.opcode), ctx.opcode, ctx.nip - 4, (int)msr_ir); + opc3(ctx.opcode), opc4(ctx.opcode), + ctx.opcode, ctx.nip - 4, (int)msr_ir); } else { uint32_t inval; @@ -11681,9 +7126,10 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) if (unlikely((ctx.opcode & inval) != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "invalid bits: %08x for opcode: " - "%02x - %02x - %02x (%08x) " TARGET_FMT_lx "\n", - ctx.opcode & inval, opc1(ctx.opcode), - opc2(ctx.opcode), opc3(ctx.opcode), + "%02x - %02x - %02x - %02x (%08x) " + TARGET_FMT_lx "\n", ctx.opcode & inval, + opc1(ctx.opcode), opc2(ctx.opcode), + opc3(ctx.opcode), opc4(ctx.opcode), ctx.opcode, ctx.nip - 4); gen_inval_exception(ctxp, POWERPC_EXCP_INVAL_INVAL); break; @@ -11699,7 +7145,7 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) ctx.exception != POWERPC_SYSCALL && ctx.exception != POWERPC_EXCP_TRAP && ctx.exception != POWERPC_EXCP_BRANCH)) { - gen_exception(ctxp, POWERPC_EXCP_TRACE); + gen_exception_nip(ctxp, POWERPC_EXCP_TRACE, ctx.nip); } else if (unlikely(((ctx.nip & (TARGET_PAGE_SIZE - 1)) == 0) || (cs->singlestep_enabled) || singlestep || @@ -11710,9 +7156,9 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) break; } if (tcg_check_temp_count()) { - fprintf(stderr, "Opcode %02x %02x %02x (%08x) leaked temporaries\n", - opc1(ctx.opcode), opc2(ctx.opcode), opc3(ctx.opcode), - ctx.opcode); + fprintf(stderr, "Opcode %02x %02x %02x %02x (%08x) leaked " + "temporaries\n", opc1(ctx.opcode), opc2(ctx.opcode), + opc3(ctx.opcode), opc4(ctx.opcode), ctx.opcode); exit(1); } } @@ -11738,9 +7184,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) int flags; flags = env->bfd_mach; flags |= ctx.le_mode 
<< 16; + qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, ctx.nip - pc_start, flags); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-ppc/translate/dfp-impl.inc.c b/target-ppc/translate/dfp-impl.inc.c new file mode 100644 index 0000000000..178d3044a7 --- /dev/null +++ b/target-ppc/translate/dfp-impl.inc.c @@ -0,0 +1,232 @@ +/*** Decimal Floating Point ***/ + +static inline TCGv_ptr gen_fprp_ptr(int reg) +{ + TCGv_ptr r = tcg_temp_new_ptr(); + tcg_gen_addi_ptr(r, cpu_env, offsetof(CPUPPCState, fpr[reg])); + return r; +} + +#define GEN_DFP_T_A_B_Rc(name) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr rd, ra, rb; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + rd = gen_fprp_ptr(rD(ctx->opcode)); \ + ra = gen_fprp_ptr(rA(ctx->opcode)); \ + rb = gen_fprp_ptr(rB(ctx->opcode)); \ + gen_helper_##name(cpu_env, rd, ra, rb); \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_ptr(rd); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ +} + +#define GEN_DFP_BF_A_B(name) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr ra, rb; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + ra = gen_fprp_ptr(rA(ctx->opcode)); \ + rb = gen_fprp_ptr(rB(ctx->opcode)); \ + gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ + cpu_env, ra, rb); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ +} + +#define GEN_DFP_BF_I_B(name) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 uim; \ + TCGv_ptr rb; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + uim = tcg_const_i32(UIMM5(ctx->opcode)); \ + rb = gen_fprp_ptr(rB(ctx->opcode)); \ + gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ + cpu_env, uim, rb); \ + tcg_temp_free_i32(uim); \ + tcg_temp_free_ptr(rb); \ +} + +#define GEN_DFP_BF_A_DCM(name) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr ra; \ + TCGv_i32 dcm; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + ra = gen_fprp_ptr(rA(ctx->opcode)); \ + dcm = tcg_const_i32(DCM(ctx->opcode)); \ + gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ + cpu_env, ra, dcm); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_i32(dcm); \ +} + +#define GEN_DFP_T_B_U32_U32_Rc(name, u32f1, u32f2) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr rt, rb; \ + TCGv_i32 u32_1, u32_2; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + rt = gen_fprp_ptr(rD(ctx->opcode)); \ + rb = gen_fprp_ptr(rB(ctx->opcode)); \ + u32_1 = tcg_const_i32(u32f1(ctx->opcode)); \ + u32_2 = tcg_const_i32(u32f2(ctx->opcode)); \ + gen_helper_##name(cpu_env, rt, rb, u32_1, u32_2); \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_ptr(rt); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_i32(u32_1); \ + tcg_temp_free_i32(u32_2); \ +} + +#define GEN_DFP_T_A_B_I32_Rc(name, i32fld) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr rt, ra, rb; \ + TCGv_i32 i32; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, 
ctx->nip - 4); \ + rt = gen_fprp_ptr(rD(ctx->opcode)); \ + ra = gen_fprp_ptr(rA(ctx->opcode)); \ + rb = gen_fprp_ptr(rB(ctx->opcode)); \ + i32 = tcg_const_i32(i32fld(ctx->opcode)); \ + gen_helper_##name(cpu_env, rt, ra, rb, i32); \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_ptr(rt); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_i32(i32); \ + } + +#define GEN_DFP_T_B_Rc(name) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr rt, rb; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + rt = gen_fprp_ptr(rD(ctx->opcode)); \ + rb = gen_fprp_ptr(rB(ctx->opcode)); \ + gen_helper_##name(cpu_env, rt, rb); \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_ptr(rt); \ + tcg_temp_free_ptr(rb); \ + } + +#define GEN_DFP_T_FPR_I32_Rc(name, fprfld, i32fld) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_ptr rt, rs; \ + TCGv_i32 i32; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_update_nip(ctx, ctx->nip - 4); \ + rt = gen_fprp_ptr(rD(ctx->opcode)); \ + rs = gen_fprp_ptr(fprfld(ctx->opcode)); \ + i32 = tcg_const_i32(i32fld(ctx->opcode)); \ + gen_helper_##name(cpu_env, rt, rs, i32); \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ + tcg_temp_free_ptr(rt); \ + tcg_temp_free_ptr(rs); \ + tcg_temp_free_i32(i32); \ +} + +GEN_DFP_T_A_B_Rc(dadd) +GEN_DFP_T_A_B_Rc(daddq) +GEN_DFP_T_A_B_Rc(dsub) +GEN_DFP_T_A_B_Rc(dsubq) +GEN_DFP_T_A_B_Rc(dmul) +GEN_DFP_T_A_B_Rc(dmulq) +GEN_DFP_T_A_B_Rc(ddiv) +GEN_DFP_T_A_B_Rc(ddivq) +GEN_DFP_BF_A_B(dcmpu) +GEN_DFP_BF_A_B(dcmpuq) +GEN_DFP_BF_A_B(dcmpo) +GEN_DFP_BF_A_B(dcmpoq) +GEN_DFP_BF_A_DCM(dtstdc) +GEN_DFP_BF_A_DCM(dtstdcq) +GEN_DFP_BF_A_DCM(dtstdg) +GEN_DFP_BF_A_DCM(dtstdgq) +GEN_DFP_BF_A_B(dtstex) +GEN_DFP_BF_A_B(dtstexq) +GEN_DFP_BF_A_B(dtstsf) +GEN_DFP_BF_A_B(dtstsfq) +GEN_DFP_BF_I_B(dtstsfi) +GEN_DFP_BF_I_B(dtstsfiq) +GEN_DFP_T_B_U32_U32_Rc(dquai, SIMM5, RMC) +GEN_DFP_T_B_U32_U32_Rc(dquaiq, SIMM5, RMC) +GEN_DFP_T_A_B_I32_Rc(dqua, RMC) +GEN_DFP_T_A_B_I32_Rc(dquaq, RMC) +GEN_DFP_T_A_B_I32_Rc(drrnd, RMC) +GEN_DFP_T_A_B_I32_Rc(drrndq, RMC) +GEN_DFP_T_B_U32_U32_Rc(drintx, FPW, RMC) +GEN_DFP_T_B_U32_U32_Rc(drintxq, FPW, RMC) +GEN_DFP_T_B_U32_U32_Rc(drintn, FPW, RMC) +GEN_DFP_T_B_U32_U32_Rc(drintnq, FPW, RMC) +GEN_DFP_T_B_Rc(dctdp) +GEN_DFP_T_B_Rc(dctqpq) +GEN_DFP_T_B_Rc(drsp) +GEN_DFP_T_B_Rc(drdpq) +GEN_DFP_T_B_Rc(dcffix) +GEN_DFP_T_B_Rc(dcffixq) +GEN_DFP_T_B_Rc(dctfix) +GEN_DFP_T_B_Rc(dctfixq) +GEN_DFP_T_FPR_I32_Rc(ddedpd, rB, SP) +GEN_DFP_T_FPR_I32_Rc(ddedpdq, rB, SP) +GEN_DFP_T_FPR_I32_Rc(denbcd, rB, SP) +GEN_DFP_T_FPR_I32_Rc(denbcdq, rB, SP) +GEN_DFP_T_B_Rc(dxex) +GEN_DFP_T_B_Rc(dxexq) +GEN_DFP_T_A_B_Rc(diex) +GEN_DFP_T_A_B_Rc(diexq) +GEN_DFP_T_FPR_I32_Rc(dscli, rA, DCM) +GEN_DFP_T_FPR_I32_Rc(dscliq, rA, DCM) +GEN_DFP_T_FPR_I32_Rc(dscri, rA, DCM) +GEN_DFP_T_FPR_I32_Rc(dscriq, rA, DCM) + +#undef GEN_DFP_T_A_B_Rc +#undef GEN_DFP_BF_A_B +#undef GEN_DFP_BF_A_DCM +#undef GEN_DFP_T_B_U32_U32_Rc +#undef GEN_DFP_T_A_B_I32_Rc +#undef GEN_DFP_T_B_Rc +#undef GEN_DFP_T_FPR_I32_Rc diff --git a/target-ppc/translate/dfp-ops.inc.c b/target-ppc/translate/dfp-ops.inc.c new file mode 100644 index 0000000000..6ef38e5712 --- /dev/null +++ b/target-ppc/translate/dfp-ops.inc.c @@ -0,0 +1,165 @@ +#define _GEN_DFP_LONG(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3B, 
op1, op2, mask, PPC_NONE, PPC2_DFP) + +#define _GEN_DFP_LONG_300(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3B, op1, op2, mask, PPC_NONE, PPC2_ISA300) + +#define _GEN_DFP_LONGx2(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3B, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3B, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP) + +#define _GEN_DFP_LONGx4(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3B, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3B, op1, 0x08 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3B, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3B, op1, 0x18 | op2, mask, PPC_NONE, PPC2_DFP) + +#define _GEN_DFP_QUAD(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3F, op1, op2, mask, PPC_NONE, PPC2_DFP) + +#define _GEN_DFP_QUAD_300(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3F, op1, op2, mask, PPC_NONE, PPC2_ISA300) + +#define _GEN_DFP_QUADx2(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3F, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3F, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP) + +#define _GEN_DFP_QUADx4(name, op1, op2, mask) \ +GEN_HANDLER_E(name, 0x3F, op1, 0x00 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3F, op1, 0x08 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3F, op1, 0x10 | op2, mask, PPC_NONE, PPC2_DFP), \ +GEN_HANDLER_E(name, 0x3F, op1, 0x18 | op2, mask, PPC_NONE, PPC2_DFP) + +#define GEN_DFP_T_A_B_Rc(name, op1, op2) \ +_GEN_DFP_LONG(name, op1, op2, 0x00000000) + +#define GEN_DFP_Tp_Ap_Bp_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x00210800) + +#define GEN_DFP_Tp_A_Bp_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x00200800) + +#define GEN_DFP_T_B_Rc(name, op1, op2) \ +_GEN_DFP_LONG(name, op1, op2, 0x001F0000) + +#define GEN_DFP_Tp_Bp_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x003F0800) + +#define GEN_DFP_Tp_B_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x003F0000) + +#define GEN_DFP_T_Bp_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x001F0800) + +#define GEN_DFP_BF_A_B(name, op1, op2) \ +_GEN_DFP_LONG(name, op1, op2, 0x00000001) + +#define GEN_DFP_BF_A_B_300(name, op1, op2) \ +_GEN_DFP_LONG_300(name, op1, op2, 0x00400001) + +#define GEN_DFP_BF_Ap_Bp(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x00610801) + +#define GEN_DFP_BF_A_Bp(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x00600801) + +#define GEN_DFP_BF_A_Bp_300(name, op1, op2) \ +_GEN_DFP_QUAD_300(name, op1, op2, 0x00400001) + +#define GEN_DFP_BF_A_DCM(name, op1, op2) \ +_GEN_DFP_LONGx2(name, op1, op2, 0x00600001) + +#define GEN_DFP_BF_Ap_DCM(name, op1, op2) \ +_GEN_DFP_QUADx2(name, op1, op2, 0x00610001) + +#define GEN_DFP_T_A_B_RMC_Rc(name, op1, op2) \ +_GEN_DFP_LONGx4(name, op1, op2, 0x00000000) + +#define GEN_DFP_Tp_Ap_Bp_RMC_Rc(name, op1, op2) \ +_GEN_DFP_QUADx4(name, op1, op2, 0x02010800) + +#define GEN_DFP_Tp_A_Bp_RMC_Rc(name, op1, op2) \ +_GEN_DFP_QUADx4(name, op1, op2, 0x02000800) + +#define GEN_DFP_TE_T_B_RMC_Rc(name, op1, op2) \ +_GEN_DFP_LONGx4(name, op1, op2, 0x00000000) + +#define GEN_DFP_TE_Tp_Bp_RMC_Rc(name, op1, op2) \ +_GEN_DFP_QUADx4(name, op1, op2, 0x00200800) + +#define GEN_DFP_R_T_B_RMC_Rc(name, op1, op2) \ +_GEN_DFP_LONGx4(name, op1, op2, 0x001E0000) + +#define GEN_DFP_R_Tp_Bp_RMC_Rc(name, op1, op2) \ +_GEN_DFP_QUADx4(name, op1, op2, 0x003E0800) + +#define GEN_DFP_SP_T_B_Rc(name, op1, op2) \ +_GEN_DFP_LONG(name, op1, op2, 0x00070000) + +#define GEN_DFP_SP_Tp_Bp_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, 
op1, op2, 0x00270800) + +#define GEN_DFP_S_T_B_Rc(name, op1, op2) \ +_GEN_DFP_LONG(name, op1, op2, 0x000F0000) + +#define GEN_DFP_S_Tp_Bp_Rc(name, op1, op2) \ +_GEN_DFP_QUAD(name, op1, op2, 0x002F0800) + +#define GEN_DFP_T_A_SH_Rc(name, op1, op2) \ +_GEN_DFP_LONGx2(name, op1, op2, 0x00000000) + +#define GEN_DFP_Tp_Ap_SH_Rc(name, op1, op2) \ +_GEN_DFP_QUADx2(name, op1, op2, 0x00210000) + +GEN_DFP_T_A_B_Rc(dadd, 0x02, 0x00), +GEN_DFP_Tp_Ap_Bp_Rc(daddq, 0x02, 0x00), +GEN_DFP_T_A_B_Rc(dsub, 0x02, 0x10), +GEN_DFP_Tp_Ap_Bp_Rc(dsubq, 0x02, 0x10), +GEN_DFP_T_A_B_Rc(dmul, 0x02, 0x01), +GEN_DFP_Tp_Ap_Bp_Rc(dmulq, 0x02, 0x01), +GEN_DFP_T_A_B_Rc(ddiv, 0x02, 0x11), +GEN_DFP_Tp_Ap_Bp_Rc(ddivq, 0x02, 0x11), +GEN_DFP_BF_A_B(dcmpu, 0x02, 0x14), +GEN_DFP_BF_Ap_Bp(dcmpuq, 0x02, 0x14), +GEN_DFP_BF_A_B(dcmpo, 0x02, 0x04), +GEN_DFP_BF_Ap_Bp(dcmpoq, 0x02, 0x04), +GEN_DFP_BF_A_DCM(dtstdc, 0x02, 0x06), +GEN_DFP_BF_Ap_DCM(dtstdcq, 0x02, 0x06), +GEN_DFP_BF_A_DCM(dtstdg, 0x02, 0x07), +GEN_DFP_BF_Ap_DCM(dtstdgq, 0x02, 0x07), +GEN_DFP_BF_A_B(dtstex, 0x02, 0x05), +GEN_DFP_BF_Ap_Bp(dtstexq, 0x02, 0x05), +GEN_DFP_BF_A_B(dtstsf, 0x02, 0x15), +GEN_DFP_BF_A_Bp(dtstsfq, 0x02, 0x15), +GEN_DFP_BF_A_B_300(dtstsfi, 0x03, 0x15), +GEN_DFP_BF_A_Bp_300(dtstsfiq, 0x03, 0x15), +GEN_DFP_TE_T_B_RMC_Rc(dquai, 0x03, 0x02), +GEN_DFP_TE_Tp_Bp_RMC_Rc(dquaiq, 0x03, 0x02), +GEN_DFP_T_A_B_RMC_Rc(dqua, 0x03, 0x00), +GEN_DFP_Tp_Ap_Bp_RMC_Rc(dquaq, 0x03, 0x00), +GEN_DFP_T_A_B_RMC_Rc(drrnd, 0x03, 0x01), +GEN_DFP_Tp_A_Bp_RMC_Rc(drrndq, 0x03, 0x01), +GEN_DFP_R_T_B_RMC_Rc(drintx, 0x03, 0x03), +GEN_DFP_R_Tp_Bp_RMC_Rc(drintxq, 0x03, 0x03), +GEN_DFP_R_T_B_RMC_Rc(drintn, 0x03, 0x07), +GEN_DFP_R_Tp_Bp_RMC_Rc(drintnq, 0x03, 0x07), +GEN_DFP_T_B_Rc(dctdp, 0x02, 0x08), +GEN_DFP_Tp_B_Rc(dctqpq, 0x02, 0x08), +GEN_DFP_T_B_Rc(drsp, 0x02, 0x18), +GEN_DFP_Tp_Bp_Rc(drdpq, 0x02, 0x18), +GEN_DFP_T_B_Rc(dcffix, 0x02, 0x19), +GEN_DFP_Tp_B_Rc(dcffixq, 0x02, 0x19), +GEN_DFP_T_B_Rc(dctfix, 0x02, 0x09), +GEN_DFP_T_Bp_Rc(dctfixq, 0x02, 0x09), +GEN_DFP_SP_T_B_Rc(ddedpd, 0x02, 0x0a), +GEN_DFP_SP_Tp_Bp_Rc(ddedpdq, 0x02, 0x0a), +GEN_DFP_S_T_B_Rc(denbcd, 0x02, 0x1a), +GEN_DFP_S_Tp_Bp_Rc(denbcdq, 0x02, 0x1a), +GEN_DFP_T_B_Rc(dxex, 0x02, 0x0b), +GEN_DFP_T_Bp_Rc(dxexq, 0x02, 0x0b), +GEN_DFP_T_A_B_Rc(diex, 0x02, 0x1b), +GEN_DFP_Tp_A_Bp_Rc(diexq, 0x02, 0x1b), +GEN_DFP_T_A_SH_Rc(dscli, 0x02, 0x02), +GEN_DFP_Tp_Ap_SH_Rc(dscliq, 0x02, 0x02), +GEN_DFP_T_A_SH_Rc(dscri, 0x02, 0x03), +GEN_DFP_Tp_Ap_SH_Rc(dscriq, 0x02, 0x03), diff --git a/target-ppc/translate/fp-impl.inc.c b/target-ppc/translate/fp-impl.inc.c new file mode 100644 index 0000000000..872af7b56f --- /dev/null +++ b/target-ppc/translate/fp-impl.inc.c @@ -0,0 +1,1070 @@ +/* + * translate-fp.c + * + * Standard FPU translation + */ + +static inline void gen_reset_fpstatus(void) +{ + gen_helper_reset_fpstatus(cpu_env); +} + +static inline void gen_compute_fprf(TCGv_i64 arg) +{ + gen_helper_compute_fprf(cpu_env, arg); + gen_helper_float_check_status(cpu_env); +} + +#if defined(TARGET_PPC64) +static void gen_set_cr1_from_fpscr(DisasContext *ctx) +{ + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(tmp, cpu_fpscr); + tcg_gen_shri_i32(cpu_crf[1], tmp, 28); + tcg_temp_free_i32(tmp); +} +#else +static void gen_set_cr1_from_fpscr(DisasContext *ctx) +{ + tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28); +} +#endif + +/*** Floating-Point arithmetic ***/ +#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ +static void gen_f##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, 
POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_reset_fpstatus(); \ + gen_helper_f##op(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rA(ctx->opcode)], \ + cpu_fpr[rC(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); \ + if (isfloat) { \ + gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ +} + +#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ +_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type); \ +_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type); + +#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ +static void gen_f##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_reset_fpstatus(); \ + gen_helper_f##op(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rA(ctx->opcode)], \ + cpu_fpr[rB(ctx->opcode)]); \ + if (isfloat) { \ + gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ +} +#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ +_GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ +_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ +static void gen_f##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_reset_fpstatus(); \ + gen_helper_f##op(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rA(ctx->opcode)], \ + cpu_fpr[rC(ctx->opcode)]); \ + if (isfloat) { \ + gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ +} +#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ +_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type); \ +_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type); + +#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ +static void gen_f##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_reset_fpstatus(); \ + gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rB(ctx->opcode)]); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ +} + +#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ +static void gen_f##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_reset_fpstatus(); \ + gen_helper_f##name(cpu_fpr[rD(ctx->opcode)], cpu_env, \ + cpu_fpr[rB(ctx->opcode)]); \ + if (set_fprf) { \ + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); \ + } \ + if (unlikely(Rc(ctx->opcode) != 0)) { \ + gen_set_cr1_from_fpscr(ctx); \ + } \ +} + +/* fadd - fadds */ +GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT); +/* fdiv - fdivs */ +GEN_FLOAT_AB(div, 0x12, 0x000007C0, 1, PPC_FLOAT); +/* fmul - fmuls */ +GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT); + +/* fre */ +GEN_FLOAT_BS(re, 0x3F, 0x18, 1, 
PPC_FLOAT_EXT); + +/* fres */ +GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES); + +/* frsqrte */ +GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE); + +/* frsqrtes */ +static void gen_frsqrtes(DisasContext *ctx) +{ + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_reset_fpstatus(); + gen_helper_frsqrte(cpu_fpr[rD(ctx->opcode)], cpu_env, + cpu_fpr[rB(ctx->opcode)]); + gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, + cpu_fpr[rD(ctx->opcode)]); + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_cr1_from_fpscr(ctx); + } +} + +/* fsel */ +_GEN_FLOAT_ACB(sel, sel, 0x3F, 0x17, 0, 0, PPC_FLOAT_FSEL); +/* fsub - fsubs */ +GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); +/* Optional: */ + +/* fsqrt */ +static void gen_fsqrt(DisasContext *ctx) +{ + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_reset_fpstatus(); + gen_helper_fsqrt(cpu_fpr[rD(ctx->opcode)], cpu_env, + cpu_fpr[rB(ctx->opcode)]); + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_cr1_from_fpscr(ctx); + } +} + +static void gen_fsqrts(DisasContext *ctx) +{ + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_reset_fpstatus(); + gen_helper_fsqrt(cpu_fpr[rD(ctx->opcode)], cpu_env, + cpu_fpr[rB(ctx->opcode)]); + gen_helper_frsp(cpu_fpr[rD(ctx->opcode)], cpu_env, + cpu_fpr[rD(ctx->opcode)]); + gen_compute_fprf(cpu_fpr[rD(ctx->opcode)]); + if (unlikely(Rc(ctx->opcode) != 0)) { + gen_set_cr1_from_fpscr(ctx); + } +} + +/*** Floating-Point multiply-and-add ***/ +/* fmadd - fmadds */ +GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT); +/* fmsub - fmsubs */ +GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT); +/* fnmadd - fnmadds */ +GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT); +/* fnmsub - fnmsubs */ +GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT); + +/*** Floating-Point round & convert ***/ +/* fctiw */ +GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT); +/* fctiwu */ +GEN_FLOAT_B(ctiwu, 0x0E, 0x04, 0, PPC2_FP_CVT_ISA206); +/* fctiwz */ +GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT); +/* fctiwuz */ +GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206); +/* frsp */ +GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT); +/* fcfid */ +GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC2_FP_CVT_S64); +/* fcfids */ +GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206); +/* fcfidu */ +GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); +/* fcfidus */ +GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206); +/* fctid */ +GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64); +/* fctidu */ +GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206); +/* fctidz */ +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64); +/* fctidu */ +GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206); + +/* frin */ +GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT); +/* friz */ +GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT); +/* frip */ +GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT); +/* frim */ +GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); + +static void gen_ftdiv(DisasContext *ctx) +{ + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)], + cpu_fpr[rB(ctx->opcode)]); +} + +static void gen_ftsqrt(DisasContext *ctx) +{ + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]); +} + + 
+
+/*** Floating-Point compare ***/
+
+/* fcmpo */
+static void gen_fcmpo(DisasContext *ctx)
+{
+    TCGv_i32 crf;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_reset_fpstatus();
+    crf = tcg_const_i32(crfD(ctx->opcode));
+    gen_helper_fcmpo(cpu_env, cpu_fpr[rA(ctx->opcode)],
+                     cpu_fpr[rB(ctx->opcode)], crf);
+    tcg_temp_free_i32(crf);
+    gen_helper_float_check_status(cpu_env);
+}
+
+/* fcmpu */
+static void gen_fcmpu(DisasContext *ctx)
+{
+    TCGv_i32 crf;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_reset_fpstatus();
+    crf = tcg_const_i32(crfD(ctx->opcode));
+    gen_helper_fcmpu(cpu_env, cpu_fpr[rA(ctx->opcode)],
+                     cpu_fpr[rB(ctx->opcode)], crf);
+    tcg_temp_free_i32(crf);
+    gen_helper_float_check_status(cpu_env);
+}
+
+/*** Floating-point move ***/
+/* fabs */
+/* XXX: beware that fabs never checks for NaNs nor updates FPSCR */
+static void gen_fabs(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_andi_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
+                     ~(1ULL << 63));
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
+}
+
+/* fmr - fmr. */
+/* XXX: beware that fmr never checks for NaNs nor updates FPSCR */
+static void gen_fmr(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_mov_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
+}
+
+/* fnabs */
+/* XXX: beware that fnabs never checks for NaNs nor updates FPSCR */
+static void gen_fnabs(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_ori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
+                    1ULL << 63);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
+}
+
+/* fneg */
+/* XXX: beware that fneg never checks for NaNs nor updates FPSCR */
+static void gen_fneg(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_xori_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)],
+                     1ULL << 63);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
+}
+
+/* fcpsgn: PowerPC 2.05 specification */
+/* XXX: beware that fcpsgn never checks for NaNs nor updates FPSCR */
+static void gen_fcpsgn(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+                        cpu_fpr[rB(ctx->opcode)], 0, 63);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
+}
+
+static void gen_fmrgew(DisasContext *ctx)
+{
+    TCGv_i64 b0;
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    b0 = tcg_temp_new_i64();
+    tcg_gen_shri_i64(b0, cpu_fpr[rB(ctx->opcode)], 32);
+    tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+                        b0, 0, 32);
+    tcg_temp_free_i64(b0);
+}
+
+static void gen_fmrgow(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    tcg_gen_deposit_i64(cpu_fpr[rD(ctx->opcode)],
+                        cpu_fpr[rB(ctx->opcode)],
+                        cpu_fpr[rA(ctx->opcode)],
+                        32, 32);
+}
+
+/*** Floating-Point status & ctrl register ***/
+
+/* mcrfs */
+static void gen_mcrfs(DisasContext *ctx)
+{
+    TCGv tmp = tcg_temp_new();
+    TCGv_i32 tmask;
+    TCGv_i64 tnew_fpscr = tcg_temp_new_i64();
+    int bfa;
+    int nibble;
+    int shift;
+
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    bfa = crfS(ctx->opcode);
+    nibble = 7 - bfa;
+    shift = 4 * nibble;
+    tcg_gen_shri_tl(tmp, cpu_fpscr, shift);
+    tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx->opcode)], tmp);
+    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 0xf);
+    tcg_temp_free(tmp);
+    tcg_gen_extu_tl_i64(tnew_fpscr, cpu_fpscr);
+    /* Only the exception bits (including FX) should be cleared if read */
+    tcg_gen_andi_i64(tnew_fpscr, tnew_fpscr, ~((0xF << shift) & FP_EX_CLEAR_BITS));
+    /* FEX and VX need to be updated, so don't set fpscr directly */
+    tmask = tcg_const_i32(1 << nibble);
+    gen_helper_store_fpscr(cpu_env, tnew_fpscr, tmask);
+    tcg_temp_free_i32(tmask);
+    tcg_temp_free_i64(tnew_fpscr);
+}
+
+/* mffs */
+static void gen_mffs(DisasContext *ctx)
+{
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    gen_reset_fpstatus();
+    tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx->opcode)], cpu_fpscr);
+    if (unlikely(Rc(ctx->opcode))) {
+        gen_set_cr1_from_fpscr(ctx);
+    }
+}
+
+/* mtfsb0 */
+static void gen_mtfsb0(DisasContext *ctx)
+{
+    uint8_t crb;
+
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    crb = 31 - crbD(ctx->opcode);
+    gen_reset_fpstatus();
+    if (likely(crb != FPSCR_FEX && crb != FPSCR_VX)) {
+        TCGv_i32 t0;
+        t0 = tcg_const_i32(crb);
+        gen_helper_fpscr_clrbit(cpu_env, t0);
+        tcg_temp_free_i32(t0);
+    }
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
+        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+    }
+}
+
+/* mtfsb1 */
+static void gen_mtfsb1(DisasContext *ctx)
+{
+    uint8_t crb;
+
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    crb = 31 - crbD(ctx->opcode);
+    gen_reset_fpstatus();
+    /* XXX: we pretend we can only do IEEE floating-point computations */
+    if (likely(crb != FPSCR_FEX && crb != FPSCR_VX && crb != FPSCR_NI)) {
+        TCGv_i32 t0;
+        t0 = tcg_const_i32(crb);
+        gen_helper_fpscr_setbit(cpu_env, t0);
+        tcg_temp_free_i32(t0);
+    }
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
+        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+    }
+    /* We can raise a deferred exception */
+    gen_helper_float_check_status(cpu_env);
+}
+
+/* mtfsf */
+static void gen_mtfsf(DisasContext *ctx)
+{
+    TCGv_i32 t0;
+    int flm, l, w;
+
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    flm = FPFLM(ctx->opcode);
+    l = FPL(ctx->opcode);
+    w = FPW(ctx->opcode);
+    if (unlikely(w & !(ctx->insns_flags2 & PPC2_ISA205))) {
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+        return;
+    }
+    gen_reset_fpstatus();
+    if (l) {
+        t0 = tcg_const_i32((ctx->insns_flags2 & PPC2_ISA205) ? 0xffff : 0xff);
+    } else {
+        t0 = tcg_const_i32(flm << (w * 8));
+    }
+    gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx->opcode)], t0);
+    tcg_temp_free_i32(t0);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
+        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+    }
+    /* We can raise a deferred exception */
+    gen_helper_float_check_status(cpu_env);
+}
+
+/* mtfsfi */
+static void gen_mtfsfi(DisasContext *ctx)
+{
+    int bf, sh, w;
+    TCGv_i64 t0;
+    TCGv_i32 t1;
+
+    if (unlikely(!ctx->fpu_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_FPU);
+        return;
+    }
+    w = FPW(ctx->opcode);
+    bf = FPBF(ctx->opcode);
+    if (unlikely(w & !(ctx->insns_flags2 & PPC2_ISA205))) {
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
+        return;
+    }
+    sh = (8 * w) + 7 - bf;
+    gen_reset_fpstatus();
+    t0 = tcg_const_i64(((uint64_t)FPIMM(ctx->opcode)) << (4 * sh));
+    t1 = tcg_const_i32(1 << sh);
+    gen_helper_store_fpscr(cpu_env, t0, t1);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i32(t1);
+    if (unlikely(Rc(ctx->opcode) != 0)) {
+        tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
+        tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
+    }
+    /* We can raise a deferred exception */
+    gen_helper_float_check_status(cpu_env);
+}
+
+/*** Floating-point load ***/
+#define GEN_LDF(name, ldop, opc, type) \
+static void glue(gen_, name)(DisasContext *ctx) \
+{ \
+    TCGv EA; \
+    if (unlikely(!ctx->fpu_enabled)) { \
+        gen_exception(ctx, POWERPC_EXCP_FPU); \
+        return; \
+    } \
+    gen_set_access_type(ctx, ACCESS_FLOAT); \
+    EA = tcg_temp_new(); \
+    gen_addr_imm_index(ctx, EA, 0); \
+    gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \
+    tcg_temp_free(EA); \
+}
+
+#define GEN_LDUF(name, ldop, opc, type) \
+static void glue(gen_, name##u)(DisasContext *ctx) \
+{ \
+    TCGv EA; \
+    if (unlikely(!ctx->fpu_enabled)) { \
+        gen_exception(ctx, POWERPC_EXCP_FPU); \
+        return; \
+    } \
+    if (unlikely(rA(ctx->opcode) == 0)) { \
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
+        return; \
+    } \
+    gen_set_access_type(ctx, ACCESS_FLOAT); \
+    EA = tcg_temp_new(); \
+    gen_addr_imm_index(ctx, EA, 0); \
+    gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \
+    tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \
+    tcg_temp_free(EA); \
+}
+
+#define GEN_LDUXF(name, ldop, opc, type) \
+static void glue(gen_, name##ux)(DisasContext *ctx) \
+{ \
+    TCGv EA; \
+    if (unlikely(!ctx->fpu_enabled)) { \
+        gen_exception(ctx, POWERPC_EXCP_FPU); \
+        return; \
+    } \
+    if (unlikely(rA(ctx->opcode) == 0)) { \
+        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
+        return; \
+    } \
+    gen_set_access_type(ctx, ACCESS_FLOAT); \
+    EA = tcg_temp_new(); \
+    gen_addr_reg_index(ctx, EA); \
+    gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \
+    tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \
+    tcg_temp_free(EA); \
+}
+
+#define GEN_LDXF(name, ldop, opc2, opc3, type) \
+static void glue(gen_, name##x)(DisasContext *ctx) \
+{ \
+    TCGv EA; \
+    if (unlikely(!ctx->fpu_enabled)) { \
+        gen_exception(ctx, POWERPC_EXCP_FPU); \
+        return; \
+    } \
+    gen_set_access_type(ctx, ACCESS_FLOAT); \
+    EA = tcg_temp_new(); \
+    gen_addr_reg_index(ctx, EA); \
+    gen_qemu_##ldop(ctx, cpu_fpr[rD(ctx->opcode)], EA); \
+    tcg_temp_free(EA); \
+}
+
+#define GEN_LDFS(name, ldop, op, type) \
+GEN_LDF(name, ldop, op | 0x20, type); \
+GEN_LDUF(name, ldop, op | 0x21, type); \
+GEN_LDUXF(name, ldop, op | 0x01, type); \
+GEN_LDXF(name, ldop, 0x17, op | 0x00, type)
+
+static inline void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2)
+{
+    TCGv t0 = tcg_temp_new();
+    TCGv_i32
t1 = tcg_temp_new_i32(); + gen_qemu_ld32u(ctx, t0, arg2); + tcg_gen_trunc_tl_i32(t1, t0); + tcg_temp_free(t0); + gen_helper_float32_to_float64(arg1, cpu_env, t1); + tcg_temp_free_i32(t1); +} + + /* lfd lfdu lfdux lfdx */ +GEN_LDFS(lfd, ld64_i64, 0x12, PPC_FLOAT); + /* lfs lfsu lfsux lfsx */ +GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT); + +/* lfdp */ +static void gen_lfdp(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_imm_index(ctx, EA, 0); + /* We only need to swap high and low halves. gen_qemu_ld64_i64 does + necessary 64-bit byteswap already. */ + if (unlikely(ctx->le_mode)) { + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + } else { + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + } + tcg_temp_free(EA); +} + +/* lfdpx */ +static void gen_lfdpx(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + /* We only need to swap high and low halves. gen_qemu_ld64_i64 does + necessary 64-bit byteswap already. */ + if (unlikely(ctx->le_mode)) { + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + } else { + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + } + tcg_temp_free(EA); +} + +/* lfiwax */ +static void gen_lfiwax(DisasContext *ctx) +{ + TCGv EA; + TCGv t0; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + gen_qemu_ld32s(ctx, t0, EA); + tcg_gen_ext_tl_i64(cpu_fpr[rD(ctx->opcode)], t0); + tcg_temp_free(EA); + tcg_temp_free(t0); +} + +/* lfiwzx */ +static void gen_lfiwzx(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + gen_qemu_ld32u_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_temp_free(EA); +} +/*** Floating-point store ***/ +#define GEN_STF(name, stop, opc, type) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ + tcg_temp_free(EA); \ +} + +#define GEN_STUF(name, stop, opc, type) \ +static void glue(gen_, name##u)(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(); \ + gen_addr_imm_index(ctx, EA, 0); \ + gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ + tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(EA); \ 
+} + +#define GEN_STUXF(name, stop, opc, type) \ +static void glue(gen_, name##ux)(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + if (unlikely(rA(ctx->opcode) == 0)) { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ + tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], EA); \ + tcg_temp_free(EA); \ +} + +#define GEN_STXF(name, stop, opc2, opc3, type) \ +static void glue(gen_, name##x)(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_FLOAT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##stop(ctx, cpu_fpr[rS(ctx->opcode)], EA); \ + tcg_temp_free(EA); \ +} + +#define GEN_STFS(name, stop, op, type) \ +GEN_STF(name, stop, op | 0x20, type); \ +GEN_STUF(name, stop, op | 0x21, type); \ +GEN_STUXF(name, stop, op | 0x01, type); \ +GEN_STXF(name, stop, 0x17, op | 0x00, type) + +static inline void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv t1 = tcg_temp_new(); + gen_helper_float64_to_float32(t0, cpu_env, arg1); + tcg_gen_extu_i32_tl(t1, t0); + tcg_temp_free_i32(t0); + gen_qemu_st32(ctx, t1, arg2); + tcg_temp_free(t1); +} + +/* stfd stfdu stfdux stfdx */ +GEN_STFS(stfd, st64_i64, 0x16, PPC_FLOAT); +/* stfs stfsu stfsux stfsx */ +GEN_STFS(stfs, st32fs, 0x14, PPC_FLOAT); + +/* stfdp */ +static void gen_stfdp(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_imm_index(ctx, EA, 0); + /* We only need to swap high and low halves. gen_qemu_st64_i64 does + necessary 64-bit byteswap already. */ + if (unlikely(ctx->le_mode)) { + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + } else { + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + } + tcg_temp_free(EA); +} + +/* stfdpx */ +static void gen_stfdpx(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->fpu_enabled)) { + gen_exception(ctx, POWERPC_EXCP_FPU); + return; + } + gen_set_access_type(ctx, ACCESS_FLOAT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + /* We only need to swap high and low halves. gen_qemu_st64_i64 does + necessary 64-bit byteswap already. 
*/ + if (unlikely(ctx->le_mode)) { + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + } else { + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode)], EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[rD(ctx->opcode) + 1], EA); + } + tcg_temp_free(EA); +} + +/* Optional: */ +static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_trunc_i64_tl(t0, arg1), + gen_qemu_st32(ctx, t0, arg2); + tcg_temp_free(t0); +} +/* stfiwx */ +GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX); + +/* POWER2 specific instructions */ +/* Quad manipulation (load/store two floats at a time) */ + +/* lfq */ +static void gen_lfq(DisasContext *ctx) +{ + int rd = rD(ctx->opcode); + TCGv t0; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + gen_addr_imm_index(ctx, t0, 0); + gen_qemu_ld64_i64(ctx, cpu_fpr[rd], t0); + gen_addr_add(ctx, t0, t0, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[(rd + 1) % 32], t0); + tcg_temp_free(t0); +} + +/* lfqu */ +static void gen_lfqu(DisasContext *ctx) +{ + int ra = rA(ctx->opcode); + int rd = rD(ctx->opcode); + TCGv t0, t1; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + gen_addr_imm_index(ctx, t0, 0); + gen_qemu_ld64_i64(ctx, cpu_fpr[rd], t0); + gen_addr_add(ctx, t1, t0, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[(rd + 1) % 32], t1); + if (ra != 0) + tcg_gen_mov_tl(cpu_gpr[ra], t0); + tcg_temp_free(t0); + tcg_temp_free(t1); +} + +/* lfqux */ +static void gen_lfqux(DisasContext *ctx) +{ + int ra = rA(ctx->opcode); + int rd = rD(ctx->opcode); + gen_set_access_type(ctx, ACCESS_FLOAT); + TCGv t0, t1; + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, t0); + gen_qemu_ld64_i64(ctx, cpu_fpr[rd], t0); + t1 = tcg_temp_new(); + gen_addr_add(ctx, t1, t0, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[(rd + 1) % 32], t1); + tcg_temp_free(t1); + if (ra != 0) + tcg_gen_mov_tl(cpu_gpr[ra], t0); + tcg_temp_free(t0); +} + +/* lfqx */ +static void gen_lfqx(DisasContext *ctx) +{ + int rd = rD(ctx->opcode); + TCGv t0; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, t0); + gen_qemu_ld64_i64(ctx, cpu_fpr[rd], t0); + gen_addr_add(ctx, t0, t0, 8); + gen_qemu_ld64_i64(ctx, cpu_fpr[(rd + 1) % 32], t0); + tcg_temp_free(t0); +} + +/* stfq */ +static void gen_stfq(DisasContext *ctx) +{ + int rd = rD(ctx->opcode); + TCGv t0; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + gen_addr_imm_index(ctx, t0, 0); + gen_qemu_st64_i64(ctx, cpu_fpr[rd], t0); + gen_addr_add(ctx, t0, t0, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[(rd + 1) % 32], t0); + tcg_temp_free(t0); +} + +/* stfqu */ +static void gen_stfqu(DisasContext *ctx) +{ + int ra = rA(ctx->opcode); + int rd = rD(ctx->opcode); + TCGv t0, t1; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + gen_addr_imm_index(ctx, t0, 0); + gen_qemu_st64_i64(ctx, cpu_fpr[rd], t0); + t1 = tcg_temp_new(); + gen_addr_add(ctx, t1, t0, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[(rd + 1) % 32], t1); + tcg_temp_free(t1); + if (ra != 0) + tcg_gen_mov_tl(cpu_gpr[ra], t0); + tcg_temp_free(t0); +} + +/* stfqux */ +static void gen_stfqux(DisasContext *ctx) +{ + int ra = rA(ctx->opcode); + int rd = rD(ctx->opcode); + TCGv t0, t1; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, t0); + gen_qemu_st64_i64(ctx, cpu_fpr[rd], t0); + t1 = tcg_temp_new(); + gen_addr_add(ctx, t1, t0, 8); + 
gen_qemu_st64_i64(ctx, cpu_fpr[(rd + 1) % 32], t1); + tcg_temp_free(t1); + if (ra != 0) + tcg_gen_mov_tl(cpu_gpr[ra], t0); + tcg_temp_free(t0); +} + +/* stfqx */ +static void gen_stfqx(DisasContext *ctx) +{ + int rd = rD(ctx->opcode); + TCGv t0; + gen_set_access_type(ctx, ACCESS_FLOAT); + t0 = tcg_temp_new(); + gen_addr_reg_index(ctx, t0); + gen_qemu_st64_i64(ctx, cpu_fpr[rd], t0); + gen_addr_add(ctx, t0, t0, 8); + gen_qemu_st64_i64(ctx, cpu_fpr[(rd + 1) % 32], t0); + tcg_temp_free(t0); +} + +#undef _GEN_FLOAT_ACB +#undef GEN_FLOAT_ACB +#undef _GEN_FLOAT_AB +#undef GEN_FLOAT_AB +#undef _GEN_FLOAT_AC +#undef GEN_FLOAT_AC +#undef GEN_FLOAT_B +#undef GEN_FLOAT_BS + +#undef GEN_LDF +#undef GEN_LDUF +#undef GEN_LDUXF +#undef GEN_LDXF +#undef GEN_LDFS + +#undef GEN_STF +#undef GEN_STUF +#undef GEN_STUXF +#undef GEN_STXF +#undef GEN_STFS diff --git a/target-ppc/translate/fp-ops.inc.c b/target-ppc/translate/fp-ops.inc.c new file mode 100644 index 0000000000..d36ab4ecc3 --- /dev/null +++ b/target-ppc/translate/fp-ops.inc.c @@ -0,0 +1,111 @@ +#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ +GEN_HANDLER(f##name, op1, op2, 0xFF, 0x00000000, type) +#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ +_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type), \ +_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type) +#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ +GEN_HANDLER(f##name, op1, op2, 0xFF, inval, type) +#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ +_GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type), \ +_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type) +#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ +GEN_HANDLER(f##name, op1, op2, 0xFF, inval, type) +#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ +_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type), \ +_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type) +#define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ +GEN_HANDLER(f##name, 0x3F, op2, op3, 0x001F0000, type) +#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ +GEN_HANDLER(f##name, op1, op2, 0xFF, 0x001F07C0, type) + +GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT), +GEN_FLOAT_AB(div, 0x12, 0x000007C0, 1, PPC_FLOAT), +GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT), +GEN_FLOAT_BS(re, 0x3F, 0x18, 1, PPC_FLOAT_EXT), +GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES), +GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE), +_GEN_FLOAT_ACB(sel, sel, 0x3F, 0x17, 0, 0, PPC_FLOAT_FSEL), +GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT), +GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT), +GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT), +GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT), +GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT), +GEN_HANDLER_E(ftdiv, 0x3F, 0x00, 0x04, 1, PPC_NONE, PPC2_FP_TST_ISA206), +GEN_HANDLER_E(ftsqrt, 0x3F, 0x00, 0x05, 1, PPC_NONE, PPC2_FP_TST_ISA206), +GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT), +GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT), +GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT), +GEN_HANDLER_E(fcfid, 0x3F, 0x0E, 0x1A, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), +GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206), 
+GEN_HANDLER_E(fctid, 0x3F, 0x0E, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), +GEN_HANDLER_E(fctidu, 0x3F, 0x0E, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_HANDLER_E(fctidz, 0x3F, 0x0F, 0x19, 0x001F0000, PPC_NONE, PPC2_FP_CVT_S64), +GEN_HANDLER_E(fctiduz, 0x3F, 0x0F, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT), +GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT), +GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT), +GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT), + +#define GEN_LDF(name, ldop, opc, type) \ +GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type), +#define GEN_LDUF(name, ldop, opc, type) \ +GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x00000000, type), +#define GEN_LDUXF(name, ldop, opc, type) \ +GEN_HANDLER(name##ux, 0x1F, 0x17, opc, 0x00000001, type), +#define GEN_LDXF(name, ldop, opc2, opc3, type) \ +GEN_HANDLER(name##x, 0x1F, opc2, opc3, 0x00000001, type), +#define GEN_LDFS(name, ldop, op, type) \ +GEN_LDF(name, ldop, op | 0x20, type) \ +GEN_LDUF(name, ldop, op | 0x21, type) \ +GEN_LDUXF(name, ldop, op | 0x01, type) \ +GEN_LDXF(name, ldop, 0x17, op | 0x00, type) + +GEN_LDFS(lfd, ld64, 0x12, PPC_FLOAT) +GEN_LDFS(lfs, ld32fs, 0x10, PPC_FLOAT) +GEN_HANDLER_E(lfiwax, 0x1f, 0x17, 0x1a, 0x00000001, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(lfiwzx, 0x1f, 0x17, 0x1b, 0x1, PPC_NONE, PPC2_FP_CVT_ISA206), +GEN_HANDLER_E(lfdp, 0x39, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(lfdpx, 0x1F, 0x17, 0x18, 0x00200001, PPC_NONE, PPC2_ISA205), + +#define GEN_STF(name, stop, opc, type) \ +GEN_HANDLER(name, opc, 0xFF, 0xFF, 0x00000000, type), +#define GEN_STUF(name, stop, opc, type) \ +GEN_HANDLER(name##u, opc, 0xFF, 0xFF, 0x00000000, type), +#define GEN_STUXF(name, stop, opc, type) \ +GEN_HANDLER(name##ux, 0x1F, 0x17, opc, 0x00000001, type), +#define GEN_STXF(name, stop, opc2, opc3, type) \ +GEN_HANDLER(name##x, 0x1F, opc2, opc3, 0x00000001, type), +#define GEN_STFS(name, stop, op, type) \ +GEN_STF(name, stop, op | 0x20, type) \ +GEN_STUF(name, stop, op | 0x21, type) \ +GEN_STUXF(name, stop, op | 0x01, type) \ +GEN_STXF(name, stop, 0x17, op | 0x00, type) + +GEN_STFS(stfd, st64_i64, 0x16, PPC_FLOAT) +GEN_STFS(stfs, st32fs, 0x14, PPC_FLOAT) +GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) +GEN_HANDLER_E(stfdp, 0x3D, 0xFF, 0xFF, 0x00200003, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x00200001, PPC_NONE, PPC2_ISA205), + +GEN_HANDLER(frsqrtes, 0x3B, 0x1A, 0xFF, 0x001F07C0, PPC_FLOAT_FRSQRTES), +GEN_HANDLER(fsqrt, 0x3F, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT), +GEN_HANDLER(fsqrts, 0x3B, 0x16, 0xFF, 0x001F07C0, PPC_FLOAT_FSQRT), +GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x00600001, PPC_FLOAT), +GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x00600001, PPC_FLOAT), +GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F0000, PPC_FLOAT), +GEN_HANDLER(fmr, 0x3F, 0x08, 0x02, 0x001F0000, PPC_FLOAT), +GEN_HANDLER(fnabs, 0x3F, 0x08, 0x04, 0x001F0000, PPC_FLOAT), +GEN_HANDLER(fneg, 0x3F, 0x08, 0x01, 0x001F0000, PPC_FLOAT), +GEN_HANDLER_E(fcpsgn, 0x3F, 0x08, 0x00, 0x00000000, PPC_NONE, PPC2_ISA205), +GEN_HANDLER_E(fmrgew, 0x3F, 0x06, 0x1E, 0x00000001, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(fmrgow, 0x3F, 0x06, 0x1A, 0x00000001, PPC_NONE, PPC2_VSX207), +GEN_HANDLER(mcrfs, 0x3F, 0x00, 0x02, 0x0063F801, PPC_FLOAT), +GEN_HANDLER(mffs, 0x3F, 0x07, 0x12, 0x001FF800, PPC_FLOAT), +GEN_HANDLER(mtfsb0, 0x3F, 0x06, 0x02, 0x001FF800, PPC_FLOAT), +GEN_HANDLER(mtfsb1, 0x3F, 0x06, 0x01, 0x001FF800, PPC_FLOAT), +GEN_HANDLER(mtfsf, 0x3F, 0x07, 0x16, 0x00000000, PPC_FLOAT), 
+GEN_HANDLER(mtfsfi, 0x3F, 0x06, 0x04, 0x006e0800, PPC_FLOAT), diff --git a/target-ppc/translate/spe-impl.inc.c b/target-ppc/translate/spe-impl.inc.c new file mode 100644 index 0000000000..8c1c16c63e --- /dev/null +++ b/target-ppc/translate/spe-impl.inc.c @@ -0,0 +1,1229 @@ +/* + * translate-spe.c + * + * Freescale SPE extension translation + */ + +/*** SPE extension ***/ +/* Register moves */ + +static inline void gen_evmra(DisasContext *ctx) +{ + + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + + TCGv_i64 tmp = tcg_temp_new_i64(); + + /* tmp := rA_lo + rA_hi << 32 */ + tcg_gen_concat_tl_i64(tmp, cpu_gpr[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); + + /* spe_acc := tmp */ + tcg_gen_st_i64(tmp, cpu_env, offsetof(CPUPPCState, spe_acc)); + tcg_temp_free_i64(tmp); + + /* rD := rA */ + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); +} + +static inline void gen_load_gpr64(TCGv_i64 t, int reg) +{ + tcg_gen_concat_tl_i64(t, cpu_gpr[reg], cpu_gprh[reg]); +} + +static inline void gen_store_gpr64(int reg, TCGv_i64 t) +{ + tcg_gen_extr_i64_tl(cpu_gpr[reg], cpu_gprh[reg], t); +} + +#define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ +{ \ + if (Rc(ctx->opcode)) \ + gen_##name1(ctx); \ + else \ + gen_##name0(ctx); \ +} + +/* Handler for undefined SPE opcodes */ +static inline void gen_speundef(DisasContext *ctx) +{ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); +} + +/* SPE logic */ +#define GEN_SPEOP_LOGIC2(name, tcg_op) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + tcg_op(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], \ + cpu_gpr[rB(ctx->opcode)]); \ + tcg_op(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], \ + cpu_gprh[rB(ctx->opcode)]); \ +} + +GEN_SPEOP_LOGIC2(evand, tcg_gen_and_tl); +GEN_SPEOP_LOGIC2(evandc, tcg_gen_andc_tl); +GEN_SPEOP_LOGIC2(evxor, tcg_gen_xor_tl); +GEN_SPEOP_LOGIC2(evor, tcg_gen_or_tl); +GEN_SPEOP_LOGIC2(evnor, tcg_gen_nor_tl); +GEN_SPEOP_LOGIC2(eveqv, tcg_gen_eqv_tl); +GEN_SPEOP_LOGIC2(evorc, tcg_gen_orc_tl); +GEN_SPEOP_LOGIC2(evnand, tcg_gen_nand_tl); + +/* SPE logic immediate */ +#define GEN_SPEOP_TCG_LOGIC_IMM2(name, tcg_opi) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_opi(t0, t0, rB(ctx->opcode)); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ + tcg_opi(t0, t0, rB(ctx->opcode)); \ + tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ + \ + tcg_temp_free_i32(t0); \ +} +GEN_SPEOP_TCG_LOGIC_IMM2(evslwi, tcg_gen_shli_i32); +GEN_SPEOP_TCG_LOGIC_IMM2(evsrwiu, tcg_gen_shri_i32); +GEN_SPEOP_TCG_LOGIC_IMM2(evsrwis, tcg_gen_sari_i32); +GEN_SPEOP_TCG_LOGIC_IMM2(evrlwi, tcg_gen_rotli_i32); + +/* SPE arithmetic */ +#define GEN_SPEOP_ARITH1(name, tcg_op) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_op(t0, t0); \ + 
tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ + tcg_op(t0, t0); \ + tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ + \ + tcg_temp_free_i32(t0); \ +} + +static inline void gen_op_evabs(TCGv_i32 ret, TCGv_i32 arg1) +{ + TCGLabel *l1 = gen_new_label(); + TCGLabel *l2 = gen_new_label(); + + tcg_gen_brcondi_i32(TCG_COND_GE, arg1, 0, l1); + tcg_gen_neg_i32(ret, arg1); + tcg_gen_br(l2); + gen_set_label(l1); + tcg_gen_mov_i32(ret, arg1); + gen_set_label(l2); +} +GEN_SPEOP_ARITH1(evabs, gen_op_evabs); +GEN_SPEOP_ARITH1(evneg, tcg_gen_neg_i32); +GEN_SPEOP_ARITH1(evextsb, tcg_gen_ext8s_i32); +GEN_SPEOP_ARITH1(evextsh, tcg_gen_ext16s_i32); +static inline void gen_op_evrndw(TCGv_i32 ret, TCGv_i32 arg1) +{ + tcg_gen_addi_i32(ret, arg1, 0x8000); + tcg_gen_ext16u_i32(ret, ret); +} +GEN_SPEOP_ARITH1(evrndw, gen_op_evrndw); +GEN_SPEOP_ARITH1(evcntlsw, gen_helper_cntlsw32); +GEN_SPEOP_ARITH1(evcntlzw, gen_helper_cntlzw32); + +#define GEN_SPEOP_ARITH2(name, tcg_op) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0, t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + t1 = tcg_temp_new_i32(); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ + tcg_op(t0, t0, t1); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gprh[rB(ctx->opcode)]); \ + tcg_op(t0, t0, t1); \ + tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ + \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ +} + +static inline void gen_op_evsrwu(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGLabel *l1 = gen_new_label(); + TCGLabel *l2 = gen_new_label(); + TCGv_i32 t0 = tcg_temp_local_new_i32(); + + /* No error here: 6 bits are used */ + tcg_gen_andi_i32(t0, arg2, 0x3F); + tcg_gen_brcondi_i32(TCG_COND_GE, t0, 32, l1); + tcg_gen_shr_i32(ret, arg1, t0); + tcg_gen_br(l2); + gen_set_label(l1); + tcg_gen_movi_i32(ret, 0); + gen_set_label(l2); + tcg_temp_free_i32(t0); +} +GEN_SPEOP_ARITH2(evsrwu, gen_op_evsrwu); +static inline void gen_op_evsrws(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGLabel *l1 = gen_new_label(); + TCGLabel *l2 = gen_new_label(); + TCGv_i32 t0 = tcg_temp_local_new_i32(); + + /* No error here: 6 bits are used */ + tcg_gen_andi_i32(t0, arg2, 0x3F); + tcg_gen_brcondi_i32(TCG_COND_GE, t0, 32, l1); + tcg_gen_sar_i32(ret, arg1, t0); + tcg_gen_br(l2); + gen_set_label(l1); + tcg_gen_movi_i32(ret, 0); + gen_set_label(l2); + tcg_temp_free_i32(t0); +} +GEN_SPEOP_ARITH2(evsrws, gen_op_evsrws); +static inline void gen_op_evslw(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGLabel *l1 = gen_new_label(); + TCGLabel *l2 = gen_new_label(); + TCGv_i32 t0 = tcg_temp_local_new_i32(); + + /* No error here: 6 bits are used */ + tcg_gen_andi_i32(t0, arg2, 0x3F); + tcg_gen_brcondi_i32(TCG_COND_GE, t0, 32, l1); + tcg_gen_shl_i32(ret, arg1, t0); + tcg_gen_br(l2); + gen_set_label(l1); + tcg_gen_movi_i32(ret, 0); + gen_set_label(l2); + tcg_temp_free_i32(t0); +} +GEN_SPEOP_ARITH2(evslw, gen_op_evslw); +static inline void gen_op_evrlw(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + tcg_gen_andi_i32(t0, arg2, 0x1F); + tcg_gen_rotl_i32(ret, arg1, t0); + tcg_temp_free_i32(t0); +} +GEN_SPEOP_ARITH2(evrlw, gen_op_evrlw); +static inline void gen_evmergehi(DisasContext 
*ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); +} +GEN_SPEOP_ARITH2(evaddw, tcg_gen_add_i32); +static inline void gen_op_evsubf(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + tcg_gen_sub_i32(ret, arg2, arg1); +} +GEN_SPEOP_ARITH2(evsubfw, gen_op_evsubf); + +/* SPE arithmetic immediate */ +#define GEN_SPEOP_ARITH_IMM2(name, tcg_op) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]); \ + tcg_op(t0, t0, rA(ctx->opcode)); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gprh[rB(ctx->opcode)]); \ + tcg_op(t0, t0, rA(ctx->opcode)); \ + tcg_gen_extu_i32_tl(cpu_gprh[rD(ctx->opcode)], t0); \ + \ + tcg_temp_free_i32(t0); \ +} +GEN_SPEOP_ARITH_IMM2(evaddiw, tcg_gen_addi_i32); +GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32); + +/* SPE comparison */ +#define GEN_SPEOP_COMP(name, tcg_cond) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + TCGLabel *l1 = gen_new_label(); \ + TCGLabel *l2 = gen_new_label(); \ + TCGLabel *l3 = gen_new_label(); \ + TCGLabel *l4 = gen_new_label(); \ + \ + tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); \ + tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); \ + tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); \ + tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); \ + \ + tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)], \ + cpu_gpr[rB(ctx->opcode)], l1); \ + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0); \ + tcg_gen_br(l2); \ + gen_set_label(l1); \ + tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], \ + CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL); \ + gen_set_label(l2); \ + tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)], \ + cpu_gprh[rB(ctx->opcode)], l3); \ + tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \ + ~(CRF_CH | CRF_CH_AND_CL)); \ + tcg_gen_br(l4); \ + gen_set_label(l3); \ + tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \ + CRF_CH | CRF_CH_OR_CL); \ + gen_set_label(l4); \ +} +GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU); +GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT); +GEN_SPEOP_COMP(evcmpltu, TCG_COND_LTU); +GEN_SPEOP_COMP(evcmplts, TCG_COND_LT); +GEN_SPEOP_COMP(evcmpeq, TCG_COND_EQ); + +/* SPE misc */ +static inline void gen_brinc(DisasContext *ctx) +{ + /* Note: brinc is usable even if SPE is disabled */ + gen_helper_brinc(cpu_gpr[rD(ctx->opcode)], + cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); +} +static inline void gen_evmergelo(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); +} +static inline void gen_evmergehilo(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); +} 
+static inline void gen_evmergelohi(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + if (rD(ctx->opcode) == rA(ctx->opcode)) { + TCGv tmp = tcg_temp_new(); + tcg_gen_mov_tl(tmp, cpu_gpr[rA(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], tmp); + tcg_temp_free(tmp); + } else { + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + } +} +static inline void gen_evsplati(DisasContext *ctx) +{ + uint64_t imm = ((int32_t)(rA(ctx->opcode) << 27)) >> 27; + + tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm); + tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm); +} +static inline void gen_evsplatfi(DisasContext *ctx) +{ + uint64_t imm = rA(ctx->opcode) << 27; + + tcg_gen_movi_tl(cpu_gpr[rD(ctx->opcode)], imm); + tcg_gen_movi_tl(cpu_gprh[rD(ctx->opcode)], imm); +} + +static inline void gen_evsel(DisasContext *ctx) +{ + TCGLabel *l1 = gen_new_label(); + TCGLabel *l2 = gen_new_label(); + TCGLabel *l3 = gen_new_label(); + TCGLabel *l4 = gen_new_label(); + TCGv_i32 t0 = tcg_temp_local_new_i32(); + + tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3); + tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); + tcg_gen_br(l2); + gen_set_label(l1); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); + gen_set_label(l2); + tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2); + tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + tcg_gen_br(l4); + gen_set_label(l3); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); + gen_set_label(l4); + tcg_temp_free_i32(t0); +} + +static void gen_evsel0(DisasContext *ctx) +{ + gen_evsel(ctx); +} + +static void gen_evsel1(DisasContext *ctx) +{ + gen_evsel(ctx); +} + +static void gen_evsel2(DisasContext *ctx) +{ + gen_evsel(ctx); +} + +static void gen_evsel3(DisasContext *ctx) +{ + gen_evsel(ctx); +} + +/* Multiply */ + +static inline void gen_evmwumi(DisasContext *ctx) +{ + TCGv_i64 t0, t1; + + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + /* t0 := rA; t1 := rB */ + tcg_gen_extu_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]); + tcg_gen_ext32u_i64(t0, t0); + tcg_gen_extu_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]); + tcg_gen_ext32u_i64(t1, t1); + + tcg_gen_mul_i64(t0, t0, t1); /* t0 := rA * rB */ + + gen_store_gpr64(rD(ctx->opcode), t0); /* rD := t0 */ + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + +static inline void gen_evmwumia(DisasContext *ctx) +{ + TCGv_i64 tmp; + + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + + gen_evmwumi(ctx); /* rD := rA * rB */ + + tmp = tcg_temp_new_i64(); + + /* acc := rD */ + gen_load_gpr64(tmp, rD(ctx->opcode)); + tcg_gen_st_i64(tmp, cpu_env, offsetof(CPUPPCState, spe_acc)); + tcg_temp_free_i64(tmp); +} + +static inline void gen_evmwumiaa(DisasContext *ctx) +{ + TCGv_i64 acc; + TCGv_i64 tmp; + + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + + gen_evmwumi(ctx); /* rD := rA * rB */ + + acc = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + + /* tmp := rD */ + gen_load_gpr64(tmp, rD(ctx->opcode)); + + /* Load acc */ + tcg_gen_ld_i64(acc, cpu_env, 
offsetof(CPUPPCState, spe_acc)); + + /* acc := tmp + acc */ + tcg_gen_add_i64(acc, acc, tmp); + + /* Store acc */ + tcg_gen_st_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); + + /* rD := acc */ + gen_store_gpr64(rD(ctx->opcode), acc); + + tcg_temp_free_i64(acc); + tcg_temp_free_i64(tmp); +} + +static inline void gen_evmwsmi(DisasContext *ctx) +{ + TCGv_i64 t0, t1; + + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + /* t0 := rA; t1 := rB */ + tcg_gen_extu_tl_i64(t0, cpu_gpr[rA(ctx->opcode)]); + tcg_gen_ext32s_i64(t0, t0); + tcg_gen_extu_tl_i64(t1, cpu_gpr[rB(ctx->opcode)]); + tcg_gen_ext32s_i64(t1, t1); + + tcg_gen_mul_i64(t0, t0, t1); /* t0 := rA * rB */ + + gen_store_gpr64(rD(ctx->opcode), t0); /* rD := t0 */ + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); +} + +static inline void gen_evmwsmia(DisasContext *ctx) +{ + TCGv_i64 tmp; + + gen_evmwsmi(ctx); /* rD := rA * rB */ + + tmp = tcg_temp_new_i64(); + + /* acc := rD */ + gen_load_gpr64(tmp, rD(ctx->opcode)); + tcg_gen_st_i64(tmp, cpu_env, offsetof(CPUPPCState, spe_acc)); + + tcg_temp_free_i64(tmp); +} + +static inline void gen_evmwsmiaa(DisasContext *ctx) +{ + TCGv_i64 acc = tcg_temp_new_i64(); + TCGv_i64 tmp = tcg_temp_new_i64(); + + gen_evmwsmi(ctx); /* rD := rA * rB */ + + acc = tcg_temp_new_i64(); + tmp = tcg_temp_new_i64(); + + /* tmp := rD */ + gen_load_gpr64(tmp, rD(ctx->opcode)); + + /* Load acc */ + tcg_gen_ld_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); + + /* acc := tmp + acc */ + tcg_gen_add_i64(acc, acc, tmp); + + /* Store acc */ + tcg_gen_st_i64(acc, cpu_env, offsetof(CPUPPCState, spe_acc)); + + /* rD := acc */ + gen_store_gpr64(rD(ctx->opcode), acc); + + tcg_temp_free_i64(acc); + tcg_temp_free_i64(tmp); +} + +GEN_SPE(evaddw, speundef, 0x00, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// +GEN_SPE(evaddiw, speundef, 0x01, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); +GEN_SPE(evsubfw, speundef, 0x02, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// +GEN_SPE(evsubifw, speundef, 0x03, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); +GEN_SPE(evabs, evneg, 0x04, 0x08, 0x0000F800, 0x0000F800, PPC_SPE); //// +GEN_SPE(evextsb, evextsh, 0x05, 0x08, 0x0000F800, 0x0000F800, PPC_SPE); //// +GEN_SPE(evrndw, evcntlzw, 0x06, 0x08, 0x0000F800, 0x0000F800, PPC_SPE); //// +GEN_SPE(evcntlsw, brinc, 0x07, 0x08, 0x0000F800, 0x00000000, PPC_SPE); // +GEN_SPE(evmra, speundef, 0x02, 0x13, 0x0000F800, 0xFFFFFFFF, PPC_SPE); +GEN_SPE(speundef, evand, 0x08, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE); //// +GEN_SPE(evandc, speundef, 0x09, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// +GEN_SPE(evxor, evor, 0x0B, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// +GEN_SPE(evnor, eveqv, 0x0C, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// +GEN_SPE(evmwumi, evmwsmi, 0x0C, 0x11, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(evmwumia, evmwsmia, 0x1C, 0x11, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(evmwumiaa, evmwsmiaa, 0x0C, 0x15, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evorc, 0x0D, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE); //// +GEN_SPE(evnand, speundef, 0x0F, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// +GEN_SPE(evsrwu, evsrws, 0x10, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// +GEN_SPE(evsrwiu, evsrwis, 0x11, 0x08, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(evslw, speundef, 0x12, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); //// +GEN_SPE(evslwi, speundef, 0x13, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE); +GEN_SPE(evrlw, evsplati, 0x14, 0x08, 0x00000000, 0x0000F800, 
PPC_SPE); // +GEN_SPE(evrlwi, evsplatfi, 0x15, 0x08, 0x00000000, 0x0000F800, PPC_SPE); +GEN_SPE(evmergehi, evmergelo, 0x16, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// +GEN_SPE(evmergehilo, evmergelohi, 0x17, 0x08, 0x00000000, 0x00000000, PPC_SPE); //// +GEN_SPE(evcmpgtu, evcmpgts, 0x18, 0x08, 0x00600000, 0x00600000, PPC_SPE); //// +GEN_SPE(evcmpltu, evcmplts, 0x19, 0x08, 0x00600000, 0x00600000, PPC_SPE); //// +GEN_SPE(evcmpeq, speundef, 0x1A, 0x08, 0x00600000, 0xFFFFFFFF, PPC_SPE); //// + +/* SPE load and stores */ +static inline void gen_addr_spe_imm_index(DisasContext *ctx, TCGv EA, int sh) +{ + target_ulong uimm = rB(ctx->opcode); + + if (rA(ctx->opcode) == 0) { + tcg_gen_movi_tl(EA, uimm << sh); + } else { + tcg_gen_addi_tl(EA, cpu_gpr[rA(ctx->opcode)], uimm << sh); + if (NARROW_MODE(ctx)) { + tcg_gen_ext32u_tl(EA, EA); + } + } +} + +static inline void gen_op_evldd(DisasContext *ctx, TCGv addr) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + gen_qemu_ld64_i64(ctx, t0, addr); + gen_store_gpr64(rD(ctx->opcode), t0); + tcg_temp_free_i64(t0); +} + +static inline void gen_op_evldw(DisasContext *ctx, TCGv addr) +{ + gen_qemu_ld32u(ctx, cpu_gprh[rD(ctx->opcode)], addr); + gen_addr_add(ctx, addr, addr, 4); + gen_qemu_ld32u(ctx, cpu_gpr[rD(ctx->opcode)], addr); +} + +static inline void gen_op_evldh(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_or_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rD(ctx->opcode)], t0); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_or_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)], t0); + tcg_temp_free(t0); +} + +static inline void gen_op_evlhhesplat(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(t0, t0, 16); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); + tcg_temp_free(t0); +} + +static inline void gen_op_evlhhousplat(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); + tcg_temp_free(t0); +} + +static inline void gen_op_evlhhossplat(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + gen_qemu_ld16s(ctx, t0, addr); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); + tcg_temp_free(t0); +} + +static inline void gen_op_evlwhe(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(cpu_gpr[rD(ctx->opcode)], t0, 16); + tcg_temp_free(t0); +} + +static inline void gen_op_evlwhou(DisasContext *ctx, TCGv addr) +{ + gen_qemu_ld16u(ctx, cpu_gprh[rD(ctx->opcode)], addr); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16u(ctx, cpu_gpr[rD(ctx->opcode)], addr); +} + +static inline void gen_op_evlwhos(DisasContext *ctx, TCGv addr) +{ + gen_qemu_ld16s(ctx, cpu_gprh[rD(ctx->opcode)], addr); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16s(ctx, cpu_gpr[rD(ctx->opcode)], addr); +} + +static inline void gen_op_evlwwsplat(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = 
tcg_temp_new(); + gen_qemu_ld32u(ctx, t0, addr); + tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], t0); + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], t0); + tcg_temp_free(t0); +} + +static inline void gen_op_evlwhsplat(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(cpu_gprh[rD(ctx->opcode)], t0, 16); + tcg_gen_or_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rD(ctx->opcode)], t0); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_ld16u(ctx, t0, addr); + tcg_gen_shli_tl(cpu_gpr[rD(ctx->opcode)], t0, 16); + tcg_gen_or_tl(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rD(ctx->opcode)], t0); + tcg_temp_free(t0); +} + +static inline void gen_op_evstdd(DisasContext *ctx, TCGv addr) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + gen_load_gpr64(t0, rS(ctx->opcode)); + gen_qemu_st64_i64(ctx, t0, addr); + tcg_temp_free_i64(t0); +} + +static inline void gen_op_evstdw(DisasContext *ctx, TCGv addr) +{ + gen_qemu_st32(ctx, cpu_gprh[rS(ctx->opcode)], addr); + gen_addr_add(ctx, addr, addr, 4); + gen_qemu_st32(ctx, cpu_gpr[rS(ctx->opcode)], addr); +} + +static inline void gen_op_evstdh(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_shri_tl(t0, cpu_gprh[rS(ctx->opcode)], 16); + gen_qemu_st16(ctx, t0, addr); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_st16(ctx, cpu_gprh[rS(ctx->opcode)], addr); + gen_addr_add(ctx, addr, addr, 2); + tcg_gen_shri_tl(t0, cpu_gpr[rS(ctx->opcode)], 16); + gen_qemu_st16(ctx, t0, addr); + tcg_temp_free(t0); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_st16(ctx, cpu_gpr[rS(ctx->opcode)], addr); +} + +static inline void gen_op_evstwhe(DisasContext *ctx, TCGv addr) +{ + TCGv t0 = tcg_temp_new(); + tcg_gen_shri_tl(t0, cpu_gprh[rS(ctx->opcode)], 16); + gen_qemu_st16(ctx, t0, addr); + gen_addr_add(ctx, addr, addr, 2); + tcg_gen_shri_tl(t0, cpu_gpr[rS(ctx->opcode)], 16); + gen_qemu_st16(ctx, t0, addr); + tcg_temp_free(t0); +} + +static inline void gen_op_evstwho(DisasContext *ctx, TCGv addr) +{ + gen_qemu_st16(ctx, cpu_gprh[rS(ctx->opcode)], addr); + gen_addr_add(ctx, addr, addr, 2); + gen_qemu_st16(ctx, cpu_gpr[rS(ctx->opcode)], addr); +} + +static inline void gen_op_evstwwe(DisasContext *ctx, TCGv addr) +{ + gen_qemu_st32(ctx, cpu_gprh[rS(ctx->opcode)], addr); +} + +static inline void gen_op_evstwwo(DisasContext *ctx, TCGv addr) +{ + gen_qemu_st32(ctx, cpu_gpr[rS(ctx->opcode)], addr); +} + +#define GEN_SPEOP_LDST(name, opc2, sh) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv t0; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + t0 = tcg_temp_new(); \ + if (Rc(ctx->opcode)) { \ + gen_addr_spe_imm_index(ctx, t0, sh); \ + } else { \ + gen_addr_reg_index(ctx, t0); \ + } \ + gen_op_##name(ctx, t0); \ + tcg_temp_free(t0); \ +} + +GEN_SPEOP_LDST(evldd, 0x00, 3); +GEN_SPEOP_LDST(evldw, 0x01, 3); +GEN_SPEOP_LDST(evldh, 0x02, 3); +GEN_SPEOP_LDST(evlhhesplat, 0x04, 1); +GEN_SPEOP_LDST(evlhhousplat, 0x06, 1); +GEN_SPEOP_LDST(evlhhossplat, 0x07, 1); +GEN_SPEOP_LDST(evlwhe, 0x08, 2); +GEN_SPEOP_LDST(evlwhou, 0x0A, 2); +GEN_SPEOP_LDST(evlwhos, 0x0B, 2); +GEN_SPEOP_LDST(evlwwsplat, 0x0C, 2); +GEN_SPEOP_LDST(evlwhsplat, 0x0E, 2); + +GEN_SPEOP_LDST(evstdd, 0x10, 3); +GEN_SPEOP_LDST(evstdw, 0x11, 3); +GEN_SPEOP_LDST(evstdh, 0x12, 3); +GEN_SPEOP_LDST(evstwhe, 0x18, 2); +GEN_SPEOP_LDST(evstwho, 0x1A, 2); +GEN_SPEOP_LDST(evstwwe, 0x1C, 2); +GEN_SPEOP_LDST(evstwwo, 0x1E, 2); + +/* Multiply and add - TODO */ +#if 0 +GEN_SPE(speundef, 
evmhessf, 0x01, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE);// +GEN_SPE(speundef, evmhossf, 0x03, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmheumi, evmhesmi, 0x04, 0x10, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhesmf, 0x05, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhoumi, evmhosmi, 0x06, 0x10, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhosmf, 0x07, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhessfa, 0x11, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhossfa, 0x13, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmheumia, evmhesmia, 0x14, 0x10, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhesmfa, 0x15, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhoumia, evmhosmia, 0x16, 0x10, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhosmfa, 0x17, 0x10, 0xFFFFFFFF, 0x00000000, PPC_SPE); + +GEN_SPE(speundef, evmwhssf, 0x03, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmwlumi, speundef, 0x04, 0x11, 0x00000000, 0xFFFFFFFF, PPC_SPE); +GEN_SPE(evmwhumi, evmwhsmi, 0x06, 0x11, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwhsmf, 0x07, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwssf, 0x09, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwsmf, 0x0D, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwhssfa, 0x13, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmwlumia, speundef, 0x14, 0x11, 0x00000000, 0xFFFFFFFF, PPC_SPE); +GEN_SPE(evmwhumia, evmwhsmia, 0x16, 0x11, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwhsmfa, 0x17, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwssfa, 0x19, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwsmfa, 0x1D, 0x11, 0xFFFFFFFF, 0x00000000, PPC_SPE); + +GEN_SPE(evadduiaaw, evaddsiaaw, 0x00, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); +GEN_SPE(evsubfusiaaw, evsubfssiaaw, 0x01, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); +GEN_SPE(evaddumiaaw, evaddsmiaaw, 0x04, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); +GEN_SPE(evsubfumiaaw, evsubfsmiaaw, 0x05, 0x13, 0x0000F800, 0x0000F800, PPC_SPE); +GEN_SPE(evdivws, evdivwu, 0x06, 0x13, 0x00000000, 0x00000000, PPC_SPE); + +GEN_SPE(evmheusiaaw, evmhessiaaw, 0x00, 0x14, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhessfaaw, 0x01, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhousiaaw, evmhossiaaw, 0x02, 0x14, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhossfaaw, 0x03, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmheumiaaw, evmhesmiaaw, 0x04, 0x14, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhesmfaaw, 0x05, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhoumiaaw, evmhosmiaaw, 0x06, 0x14, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhosmfaaw, 0x07, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhegumiaa, evmhegsmiaa, 0x14, 0x14, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhegsmfaa, 0x15, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhogumiaa, evmhogsmiaa, 0x16, 0x14, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhogsmfaa, 0x17, 0x14, 0xFFFFFFFF, 0x00000000, PPC_SPE); + +GEN_SPE(evmwlusiaaw, evmwlssiaaw, 0x00, 0x15, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(evmwlumiaaw, evmwlsmiaaw, 0x04, 0x15, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwssfaa, 0x09, 0x15, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwsmfaa, 0x0D, 0x15, 0xFFFFFFFF, 0x00000000, PPC_SPE); + +GEN_SPE(evmheusianw, evmhessianw, 0x00, 0x16, 0x00000000, 0x00000000, PPC_SPE); 
+GEN_SPE(speundef, evmhessfanw, 0x01, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhousianw, evmhossianw, 0x02, 0x16, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhossfanw, 0x03, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmheumianw, evmhesmianw, 0x04, 0x16, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhesmfanw, 0x05, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhoumianw, evmhosmianw, 0x06, 0x16, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhosmfanw, 0x07, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhegumian, evmhegsmian, 0x14, 0x16, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhegsmfan, 0x15, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmhigumian, evmhigsmian, 0x16, 0x16, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmhogsmfan, 0x17, 0x16, 0xFFFFFFFF, 0x00000000, PPC_SPE); + +GEN_SPE(evmwlusianw, evmwlssianw, 0x00, 0x17, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(evmwlumianw, evmwlsmianw, 0x04, 0x17, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwssfan, 0x09, 0x17, 0xFFFFFFFF, 0x00000000, PPC_SPE); +GEN_SPE(evmwumian, evmwsmian, 0x0C, 0x17, 0x00000000, 0x00000000, PPC_SPE); +GEN_SPE(speundef, evmwsmfan, 0x0D, 0x17, 0xFFFFFFFF, 0x00000000, PPC_SPE); +#endif + +/*** SPE floating-point extension ***/ +#define GEN_SPEFPUOP_CONV_32_32(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]); \ + gen_helper_##name(t0, cpu_env, t0); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + tcg_temp_free_i32(t0); \ +} +#define GEN_SPEFPUOP_CONV_32_64(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i64 t0 = tcg_temp_new_i64(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ + gen_load_gpr64(t0, rB(ctx->opcode)); \ + gen_helper_##name(t1, cpu_env, t0); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i32(t1); \ +} +#define GEN_SPEFPUOP_CONV_64_32(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i64 t0 = tcg_temp_new_i64(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ + tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ + gen_helper_##name(t0, cpu_env, t1); \ + gen_store_gpr64(rD(ctx->opcode), t0); \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i32(t1); \ +} +#define GEN_SPEFPUOP_CONV_64_64(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i64 t0 = tcg_temp_new_i64(); \ + gen_load_gpr64(t0, rB(ctx->opcode)); \ + gen_helper_##name(t0, cpu_env, t0); \ + gen_store_gpr64(rD(ctx->opcode), t0); \ + tcg_temp_free_i64(t0); \ +} +#define GEN_SPEFPUOP_ARITH2_32_32(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0, t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + t1 = tcg_temp_new_i32(); \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ + gen_helper_##name(t0, cpu_env, t0, t1); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ +} +#define GEN_SPEFPUOP_ARITH2_64_64(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i64 t0, t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(); \ + t1 = tcg_temp_new_i64(); \ + gen_load_gpr64(t0, rA(ctx->opcode)); \ + gen_load_gpr64(t1, 
rB(ctx->opcode)); \ + gen_helper_##name(t0, cpu_env, t0, t1); \ + gen_store_gpr64(rD(ctx->opcode), t0); \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i64(t1); \ +} +#define GEN_SPEFPUOP_COMP_32(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i32 t0, t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i32(); \ + t1 = tcg_temp_new_i32(); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ + gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \ + \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ +} +#define GEN_SPEFPUOP_COMP_64(name) \ +static inline void gen_##name(DisasContext *ctx) \ +{ \ + TCGv_i64 t0, t1; \ + if (unlikely(!ctx->spe_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_SPEU); \ + return; \ + } \ + t0 = tcg_temp_new_i64(); \ + t1 = tcg_temp_new_i64(); \ + gen_load_gpr64(t0, rA(ctx->opcode)); \ + gen_load_gpr64(t1, rB(ctx->opcode)); \ + gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i64(t1); \ +} + +/* Single precision floating-point vectors operations */ +/* Arithmetic */ +GEN_SPEFPUOP_ARITH2_64_64(evfsadd); +GEN_SPEFPUOP_ARITH2_64_64(evfssub); +GEN_SPEFPUOP_ARITH2_64_64(evfsmul); +GEN_SPEFPUOP_ARITH2_64_64(evfsdiv); +static inline void gen_evfsabs(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_andi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], + ~0x80000000); + tcg_gen_andi_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], + ~0x80000000); +} +static inline void gen_evfsnabs(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_ori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], + 0x80000000); + tcg_gen_ori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], + 0x80000000); +} +static inline void gen_evfsneg(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_xori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], + 0x80000000); + tcg_gen_xori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], + 0x80000000); +} + +/* Conversion */ +GEN_SPEFPUOP_CONV_64_64(evfscfui); +GEN_SPEFPUOP_CONV_64_64(evfscfsi); +GEN_SPEFPUOP_CONV_64_64(evfscfuf); +GEN_SPEFPUOP_CONV_64_64(evfscfsf); +GEN_SPEFPUOP_CONV_64_64(evfsctui); +GEN_SPEFPUOP_CONV_64_64(evfsctsi); +GEN_SPEFPUOP_CONV_64_64(evfsctuf); +GEN_SPEFPUOP_CONV_64_64(evfsctsf); +GEN_SPEFPUOP_CONV_64_64(evfsctuiz); +GEN_SPEFPUOP_CONV_64_64(evfsctsiz); + +/* Comparison */ +GEN_SPEFPUOP_COMP_64(evfscmpgt); +GEN_SPEFPUOP_COMP_64(evfscmplt); +GEN_SPEFPUOP_COMP_64(evfscmpeq); +GEN_SPEFPUOP_COMP_64(evfststgt); +GEN_SPEFPUOP_COMP_64(evfststlt); +GEN_SPEFPUOP_COMP_64(evfststeq); + +/* Opcodes definitions */ +GEN_SPE(evfsadd, evfssub, 0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // +GEN_SPE(evfsabs, evfsnabs, 0x02, 0x0A, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE); // +GEN_SPE(evfsneg, speundef, 0x03, 0x0A, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(evfsmul, evfsdiv, 0x04, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // +GEN_SPE(evfscmpgt, evfscmplt, 0x06, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // +GEN_SPE(evfscmpeq, speundef, 0x07, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(evfscfui, evfscfsi, 0x08, 0x0A, 0x00180000, 
0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(evfscfuf, evfscfsf, 0x09, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(evfsctui, evfsctsi, 0x0A, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(evfsctuf, evfsctsf, 0x0B, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(evfsctuiz, speundef, 0x0C, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(evfsctsiz, speundef, 0x0D, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(evfststgt, evfststlt, 0x0E, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // +GEN_SPE(evfststeq, speundef, 0x0F, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE); // + +/* Single precision floating-point operations */ +/* Arithmetic */ +GEN_SPEFPUOP_ARITH2_32_32(efsadd); +GEN_SPEFPUOP_ARITH2_32_32(efssub); +GEN_SPEFPUOP_ARITH2_32_32(efsmul); +GEN_SPEFPUOP_ARITH2_32_32(efsdiv); +static inline void gen_efsabs(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_andi_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], (target_long)~0x80000000LL); +} +static inline void gen_efsnabs(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_ori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0x80000000); +} +static inline void gen_efsneg(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_xori_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], 0x80000000); +} + +/* Conversion */ +GEN_SPEFPUOP_CONV_32_32(efscfui); +GEN_SPEFPUOP_CONV_32_32(efscfsi); +GEN_SPEFPUOP_CONV_32_32(efscfuf); +GEN_SPEFPUOP_CONV_32_32(efscfsf); +GEN_SPEFPUOP_CONV_32_32(efsctui); +GEN_SPEFPUOP_CONV_32_32(efsctsi); +GEN_SPEFPUOP_CONV_32_32(efsctuf); +GEN_SPEFPUOP_CONV_32_32(efsctsf); +GEN_SPEFPUOP_CONV_32_32(efsctuiz); +GEN_SPEFPUOP_CONV_32_32(efsctsiz); +GEN_SPEFPUOP_CONV_32_64(efscfd); + +/* Comparison */ +GEN_SPEFPUOP_COMP_32(efscmpgt); +GEN_SPEFPUOP_COMP_32(efscmplt); +GEN_SPEFPUOP_COMP_32(efscmpeq); +GEN_SPEFPUOP_COMP_32(efststgt); +GEN_SPEFPUOP_COMP_32(efststlt); +GEN_SPEFPUOP_COMP_32(efststeq); + +/* Opcodes definitions */ +GEN_SPE(efsadd, efssub, 0x00, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // +GEN_SPE(efsabs, efsnabs, 0x02, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE); // +GEN_SPE(efsneg, speundef, 0x03, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(efsmul, efsdiv, 0x04, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE); // +GEN_SPE(efscmpgt, efscmplt, 0x06, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // +GEN_SPE(efscmpeq, efscfd, 0x07, 0x0B, 0x00600000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(efscfui, efscfsi, 0x08, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(efscfuf, efscfsf, 0x09, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(efsctui, efsctsi, 0x0A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(efsctuf, efsctsf, 0x0B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE); // +GEN_SPE(efsctuiz, speundef, 0x0C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(efsctsiz, speundef, 0x0D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE); // +GEN_SPE(efststgt, efststlt, 0x0E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE); // +GEN_SPE(efststeq, speundef, 0x0F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE); // + +/* Double precision floating-point operations */ +/* Arithmetic */ +GEN_SPEFPUOP_ARITH2_64_64(efdadd); +GEN_SPEFPUOP_ARITH2_64_64(efdsub); +GEN_SPEFPUOP_ARITH2_64_64(efdmul); 
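+/*
+ * For illustration: given the GEN_SPEFPUOP_ARITH2_64_64() macro defined
+ * earlier in this file, an invocation such as the one for efdadd above
+ * should expand to roughly the following translator function:
+ *
+ *   static inline void gen_efdadd(DisasContext *ctx)
+ *   {
+ *       TCGv_i64 t0, t1;
+ *       if (unlikely(!ctx->spe_enabled)) {
+ *           gen_exception(ctx, POWERPC_EXCP_SPEU);
+ *           return;
+ *       }
+ *       t0 = tcg_temp_new_i64();
+ *       t1 = tcg_temp_new_i64();
+ *       gen_load_gpr64(t0, rA(ctx->opcode));
+ *       gen_load_gpr64(t1, rB(ctx->opcode));
+ *       gen_helper_efdadd(t0, cpu_env, t0, t1);
+ *       gen_store_gpr64(rD(ctx->opcode), t0);
+ *       tcg_temp_free_i64(t0);
+ *       tcg_temp_free_i64(t1);
+ *   }
+ *
+ * i.e. the 64-bit gprh:gpr pair of rA and rB is packed with
+ * gen_load_gpr64(), handed to the per-instruction helper, and the 64-bit
+ * result is unpacked back into rD with gen_store_gpr64().
+ */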
+GEN_SPEFPUOP_ARITH2_64_64(efddiv); +static inline void gen_efdabs(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + tcg_gen_andi_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], + ~0x80000000); +} +static inline void gen_efdnabs(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + tcg_gen_ori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], + 0x80000000); +} +static inline void gen_efdneg(DisasContext *ctx) +{ + if (unlikely(!ctx->spe_enabled)) { + gen_exception(ctx, POWERPC_EXCP_SPEU); + return; + } + tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); + tcg_gen_xori_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)], + 0x80000000); +} + +/* Conversion */ +GEN_SPEFPUOP_CONV_64_32(efdcfui); +GEN_SPEFPUOP_CONV_64_32(efdcfsi); +GEN_SPEFPUOP_CONV_64_32(efdcfuf); +GEN_SPEFPUOP_CONV_64_32(efdcfsf); +GEN_SPEFPUOP_CONV_32_64(efdctui); +GEN_SPEFPUOP_CONV_32_64(efdctsi); +GEN_SPEFPUOP_CONV_32_64(efdctuf); +GEN_SPEFPUOP_CONV_32_64(efdctsf); +GEN_SPEFPUOP_CONV_32_64(efdctuiz); +GEN_SPEFPUOP_CONV_32_64(efdctsiz); +GEN_SPEFPUOP_CONV_64_32(efdcfs); +GEN_SPEFPUOP_CONV_64_64(efdcfuid); +GEN_SPEFPUOP_CONV_64_64(efdcfsid); +GEN_SPEFPUOP_CONV_64_64(efdctuidz); +GEN_SPEFPUOP_CONV_64_64(efdctsidz); + +/* Comparison */ +GEN_SPEFPUOP_COMP_64(efdcmpgt); +GEN_SPEFPUOP_COMP_64(efdcmplt); +GEN_SPEFPUOP_COMP_64(efdcmpeq); +GEN_SPEFPUOP_COMP_64(efdtstgt); +GEN_SPEFPUOP_COMP_64(efdtstlt); +GEN_SPEFPUOP_COMP_64(efdtsteq); + +/* Opcodes definitions */ +GEN_SPE(efdadd, efdsub, 0x10, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE); // +GEN_SPE(efdcfuid, efdcfsid, 0x11, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdabs, efdnabs, 0x12, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_DOUBLE); // +GEN_SPE(efdneg, speundef, 0x13, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_DOUBLE); // +GEN_SPE(efdmul, efddiv, 0x14, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE); // +GEN_SPE(efdctuidz, efdctsidz, 0x15, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdcmpgt, efdcmplt, 0x16, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE); // +GEN_SPE(efdcmpeq, efdcfs, 0x17, 0x0B, 0x00600000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdcfui, efdcfsi, 0x18, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdcfuf, efdcfsf, 0x19, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdctui, efdctsi, 0x1A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdctuf, efdctsf, 0x1B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE); // +GEN_SPE(efdctuiz, speundef, 0x1C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // +GEN_SPE(efdctsiz, speundef, 0x1D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // +GEN_SPE(efdtstgt, efdtstlt, 0x1E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE); // +GEN_SPE(efdtsteq, speundef, 0x1F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_DOUBLE); // + +#undef GEN_SPE +#undef GEN_SPEOP_LDST diff --git a/target-ppc/translate/spe-ops.inc.c b/target-ppc/translate/spe-ops.inc.c new file mode 100644 index 0000000000..7efe8b8746 --- /dev/null +++ b/target-ppc/translate/spe-ops.inc.c @@ -0,0 +1,105 @@ +GEN_HANDLER2(evsel0, "evsel", 0x04, 0x1c, 0x09, 0x00000000, PPC_SPE), +GEN_HANDLER2(evsel1, "evsel", 0x04, 0x1d, 0x09, 0x00000000, PPC_SPE), +GEN_HANDLER2(evsel2, "evsel", 0x04, 0x1e, 0x09, 0x00000000, PPC_SPE), 
+GEN_HANDLER2(evsel3, "evsel", 0x04, 0x1f, 0x09, 0x00000000, PPC_SPE), + +#define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ + GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, PPC_NONE) +GEN_SPE(evaddw, speundef, 0x00, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evaddiw, speundef, 0x01, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evsubfw, speundef, 0x02, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evsubifw, speundef, 0x03, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evabs, evneg, 0x04, 0x08, 0x0000F800, 0x0000F800, PPC_SPE), +GEN_SPE(evextsb, evextsh, 0x05, 0x08, 0x0000F800, 0x0000F800, PPC_SPE), +GEN_SPE(evrndw, evcntlzw, 0x06, 0x08, 0x0000F800, 0x0000F800, PPC_SPE), +GEN_SPE(evcntlsw, brinc, 0x07, 0x08, 0x0000F800, 0x00000000, PPC_SPE), +GEN_SPE(evmra, speundef, 0x02, 0x13, 0x0000F800, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(speundef, evand, 0x08, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE), +GEN_SPE(evandc, speundef, 0x09, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evxor, evor, 0x0B, 0x08, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evnor, eveqv, 0x0C, 0x08, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evmwumi, evmwsmi, 0x0C, 0x11, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evmwumia, evmwsmia, 0x1C, 0x11, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evmwumiaa, evmwsmiaa, 0x0C, 0x15, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(speundef, evorc, 0x0D, 0x08, 0xFFFFFFFF, 0x00000000, PPC_SPE), +GEN_SPE(evnand, speundef, 0x0F, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evsrwu, evsrws, 0x10, 0x08, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evsrwiu, evsrwis, 0x11, 0x08, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evslw, speundef, 0x12, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evslwi, speundef, 0x13, 0x08, 0x00000000, 0xFFFFFFFF, PPC_SPE), +GEN_SPE(evrlw, evsplati, 0x14, 0x08, 0x00000000, 0x0000F800, PPC_SPE), +GEN_SPE(evrlwi, evsplatfi, 0x15, 0x08, 0x00000000, 0x0000F800, PPC_SPE), +GEN_SPE(evmergehi, evmergelo, 0x16, 0x08, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evmergehilo, evmergelohi, 0x17, 0x08, 0x00000000, 0x00000000, PPC_SPE), +GEN_SPE(evcmpgtu, evcmpgts, 0x18, 0x08, 0x00600000, 0x00600000, PPC_SPE), +GEN_SPE(evcmpltu, evcmplts, 0x19, 0x08, 0x00600000, 0x00600000, PPC_SPE), +GEN_SPE(evcmpeq, speundef, 0x1A, 0x08, 0x00600000, 0xFFFFFFFF, PPC_SPE), + +GEN_SPE(evfsadd, evfssub, 0x00, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE), +GEN_SPE(evfsabs, evfsnabs, 0x02, 0x0A, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE), +GEN_SPE(evfsneg, speundef, 0x03, 0x0A, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(evfsmul, evfsdiv, 0x04, 0x0A, 0x00000000, 0x00000000, PPC_SPE_SINGLE), +GEN_SPE(evfscmpgt, evfscmplt, 0x06, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE), +GEN_SPE(evfscmpeq, speundef, 0x07, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(evfscfui, evfscfsi, 0x08, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(evfscfuf, evfscfsf, 0x09, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(evfsctui, evfsctsi, 0x0A, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(evfsctuf, evfsctsf, 0x0B, 0x0A, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(evfsctuiz, speundef, 0x0C, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(evfsctsiz, speundef, 0x0D, 0x0A, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(evfststgt, evfststlt, 0x0E, 0x0A, 0x00600000, 0x00600000, PPC_SPE_SINGLE), +GEN_SPE(evfststeq, speundef, 0x0F, 0x0A, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE), + +GEN_SPE(efsadd, efssub, 0x00, 0x0B, 0x00000000, 
0x00000000, PPC_SPE_SINGLE), +GEN_SPE(efsabs, efsnabs, 0x02, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_SINGLE), +GEN_SPE(efsneg, speundef, 0x03, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(efsmul, efsdiv, 0x04, 0x0B, 0x00000000, 0x00000000, PPC_SPE_SINGLE), +GEN_SPE(efscmpgt, efscmplt, 0x06, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE), +GEN_SPE(efscmpeq, efscfd, 0x07, 0x0B, 0x00600000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(efscfui, efscfsi, 0x08, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(efscfuf, efscfsf, 0x09, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(efsctui, efsctsi, 0x0A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(efsctuf, efsctsf, 0x0B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_SINGLE), +GEN_SPE(efsctuiz, speundef, 0x0C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(efsctsiz, speundef, 0x0D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_SINGLE), +GEN_SPE(efststgt, efststlt, 0x0E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_SINGLE), +GEN_SPE(efststeq, speundef, 0x0F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_SINGLE), + +GEN_SPE(efdadd, efdsub, 0x10, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE), +GEN_SPE(efdcfuid, efdcfsid, 0x11, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdabs, efdnabs, 0x12, 0x0B, 0x0000F800, 0x0000F800, PPC_SPE_DOUBLE), +GEN_SPE(efdneg, speundef, 0x13, 0x0B, 0x0000F800, 0xFFFFFFFF, PPC_SPE_DOUBLE), +GEN_SPE(efdmul, efddiv, 0x14, 0x0B, 0x00000000, 0x00000000, PPC_SPE_DOUBLE), +GEN_SPE(efdctuidz, efdctsidz, 0x15, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdcmpgt, efdcmplt, 0x16, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE), +GEN_SPE(efdcmpeq, efdcfs, 0x17, 0x0B, 0x00600000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdcfui, efdcfsi, 0x18, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdcfuf, efdcfsf, 0x19, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdctui, efdctsi, 0x1A, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdctuf, efdctsf, 0x1B, 0x0B, 0x00180000, 0x00180000, PPC_SPE_DOUBLE), +GEN_SPE(efdctuiz, speundef, 0x1C, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE), +GEN_SPE(efdctsiz, speundef, 0x1D, 0x0B, 0x00180000, 0xFFFFFFFF, PPC_SPE_DOUBLE), +GEN_SPE(efdtstgt, efdtstlt, 0x1E, 0x0B, 0x00600000, 0x00600000, PPC_SPE_DOUBLE), +GEN_SPE(efdtsteq, speundef, 0x1F, 0x0B, 0x00600000, 0xFFFFFFFF, PPC_SPE_DOUBLE), + +#define GEN_SPEOP_LDST(name, opc2, sh) \ +GEN_HANDLER(name, 0x04, opc2, 0x0C, 0x00000000, PPC_SPE) +GEN_SPEOP_LDST(evldd, 0x00, 3), +GEN_SPEOP_LDST(evldw, 0x01, 3), +GEN_SPEOP_LDST(evldh, 0x02, 3), +GEN_SPEOP_LDST(evlhhesplat, 0x04, 1), +GEN_SPEOP_LDST(evlhhousplat, 0x06, 1), +GEN_SPEOP_LDST(evlhhossplat, 0x07, 1), +GEN_SPEOP_LDST(evlwhe, 0x08, 2), +GEN_SPEOP_LDST(evlwhou, 0x0A, 2), +GEN_SPEOP_LDST(evlwhos, 0x0B, 2), +GEN_SPEOP_LDST(evlwwsplat, 0x0C, 2), +GEN_SPEOP_LDST(evlwhsplat, 0x0E, 2), + +GEN_SPEOP_LDST(evstdd, 0x10, 3), +GEN_SPEOP_LDST(evstdw, 0x11, 3), +GEN_SPEOP_LDST(evstdh, 0x12, 3), +GEN_SPEOP_LDST(evstwhe, 0x18, 2), +GEN_SPEOP_LDST(evstwho, 0x1A, 2), +GEN_SPEOP_LDST(evstwwe, 0x1C, 2), +GEN_SPEOP_LDST(evstwwo, 0x1E, 2), diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c new file mode 100644 index 0000000000..7143eb3a39 --- /dev/null +++ b/target-ppc/translate/vmx-impl.inc.c @@ -0,0 +1,1113 @@ +/* + * translate/vmx-impl.c + * + * Altivec/VMX translation + */ + +/*** Altivec vector extension ***/ +/* Altivec registers moves */ + +static inline TCGv_ptr gen_avr_ptr(int reg) +{ + TCGv_ptr r = tcg_temp_new_ptr(); + 
tcg_gen_addi_ptr(r, cpu_env, offsetof(CPUPPCState, avr[reg])); + return r; +} + +#define GEN_VR_LDX(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + tcg_gen_andi_tl(EA, EA, ~0xf); \ + /* We only need to swap high and low halves. gen_qemu_ld64_i64 does \ the necessary 64-bit byteswap already. */ \ + if (ctx->le_mode) { \ + gen_qemu_ld64_i64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ + tcg_gen_addi_tl(EA, EA, 8); \ + gen_qemu_ld64_i64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ + } else { \ + gen_qemu_ld64_i64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ + tcg_gen_addi_tl(EA, EA, 8); \ + gen_qemu_ld64_i64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ + } \ + tcg_temp_free(EA); \ +} + +#define GEN_VR_STX(name, opc2, opc3) \ +static void gen_st##name(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + tcg_gen_andi_tl(EA, EA, ~0xf); \ + /* We only need to swap high and low halves. gen_qemu_st64_i64 does \ the necessary 64-bit byteswap already. */ \ + if (ctx->le_mode) { \ + gen_qemu_st64_i64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ + tcg_gen_addi_tl(EA, EA, 8); \ + gen_qemu_st64_i64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ + } else { \ + gen_qemu_st64_i64(ctx, cpu_avrh[rD(ctx->opcode)], EA); \ + tcg_gen_addi_tl(EA, EA, 8); \ + gen_qemu_st64_i64(ctx, cpu_avrl[rD(ctx->opcode)], EA); \ + } \ + tcg_temp_free(EA); \ +} + +#define GEN_VR_LVE(name, opc2, opc3, size) \ +static void gen_lve##name(DisasContext *ctx) \ + { \ + TCGv EA; \ + TCGv_ptr rs; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + if (size > 1) { \ + tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ + } \ + rs = gen_avr_ptr(rS(ctx->opcode)); \ + gen_helper_lve##name(cpu_env, rs, EA); \ + tcg_temp_free(EA); \ + tcg_temp_free_ptr(rs); \ + } + +#define GEN_VR_STVE(name, opc2, opc3, size) \ +static void gen_stve##name(DisasContext *ctx) \ + { \ + TCGv EA; \ + TCGv_ptr rs; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + if (size > 1) { \ + tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ + } \ + rs = gen_avr_ptr(rS(ctx->opcode)); \ + gen_helper_stve##name(cpu_env, rs, EA); \ + tcg_temp_free(EA); \ + tcg_temp_free_ptr(rs); \ + } + +GEN_VR_LDX(lvx, 0x07, 0x03); +/* As we don't emulate the cache, lvxl is strictly equivalent to lvx */ +GEN_VR_LDX(lvxl, 0x07, 0x0B); + +GEN_VR_LVE(bx, 0x07, 0x00, 1); +GEN_VR_LVE(hx, 0x07, 0x01, 2); +GEN_VR_LVE(wx, 0x07, 0x02, 4); + +GEN_VR_STX(svx, 0x07, 0x07); +/* As we don't emulate the cache, stvxl is strictly equivalent to stvx */ +GEN_VR_STX(svxl, 0x07, 0x0F); + +GEN_VR_STVE(bx, 0x07, 0x04, 1); +GEN_VR_STVE(hx, 0x07, 0x05, 2); +GEN_VR_STVE(wx, 0x07, 0x06, 4); + +static void gen_lvsl(DisasContext *ctx) +{ + TCGv_ptr rd; + TCGv EA; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + rd = gen_avr_ptr(rD(ctx->opcode)); +
gen_helper_lvsl(rd, EA); + tcg_temp_free(EA); + tcg_temp_free_ptr(rd); +} + +static void gen_lvsr(DisasContext *ctx) +{ + TCGv_ptr rd; + TCGv EA; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + rd = gen_avr_ptr(rD(ctx->opcode)); + gen_helper_lvsr(rd, EA); + tcg_temp_free(EA); + tcg_temp_free_ptr(rd); +} + +static void gen_mfvscr(DisasContext *ctx) +{ + TCGv_i32 t; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); + t = tcg_temp_new_i32(); + tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, vscr)); + tcg_gen_extu_i32_i64(cpu_avrl[rD(ctx->opcode)], t); + tcg_temp_free_i32(t); +} + +static void gen_mtvscr(DisasContext *ctx) +{ + TCGv_ptr p; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + p = gen_avr_ptr(rB(ctx->opcode)); + gen_helper_mtvscr(cpu_env, p); + tcg_temp_free_ptr(p); +} + +#define GEN_VX_VMUL10(name, add_cin, ret_carry) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv_i64 t0 = tcg_temp_new_i64(); \ + TCGv_i64 t1 = tcg_temp_new_i64(); \ + TCGv_i64 t2 = tcg_temp_new_i64(); \ + TCGv_i64 ten, z; \ + \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + \ + ten = tcg_const_i64(10); \ + z = tcg_const_i64(0); \ + \ + if (add_cin) { \ + tcg_gen_mulu2_i64(t0, t1, cpu_avrl[rA(ctx->opcode)], ten); \ + tcg_gen_andi_i64(t2, cpu_avrl[rB(ctx->opcode)], 0xF); \ + tcg_gen_add2_i64(cpu_avrl[rD(ctx->opcode)], t2, t0, t1, t2, z); \ + } else { \ + tcg_gen_mulu2_i64(cpu_avrl[rD(ctx->opcode)], t2, \ + cpu_avrl[rA(ctx->opcode)], ten); \ + } \ + \ + if (ret_carry) { \ + tcg_gen_mulu2_i64(t0, t1, cpu_avrh[rA(ctx->opcode)], ten); \ + tcg_gen_add2_i64(t0, cpu_avrl[rD(ctx->opcode)], t0, t1, t2, z); \ + tcg_gen_movi_i64(cpu_avrh[rD(ctx->opcode)], 0); \ + } else { \ + tcg_gen_mul_i64(t0, cpu_avrh[rA(ctx->opcode)], ten); \ + tcg_gen_add_i64(cpu_avrh[rD(ctx->opcode)], t0, t2); \ + } \ + \ + tcg_temp_free_i64(t0); \ + tcg_temp_free_i64(t1); \ + tcg_temp_free_i64(t2); \ + tcg_temp_free_i64(ten); \ + tcg_temp_free_i64(z); \ +} \ + +GEN_VX_VMUL10(vmul10uq, 0, 0); +GEN_VX_VMUL10(vmul10euq, 1, 0); +GEN_VX_VMUL10(vmul10cuq, 0, 1); +GEN_VX_VMUL10(vmul10ecuq, 1, 1); + +/* Logical operations */ +#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + tcg_op(cpu_avrh[rD(ctx->opcode)], cpu_avrh[rA(ctx->opcode)], cpu_avrh[rB(ctx->opcode)]); \ + tcg_op(cpu_avrl[rD(ctx->opcode)], cpu_avrl[rA(ctx->opcode)], cpu_avrl[rB(ctx->opcode)]); \ +} + +GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16); +GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17); +GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18); +GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19); +GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20); +GEN_VX_LOGICAL(veqv, tcg_gen_eqv_i64, 2, 26); +GEN_VX_LOGICAL(vnand, tcg_gen_nand_i64, 2, 22); +GEN_VX_LOGICAL(vorc, tcg_gen_orc_i64, 2, 21); + +#define GEN_VXFORM(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv_ptr ra, rb, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name 
(rd, ra, rb); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ +} + +#define GEN_VXFORM_ENV(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv_ptr ra, rb, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name(cpu_env, rd, ra, rb); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ +} + +#define GEN_VXFORM3(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv_ptr ra, rb, rc, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rc = gen_avr_ptr(rC(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name(rd, ra, rb, rc); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rc); \ + tcg_temp_free_ptr(rd); \ +} + +/* + * Support for Altivec instruction pairs that use bit 31 (Rc) as + * an opcode bit. In general, these pairs come from different + * versions of the ISA, so we must also support a pair of flags for + * each instruction. + */ +#define GEN_VXFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \ +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ +{ \ + if ((Rc(ctx->opcode) == 0) && \ + ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \ + gen_##name0(ctx); \ + } else if ((Rc(ctx->opcode) == 1) && \ + ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \ + gen_##name1(ctx); \ + } else { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + } \ +} + +/* Adds support to provide invalid mask */ +#define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0, \ + name1, flg1, flg2_1, inval1) \ +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ +{ \ + if ((Rc(ctx->opcode) == 0) && \ + ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0)) && \ + !(ctx->opcode & inval0)) { \ + gen_##name0(ctx); \ + } else if ((Rc(ctx->opcode) == 1) && \ + ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1)) && \ + !(ctx->opcode & inval1)) { \ + gen_##name1(ctx); \ + } else { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + } \ +} + +GEN_VXFORM(vaddubm, 0, 0); +GEN_VXFORM_DUAL_EXT(vaddubm, PPC_ALTIVEC, PPC_NONE, 0, \ + vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800) +GEN_VXFORM(vadduhm, 0, 1); +GEN_VXFORM_DUAL(vadduhm, PPC_ALTIVEC, PPC_NONE, \ + vmul10ecuq, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vadduwm, 0, 2); +GEN_VXFORM(vaddudm, 0, 3); +GEN_VXFORM(vsububm, 0, 16); +GEN_VXFORM(vsubuhm, 0, 17); +GEN_VXFORM(vsubuwm, 0, 18); +GEN_VXFORM(vsubudm, 0, 19); +GEN_VXFORM(vmaxub, 1, 0); +GEN_VXFORM(vmaxuh, 1, 1); +GEN_VXFORM(vmaxuw, 1, 2); +GEN_VXFORM(vmaxud, 1, 3); +GEN_VXFORM(vmaxsb, 1, 4); +GEN_VXFORM(vmaxsh, 1, 5); +GEN_VXFORM(vmaxsw, 1, 6); +GEN_VXFORM(vmaxsd, 1, 7); +GEN_VXFORM(vminub, 1, 8); +GEN_VXFORM(vminuh, 1, 9); +GEN_VXFORM(vminuw, 1, 10); +GEN_VXFORM(vminud, 1, 11); +GEN_VXFORM(vminsb, 1, 12); +GEN_VXFORM(vminsh, 1, 13); +GEN_VXFORM(vminsw, 1, 14); +GEN_VXFORM(vminsd, 1, 15); +GEN_VXFORM(vavgub, 1, 16); +GEN_VXFORM(vabsdub, 1, 16); +GEN_VXFORM_DUAL(vavgub, PPC_ALTIVEC, PPC_NONE, \ + vabsdub, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vavguh, 1, 17); +GEN_VXFORM(vabsduh, 1, 17); +GEN_VXFORM_DUAL(vavguh, PPC_ALTIVEC, PPC_NONE, \ + vabsduh, PPC_NONE, PPC2_ISA300) 
+GEN_VXFORM(vavguw, 1, 18); +GEN_VXFORM(vabsduw, 1, 18); +GEN_VXFORM_DUAL(vavguw, PPC_ALTIVEC, PPC_NONE, \ + vabsduw, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vavgsb, 1, 20); +GEN_VXFORM(vavgsh, 1, 21); +GEN_VXFORM(vavgsw, 1, 22); +GEN_VXFORM(vmrghb, 6, 0); +GEN_VXFORM(vmrghh, 6, 1); +GEN_VXFORM(vmrghw, 6, 2); +GEN_VXFORM(vmrglb, 6, 4); +GEN_VXFORM(vmrglh, 6, 5); +GEN_VXFORM(vmrglw, 6, 6); + +static void gen_vmrgew(DisasContext *ctx) +{ + TCGv_i64 tmp; + int VT, VA, VB; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + VT = rD(ctx->opcode); + VA = rA(ctx->opcode); + VB = rB(ctx->opcode); + tmp = tcg_temp_new_i64(); + tcg_gen_shri_i64(tmp, cpu_avrh[VB], 32); + tcg_gen_deposit_i64(cpu_avrh[VT], cpu_avrh[VA], tmp, 0, 32); + tcg_gen_shri_i64(tmp, cpu_avrl[VB], 32); + tcg_gen_deposit_i64(cpu_avrl[VT], cpu_avrl[VA], tmp, 0, 32); + tcg_temp_free_i64(tmp); +} + +static void gen_vmrgow(DisasContext *ctx) +{ + int VT, VA, VB; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + VT = rD(ctx->opcode); + VA = rA(ctx->opcode); + VB = rB(ctx->opcode); + + tcg_gen_deposit_i64(cpu_avrh[VT], cpu_avrh[VB], cpu_avrh[VA], 32, 32); + tcg_gen_deposit_i64(cpu_avrl[VT], cpu_avrl[VB], cpu_avrl[VA], 32, 32); +} + +GEN_VXFORM(vmuloub, 4, 0); +GEN_VXFORM(vmulouh, 4, 1); +GEN_VXFORM(vmulouw, 4, 2); +GEN_VXFORM(vmuluwm, 4, 2); +GEN_VXFORM_DUAL(vmulouw, PPC_ALTIVEC, PPC_NONE, + vmuluwm, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM(vmulosb, 4, 4); +GEN_VXFORM(vmulosh, 4, 5); +GEN_VXFORM(vmulosw, 4, 6); +GEN_VXFORM(vmuleub, 4, 8); +GEN_VXFORM(vmuleuh, 4, 9); +GEN_VXFORM(vmuleuw, 4, 10); +GEN_VXFORM(vmulesb, 4, 12); +GEN_VXFORM(vmulesh, 4, 13); +GEN_VXFORM(vmulesw, 4, 14); +GEN_VXFORM(vslb, 2, 4); +GEN_VXFORM(vslh, 2, 5); +GEN_VXFORM(vslw, 2, 6); +GEN_VXFORM(vrlwnm, 2, 6); +GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \ + vrlwnm, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vsld, 2, 23); +GEN_VXFORM(vsrb, 2, 8); +GEN_VXFORM(vsrh, 2, 9); +GEN_VXFORM(vsrw, 2, 10); +GEN_VXFORM(vsrd, 2, 27); +GEN_VXFORM(vsrab, 2, 12); +GEN_VXFORM(vsrah, 2, 13); +GEN_VXFORM(vsraw, 2, 14); +GEN_VXFORM(vsrad, 2, 15); +GEN_VXFORM(vsrv, 2, 28); +GEN_VXFORM(vslv, 2, 29); +GEN_VXFORM(vslo, 6, 16); +GEN_VXFORM(vsro, 6, 17); +GEN_VXFORM(vaddcuw, 0, 6); +GEN_VXFORM(vsubcuw, 0, 22); +GEN_VXFORM_ENV(vaddubs, 0, 8); +GEN_VXFORM_DUAL_EXT(vaddubs, PPC_ALTIVEC, PPC_NONE, 0, \ + vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800) +GEN_VXFORM_ENV(vadduhs, 0, 9); +GEN_VXFORM_DUAL(vadduhs, PPC_ALTIVEC, PPC_NONE, \ + vmul10euq, PPC_NONE, PPC2_ISA300) +GEN_VXFORM_ENV(vadduws, 0, 10); +GEN_VXFORM_ENV(vaddsbs, 0, 12); +GEN_VXFORM_ENV(vaddshs, 0, 13); +GEN_VXFORM_ENV(vaddsws, 0, 14); +GEN_VXFORM_ENV(vsububs, 0, 24); +GEN_VXFORM_ENV(vsubuhs, 0, 25); +GEN_VXFORM_ENV(vsubuws, 0, 26); +GEN_VXFORM_ENV(vsubsbs, 0, 28); +GEN_VXFORM_ENV(vsubshs, 0, 29); +GEN_VXFORM_ENV(vsubsws, 0, 30); +GEN_VXFORM(vadduqm, 0, 4); +GEN_VXFORM(vaddcuq, 0, 5); +GEN_VXFORM3(vaddeuqm, 30, 0); +GEN_VXFORM3(vaddecuq, 30, 0); +GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ + vaddecuq, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM(vsubuqm, 0, 20); +GEN_VXFORM(vsubcuq, 0, 21); +GEN_VXFORM3(vsubeuqm, 31, 0); +GEN_VXFORM3(vsubecuq, 31, 0); +GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ + vsubecuq, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM(vrlb, 2, 0); +GEN_VXFORM(vrlh, 2, 1); +GEN_VXFORM(vrlw, 2, 2); +GEN_VXFORM(vrlwmi, 2, 2); +GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \ + vrlwmi, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vrld, 2, 3); 
+GEN_VXFORM(vrldmi, 2, 3); +GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \ + vrldmi, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vsl, 2, 7); +GEN_VXFORM(vrldnm, 2, 7); +GEN_VXFORM_DUAL(vsl, PPC_ALTIVEC, PPC_NONE, \ + vrldnm, PPC_NONE, PPC2_ISA300) +GEN_VXFORM(vsr, 2, 11); +GEN_VXFORM_ENV(vpkuhum, 7, 0); +GEN_VXFORM_ENV(vpkuwum, 7, 1); +GEN_VXFORM_ENV(vpkudum, 7, 17); +GEN_VXFORM_ENV(vpkuhus, 7, 2); +GEN_VXFORM_ENV(vpkuwus, 7, 3); +GEN_VXFORM_ENV(vpkudus, 7, 19); +GEN_VXFORM_ENV(vpkshus, 7, 4); +GEN_VXFORM_ENV(vpkswus, 7, 5); +GEN_VXFORM_ENV(vpksdus, 7, 21); +GEN_VXFORM_ENV(vpkshss, 7, 6); +GEN_VXFORM_ENV(vpkswss, 7, 7); +GEN_VXFORM_ENV(vpksdss, 7, 23); +GEN_VXFORM(vpkpx, 7, 12); +GEN_VXFORM_ENV(vsum4ubs, 4, 24); +GEN_VXFORM_ENV(vsum4sbs, 4, 28); +GEN_VXFORM_ENV(vsum4shs, 4, 25); +GEN_VXFORM_ENV(vsum2sws, 4, 26); +GEN_VXFORM_ENV(vsumsws, 4, 30); +GEN_VXFORM_ENV(vaddfp, 5, 0); +GEN_VXFORM_ENV(vsubfp, 5, 1); +GEN_VXFORM_ENV(vmaxfp, 5, 16); +GEN_VXFORM_ENV(vminfp, 5, 17); + +#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr ra, rb, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##opname(cpu_env, rd, ra, rb); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + +#define GEN_VXRFORM(name, opc2, opc3) \ + GEN_VXRFORM1(name, name, #name, opc2, opc3) \ + GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4))) + +/* + * Support for Altivec instructions that use bit 31 (Rc) as an opcode + * bit but also use bit 21 as an actual Rc bit. In general, these pairs + * come from different versions of the ISA, so we must also support a + * pair of flags for each instruction.
+ */ +#define GEN_VXRFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \ +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ +{ \ + if ((Rc(ctx->opcode) == 0) && \ + ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \ + if (Rc21(ctx->opcode) == 0) { \ + gen_##name0(ctx); \ + } else { \ + gen_##name0##_(ctx); \ + } \ + } else if ((Rc(ctx->opcode) == 1) && \ + ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \ + if (Rc21(ctx->opcode) == 0) { \ + gen_##name1(ctx); \ + } else { \ + gen_##name1##_(ctx); \ + } \ + } else { \ + gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \ + } \ +} + +GEN_VXRFORM(vcmpequb, 3, 0) +GEN_VXRFORM(vcmpequh, 3, 1) +GEN_VXRFORM(vcmpequw, 3, 2) +GEN_VXRFORM(vcmpequd, 3, 3) +GEN_VXRFORM(vcmpnezb, 3, 4) +GEN_VXRFORM(vcmpnezh, 3, 5) +GEN_VXRFORM(vcmpnezw, 3, 6) +GEN_VXRFORM(vcmpgtsb, 3, 12) +GEN_VXRFORM(vcmpgtsh, 3, 13) +GEN_VXRFORM(vcmpgtsw, 3, 14) +GEN_VXRFORM(vcmpgtsd, 3, 15) +GEN_VXRFORM(vcmpgtub, 3, 8) +GEN_VXRFORM(vcmpgtuh, 3, 9) +GEN_VXRFORM(vcmpgtuw, 3, 10) +GEN_VXRFORM(vcmpgtud, 3, 11) +GEN_VXRFORM(vcmpeqfp, 3, 3) +GEN_VXRFORM(vcmpgefp, 3, 7) +GEN_VXRFORM(vcmpgtfp, 3, 11) +GEN_VXRFORM(vcmpbfp, 3, 15) +GEN_VXRFORM(vcmpneb, 3, 0) +GEN_VXRFORM(vcmpneh, 3, 1) +GEN_VXRFORM(vcmpnew, 3, 2) + +GEN_VXRFORM_DUAL(vcmpequb, PPC_ALTIVEC, PPC_NONE, \ + vcmpneb, PPC_NONE, PPC2_ISA300) +GEN_VXRFORM_DUAL(vcmpequh, PPC_ALTIVEC, PPC_NONE, \ + vcmpneh, PPC_NONE, PPC2_ISA300) +GEN_VXRFORM_DUAL(vcmpequw, PPC_ALTIVEC, PPC_NONE, \ + vcmpnew, PPC_NONE, PPC2_ISA300) +GEN_VXRFORM_DUAL(vcmpeqfp, PPC_ALTIVEC, PPC_NONE, \ + vcmpequd, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXRFORM_DUAL(vcmpbfp, PPC_ALTIVEC, PPC_NONE, \ + vcmpgtsd, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXRFORM_DUAL(vcmpgtfp, PPC_ALTIVEC, PPC_NONE, \ + vcmpgtud, PPC_NONE, PPC2_ALTIVEC_207) + +#define GEN_VXFORM_SIMM(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rd; \ + TCGv_i32 simm; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + simm = tcg_const_i32(SIMM5(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name (rd, simm); \ + tcg_temp_free_i32(simm); \ + tcg_temp_free_ptr(rd); \ + } + +GEN_VXFORM_SIMM(vspltisb, 6, 12); +GEN_VXFORM_SIMM(vspltish, 6, 13); +GEN_VXFORM_SIMM(vspltisw, 6, 14); + +#define GEN_VXFORM_NOA(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name (rd, rb); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + +#define GEN_VXFORM_NOA_ENV(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb, rd; \ + \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name(cpu_env, rd, rb); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + +#define GEN_VXFORM_NOA_2(name, opc2, opc3, opc4) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name(rd, rb); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + 
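Aside: the bit-31/bit-21 dual dispatch described in the comment above is easiest to follow by hand-expanding one of the GEN_VXRFORM_DUAL uses. For the vcmpequb/vcmpneb pair declared above, the macro produces roughly the dispatcher sketched below; this is only an illustration of the expansion, assuming the gen_vcmpequb/gen_vcmpequb_ and gen_vcmpneb/gen_vcmpneb_ handlers emitted by GEN_VXRFORM, and is not literal patch text.

    /* Sketch of GEN_VXRFORM_DUAL(vcmpequb, PPC_ALTIVEC, PPC_NONE,
     *                            vcmpneb, PPC_NONE, PPC2_ISA300) after expansion */
    static void gen_vcmpequb_vcmpneb(DisasContext *ctx)
    {
        if ((Rc(ctx->opcode) == 0) &&
            ((ctx->insns_flags & PPC_ALTIVEC) || (ctx->insns_flags2 & PPC_NONE))) {
            /* bit 31 clear: original Altivec vcmpequb; bit 21 picks the record form */
            if (Rc21(ctx->opcode) == 0) {
                gen_vcmpequb(ctx);
            } else {
                gen_vcmpequb_(ctx);
            }
        } else if ((Rc(ctx->opcode) == 1) &&
                   ((ctx->insns_flags & PPC_NONE) || (ctx->insns_flags2 & PPC2_ISA300))) {
            /* bit 31 set: ISA 3.00 vcmpneb, again with bit 21 as the real Rc */
            if (Rc21(ctx->opcode) == 0) {
                gen_vcmpneb(ctx);
            } else {
                gen_vcmpneb_(ctx);
            }
        } else {
            gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);
        }
    }

The two branches are distinguished by opcode bit 31 together with the per-CPU insns_flags/insns_flags2 masks, which is how a single opcode-table slot can serve both the legacy and the ISA 3.00 instruction.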
+#define GEN_VXFORM_NOA_3(name, opc2, opc3, opc4) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + gen_helper_##name(cpu_gpr[rD(ctx->opcode)], rb); \ + tcg_temp_free_ptr(rb); \ + } +GEN_VXFORM_NOA(vupkhsb, 7, 8); +GEN_VXFORM_NOA(vupkhsh, 7, 9); +GEN_VXFORM_NOA(vupkhsw, 7, 25); +GEN_VXFORM_NOA(vupklsb, 7, 10); +GEN_VXFORM_NOA(vupklsh, 7, 11); +GEN_VXFORM_NOA(vupklsw, 7, 27); +GEN_VXFORM_NOA(vupkhpx, 7, 13); +GEN_VXFORM_NOA(vupklpx, 7, 15); +GEN_VXFORM_NOA_ENV(vrefp, 5, 4); +GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5); +GEN_VXFORM_NOA_ENV(vexptefp, 5, 6); +GEN_VXFORM_NOA_ENV(vlogefp, 5, 7); +GEN_VXFORM_NOA_ENV(vrfim, 5, 11); +GEN_VXFORM_NOA_ENV(vrfin, 5, 8); +GEN_VXFORM_NOA_ENV(vrfip, 5, 10); +GEN_VXFORM_NOA_ENV(vrfiz, 5, 9); +GEN_VXFORM_NOA(vprtybw, 1, 24); +GEN_VXFORM_NOA(vprtybd, 1, 24); +GEN_VXFORM_NOA(vprtybq, 1, 24); + +#define GEN_VXFORM_SIMM(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rd; \ + TCGv_i32 simm; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + simm = tcg_const_i32(SIMM5(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name (rd, simm); \ + tcg_temp_free_i32(simm); \ + tcg_temp_free_ptr(rd); \ + } + +#define GEN_VXFORM_UIMM(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb, rd; \ + TCGv_i32 uimm; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + uimm = tcg_const_i32(UIMM5(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name (rd, rb, uimm); \ + tcg_temp_free_i32(uimm); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + +#define GEN_VXFORM_UIMM_ENV(name, opc2, opc3) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb, rd; \ + TCGv_i32 uimm; \ + \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + uimm = tcg_const_i32(UIMM5(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name(cpu_env, rd, rb, uimm); \ + tcg_temp_free_i32(uimm); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + +#define GEN_VXFORM_UIMM_SPLAT(name, opc2, opc3, splat_max) \ +static void glue(gen_, name)(DisasContext *ctx) \ + { \ + TCGv_ptr rb, rd; \ + uint8_t uimm = UIMM4(ctx->opcode); \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + if (uimm > splat_max) { \ + uimm = 0; \ + } \ + tcg_gen_movi_i32(t0, uimm); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + gen_helper_##name(rd, rb, t0); \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + } + +GEN_VXFORM_UIMM(vspltb, 6, 8); +GEN_VXFORM_UIMM(vsplth, 6, 9); +GEN_VXFORM_UIMM(vspltw, 6, 10); +GEN_VXFORM_UIMM_SPLAT(vextractub, 6, 8, 15); +GEN_VXFORM_UIMM_SPLAT(vextractuh, 6, 9, 14); +GEN_VXFORM_UIMM_SPLAT(vextractuw, 6, 10, 12); +GEN_VXFORM_UIMM_SPLAT(vextractd, 6, 11, 8); +GEN_VXFORM_UIMM_SPLAT(vinsertb, 6, 12, 15); +GEN_VXFORM_UIMM_SPLAT(vinserth, 6, 13, 14); +GEN_VXFORM_UIMM_SPLAT(vinsertw, 6, 14, 12); +GEN_VXFORM_UIMM_SPLAT(vinsertd, 6, 15, 8); +GEN_VXFORM_UIMM_ENV(vcfux, 5, 12); +GEN_VXFORM_UIMM_ENV(vcfsx, 5, 
13); +GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14); +GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15); +GEN_VXFORM_DUAL(vspltb, PPC_ALTIVEC, PPC_NONE, + vextractub, PPC_NONE, PPC2_ISA300); +GEN_VXFORM_DUAL(vsplth, PPC_ALTIVEC, PPC_NONE, + vextractuh, PPC_NONE, PPC2_ISA300); +GEN_VXFORM_DUAL(vspltw, PPC_ALTIVEC, PPC_NONE, + vextractuw, PPC_NONE, PPC2_ISA300); +GEN_VXFORM_DUAL(vspltisb, PPC_ALTIVEC, PPC_NONE, + vinsertb, PPC_NONE, PPC2_ISA300); +GEN_VXFORM_DUAL(vspltish, PPC_ALTIVEC, PPC_NONE, + vinserth, PPC_NONE, PPC2_ISA300); +GEN_VXFORM_DUAL(vspltisw, PPC_ALTIVEC, PPC_NONE, + vinsertw, PPC_NONE, PPC2_ISA300); + +static void gen_vsldoi(DisasContext *ctx) +{ + TCGv_ptr ra, rb, rd; + TCGv_i32 sh; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + ra = gen_avr_ptr(rA(ctx->opcode)); + rb = gen_avr_ptr(rB(ctx->opcode)); + rd = gen_avr_ptr(rD(ctx->opcode)); + sh = tcg_const_i32(VSH(ctx->opcode)); + gen_helper_vsldoi (rd, ra, rb, sh); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rb); + tcg_temp_free_ptr(rd); + tcg_temp_free_i32(sh); +} + +#define GEN_VAFORM_PAIRED(name0, name1, opc2) \ +static void glue(gen_, name0##_##name1)(DisasContext *ctx) \ + { \ + TCGv_ptr ra, rb, rc, rd; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rc = gen_avr_ptr(rC(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + if (Rc(ctx->opcode)) { \ + gen_helper_##name1(cpu_env, rd, ra, rb, rc); \ + } else { \ + gen_helper_##name0(cpu_env, rd, ra, rb, rc); \ + } \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rc); \ + tcg_temp_free_ptr(rd); \ + } + +GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16) + +static void gen_vmladduhm(DisasContext *ctx) +{ + TCGv_ptr ra, rb, rc, rd; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + ra = gen_avr_ptr(rA(ctx->opcode)); + rb = gen_avr_ptr(rB(ctx->opcode)); + rc = gen_avr_ptr(rC(ctx->opcode)); + rd = gen_avr_ptr(rD(ctx->opcode)); + gen_helper_vmladduhm(rd, ra, rb, rc); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rb); + tcg_temp_free_ptr(rc); + tcg_temp_free_ptr(rd); +} + +static void gen_vpermr(DisasContext *ctx) +{ + TCGv_ptr ra, rb, rc, rd; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + ra = gen_avr_ptr(rA(ctx->opcode)); + rb = gen_avr_ptr(rB(ctx->opcode)); + rc = gen_avr_ptr(rC(ctx->opcode)); + rd = gen_avr_ptr(rD(ctx->opcode)); + gen_helper_vpermr(cpu_env, rd, ra, rb, rc); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rb); + tcg_temp_free_ptr(rc); + tcg_temp_free_ptr(rd); +} + +GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18) +GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19) +GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20) +GEN_VAFORM_PAIRED(vsel, vperm, 21) +GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) + +GEN_VXFORM_NOA(vclzb, 1, 28) +GEN_VXFORM_NOA(vclzh, 1, 29) +GEN_VXFORM_NOA(vclzw, 1, 30) +GEN_VXFORM_NOA(vclzd, 1, 31) +GEN_VXFORM_NOA_2(vnegw, 1, 24, 6) +GEN_VXFORM_NOA_2(vnegd, 1, 24, 7) +GEN_VXFORM_NOA_2(vextsb2w, 1, 24, 16) +GEN_VXFORM_NOA_2(vextsh2w, 1, 24, 17) +GEN_VXFORM_NOA_2(vextsb2d, 1, 24, 24) +GEN_VXFORM_NOA_2(vextsh2d, 1, 24, 25) +GEN_VXFORM_NOA_2(vextsw2d, 1, 24, 26) +GEN_VXFORM_NOA_2(vctzb, 1, 24, 28) +GEN_VXFORM_NOA_2(vctzh, 1, 24, 29) +GEN_VXFORM_NOA_2(vctzw, 1, 24, 30) +GEN_VXFORM_NOA_2(vctzd, 1, 24, 31) +GEN_VXFORM_NOA_3(vclzlsbb, 1, 24, 0) +GEN_VXFORM_NOA_3(vctzlsbb, 1, 24, 1) +GEN_VXFORM_NOA(vpopcntb, 1, 
28) +GEN_VXFORM_NOA(vpopcnth, 1, 29) +GEN_VXFORM_NOA(vpopcntw, 1, 30) +GEN_VXFORM_NOA(vpopcntd, 1, 31) +GEN_VXFORM_DUAL(vclzb, PPC_NONE, PPC2_ALTIVEC_207, \ + vpopcntb, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vclzh, PPC_NONE, PPC2_ALTIVEC_207, \ + vpopcnth, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \ + vpopcntw, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \ + vpopcntd, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM(vbpermd, 6, 23); +GEN_VXFORM(vbpermq, 6, 21); +GEN_VXFORM_NOA(vgbbd, 6, 20); +GEN_VXFORM(vpmsumb, 4, 16) +GEN_VXFORM(vpmsumh, 4, 17) +GEN_VXFORM(vpmsumw, 4, 18) +GEN_VXFORM(vpmsumd, 4, 19) + +#define GEN_BCD(op) \ +static void gen_##op(DisasContext *ctx) \ +{ \ + TCGv_ptr ra, rb, rd; \ + TCGv_i32 ps; \ + \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + \ + ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \ + \ + gen_helper_##op(cpu_crf[6], rd, ra, rb, ps); \ + \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + tcg_temp_free_i32(ps); \ +} + +#define GEN_BCD2(op) \ +static void gen_##op(DisasContext *ctx) \ +{ \ + TCGv_ptr rd, rb; \ + TCGv_i32 ps; \ + \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + \ + rb = gen_avr_ptr(rB(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + \ + ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \ + \ + gen_helper_##op(cpu_crf[6], rd, rb, ps); \ + \ + tcg_temp_free_ptr(rb); \ + tcg_temp_free_ptr(rd); \ + tcg_temp_free_i32(ps); \ +} + +GEN_BCD(bcdadd) +GEN_BCD(bcdsub) +GEN_BCD2(bcdcfn) +GEN_BCD2(bcdctn) +GEN_BCD2(bcdcfz) +GEN_BCD2(bcdctz) + +static void gen_xpnd04_1(DisasContext *ctx) +{ + switch (opc4(ctx->opcode)) { + case 4: + gen_bcdctz(ctx); + break; + case 5: + gen_bcdctn(ctx); + break; + case 6: + gen_bcdcfz(ctx); + break; + case 7: + gen_bcdcfn(ctx); + break; + default: + gen_invalid(ctx); + break; + } +} + +static void gen_xpnd04_2(DisasContext *ctx) +{ + switch (opc4(ctx->opcode)) { + case 4: + gen_bcdctz(ctx); + break; + case 6: + gen_bcdcfz(ctx); + break; + case 7: + gen_bcdcfn(ctx); + break; + default: + gen_invalid(ctx); + break; + } +} + +GEN_VXFORM_DUAL(vsubcuw, PPC_ALTIVEC, PPC_NONE, \ + xpnd04_1, PPC_NONE, PPC2_ISA300) +GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \ + xpnd04_2, PPC_NONE, PPC2_ISA300) + +GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \ + bcdadd, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \ + bcdadd, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \ + bcdsub, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \ + bcdsub, PPC_NONE, PPC2_ALTIVEC_207) + +static void gen_vsbox(DisasContext *ctx) +{ + TCGv_ptr ra, rd; + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + ra = gen_avr_ptr(rA(ctx->opcode)); + rd = gen_avr_ptr(rD(ctx->opcode)); + gen_helper_vsbox(rd, ra); + tcg_temp_free_ptr(ra); + tcg_temp_free_ptr(rd); +} + +GEN_VXFORM(vcipher, 4, 20) +GEN_VXFORM(vcipherlast, 4, 20) +GEN_VXFORM(vncipher, 4, 21) +GEN_VXFORM(vncipherlast, 4, 21) + +GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207, + vcipherlast, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207, + vncipherlast, PPC_NONE, PPC2_ALTIVEC_207) + +#define 
VSHASIGMA(op) \ +static void gen_##op(DisasContext *ctx) \ +{ \ + TCGv_ptr ra, rd; \ + TCGv_i32 st_six; \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + ra = gen_avr_ptr(rA(ctx->opcode)); \ + rd = gen_avr_ptr(rD(ctx->opcode)); \ + st_six = tcg_const_i32(rB(ctx->opcode)); \ + gen_helper_##op(rd, ra, st_six); \ + tcg_temp_free_ptr(ra); \ + tcg_temp_free_ptr(rd); \ + tcg_temp_free_i32(st_six); \ +} + +VSHASIGMA(vshasigmaw) +VSHASIGMA(vshasigmad) + +GEN_VXFORM3(vpermxor, 22, 0xFF) +GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE, + vpermxor, PPC_NONE, PPC2_ALTIVEC_207) + +#undef GEN_VR_LDX +#undef GEN_VR_STX +#undef GEN_VR_LVE +#undef GEN_VR_STVE + +#undef GEN_VX_LOGICAL +#undef GEN_VX_LOGICAL_207 +#undef GEN_VXFORM +#undef GEN_VXFORM_207 +#undef GEN_VXFORM_DUAL +#undef GEN_VXRFORM_DUAL +#undef GEN_VXRFORM1 +#undef GEN_VXRFORM +#undef GEN_VXFORM_SIMM +#undef GEN_VXFORM_NOA +#undef GEN_VXFORM_UIMM +#undef GEN_VAFORM_PAIRED + +#undef GEN_BCD2 diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c new file mode 100644 index 0000000000..f02b3bed50 --- /dev/null +++ b/target-ppc/translate/vmx-ops.inc.c @@ -0,0 +1,294 @@ +#define GEN_VR_LDX(name, opc2, opc3) \ +GEN_HANDLER(name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) +#define GEN_VR_STX(name, opc2, opc3) \ +GEN_HANDLER(st##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) +#define GEN_VR_LVE(name, opc2, opc3) \ + GEN_HANDLER(lve##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) +#define GEN_VR_STVE(name, opc2, opc3) \ + GEN_HANDLER(stve##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) +GEN_VR_LDX(lvx, 0x07, 0x03), +GEN_VR_LDX(lvxl, 0x07, 0x0B), +GEN_VR_LVE(bx, 0x07, 0x00), +GEN_VR_LVE(hx, 0x07, 0x01), +GEN_VR_LVE(wx, 0x07, 0x02), +GEN_VR_STX(svx, 0x07, 0x07), +GEN_VR_STX(svxl, 0x07, 0x0F), +GEN_VR_STVE(bx, 0x07, 0x04), +GEN_VR_STVE(hx, 0x07, 0x05), +GEN_VR_STVE(wx, 0x07, 0x06), + +#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \ +GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) + +#define GEN_VX_LOGICAL_207(name, tcg_op, opc2, opc3) \ +GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207) + +GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16), +GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17), +GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18), +GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19), +GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20), +GEN_VX_LOGICAL_207(veqv, tcg_gen_eqv_i64, 2, 26), +GEN_VX_LOGICAL_207(vnand, tcg_gen_nand_i64, 2, 22), +GEN_VX_LOGICAL_207(vorc, tcg_gen_orc_i64, 2, 21), + +#define GEN_VXFORM(name, opc2, opc3) \ +GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) + +#define GEN_VXFORM_207(name, opc2, opc3) \ +GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207) + +#define GEN_VXFORM_300(name, opc2, opc3) \ +GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA300) + +#define GEN_VXFORM_300_EXT(name, opc2, opc3, inval) \ +GEN_HANDLER_E(name, 0x04, opc2, opc3, inval, PPC_NONE, PPC2_ISA300) + +#define GEN_VXFORM_300_EO(name, opc2, opc3, opc4) \ +GEN_HANDLER_E_2(name, 0x04, opc2, opc3, opc4, 0x00000000, PPC_NONE, \ + PPC2_ISA300) + +#define GEN_VXFORM_DUAL(name0, name1, opc2, opc3, type0, type1) \ +GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, type0, type1) + +#define GEN_VXRFORM_DUAL(name0, name1, opc2, opc3, tp0, tp1) \ +GEN_HANDLER_E(name0##_##name1, 0x4, opc2, opc3, 0x00000000, tp0, tp1), \ +GEN_HANDLER_E(name0##_##name1, 0x4, opc2, (opc3 | 0x10), 0x00000000, 
tp0, tp1), + +GEN_VXFORM_DUAL(vaddubm, vmul10cuq, 0, 0, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vadduhm, vmul10ecuq, 0, 1, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vadduwm, 0, 2), +GEN_VXFORM_207(vaddudm, 0, 3), +GEN_VXFORM_DUAL(vsububm, bcdadd, 0, 16, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vsubuhm, bcdsub, 0, 17, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vsubuwm, 0, 18), +GEN_VXFORM_207(vsubudm, 0, 19), +GEN_VXFORM(vmaxub, 1, 0), +GEN_VXFORM(vmaxuh, 1, 1), +GEN_VXFORM(vmaxuw, 1, 2), +GEN_VXFORM_207(vmaxud, 1, 3), +GEN_VXFORM(vmaxsb, 1, 4), +GEN_VXFORM(vmaxsh, 1, 5), +GEN_VXFORM(vmaxsw, 1, 6), +GEN_VXFORM_207(vmaxsd, 1, 7), +GEN_VXFORM(vminub, 1, 8), +GEN_VXFORM(vminuh, 1, 9), +GEN_VXFORM(vminuw, 1, 10), +GEN_VXFORM_207(vminud, 1, 11), +GEN_VXFORM(vminsb, 1, 12), +GEN_VXFORM(vminsh, 1, 13), +GEN_VXFORM(vminsw, 1, 14), +GEN_VXFORM_207(vminsd, 1, 15), +GEN_VXFORM_DUAL(vavgub, vabsdub, 1, 16, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vavguh, vabsduh, 1, 17, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vavguw, vabsduw, 1, 18, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vavgsb, 1, 20), +GEN_VXFORM(vavgsh, 1, 21), +GEN_VXFORM(vavgsw, 1, 22), +GEN_VXFORM(vmrghb, 6, 0), +GEN_VXFORM(vmrghh, 6, 1), +GEN_VXFORM(vmrghw, 6, 2), +GEN_VXFORM(vmrglb, 6, 4), +GEN_VXFORM(vmrglh, 6, 5), +GEN_VXFORM(vmrglw, 6, 6), +GEN_VXFORM_207(vmrgew, 6, 30), +GEN_VXFORM_207(vmrgow, 6, 26), +GEN_VXFORM(vmuloub, 4, 0), +GEN_VXFORM(vmulouh, 4, 1), +GEN_VXFORM_DUAL(vmulouw, vmuluwm, 4, 2, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vmulosb, 4, 4), +GEN_VXFORM(vmulosh, 4, 5), +GEN_VXFORM_207(vmulosw, 4, 6), +GEN_VXFORM(vmuleub, 4, 8), +GEN_VXFORM(vmuleuh, 4, 9), +GEN_VXFORM_207(vmuleuw, 4, 10), +GEN_VXFORM(vmulesb, 4, 12), +GEN_VXFORM(vmulesh, 4, 13), +GEN_VXFORM_207(vmulesw, 4, 14), +GEN_VXFORM(vslb, 2, 4), +GEN_VXFORM(vslh, 2, 5), +GEN_VXFORM_DUAL(vslw, vrlwnm, 2, 6, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_207(vsld, 2, 23), +GEN_VXFORM(vsrb, 2, 8), +GEN_VXFORM(vsrh, 2, 9), +GEN_VXFORM(vsrw, 2, 10), +GEN_VXFORM_207(vsrd, 2, 27), +GEN_VXFORM(vsrab, 2, 12), +GEN_VXFORM(vsrah, 2, 13), +GEN_VXFORM(vsraw, 2, 14), +GEN_VXFORM_207(vsrad, 2, 15), +GEN_VXFORM_300(vsrv, 2, 28), +GEN_VXFORM_300(vslv, 2, 29), +GEN_VXFORM(vslo, 6, 16), +GEN_VXFORM(vsro, 6, 17), +GEN_VXFORM(vaddcuw, 0, 6), +GEN_HANDLER_E_2(vprtybw, 0x4, 0x1, 0x18, 8, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E_2(vprtybd, 0x4, 0x1, 0x18, 9, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E_2(vprtybq, 0x4, 0x1, 0x18, 10, 0, PPC_NONE, PPC2_ISA300), + +GEN_VXFORM_DUAL(vsubcuw, xpnd04_1, 0, 22, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vaddubs, vmul10uq, 0, 8, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vadduhs, vmul10euq, 0, 9, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vadduws, 0, 10), +GEN_VXFORM(vaddsbs, 0, 12), +GEN_VXFORM(vaddshs, 0, 13), +GEN_VXFORM(vaddsws, 0, 14), +GEN_VXFORM_DUAL(vsububs, bcdadd, 0, 24, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vsubuhs, bcdsub, 0, 25, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vsubuws, 0, 26), +GEN_VXFORM(vsubsbs, 0, 28), +GEN_VXFORM(vsubshs, 0, 29), +GEN_VXFORM_DUAL(vsubsws, xpnd04_2, 0, 30, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_207(vadduqm, 0, 4), +GEN_VXFORM_207(vaddcuq, 0, 5), +GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_207(vsubuqm, 0, 20), +GEN_VXFORM_207(vsubcuq, 0, 21), +GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM(vrlb, 2, 0), +GEN_VXFORM(vrlh, 2, 1), +GEN_VXFORM_DUAL(vrlw, vrlwmi, 2, 2, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vrld, vrldmi, 2, 3, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_DUAL(vsl, vrldnm, 
2, 7, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM(vsr, 2, 11), +GEN_VXFORM(vpkuhum, 7, 0), +GEN_VXFORM(vpkuwum, 7, 1), +GEN_VXFORM_207(vpkudum, 7, 17), +GEN_VXFORM(vpkuhus, 7, 2), +GEN_VXFORM(vpkuwus, 7, 3), +GEN_VXFORM_207(vpkudus, 7, 19), +GEN_VXFORM(vpkshus, 7, 4), +GEN_VXFORM(vpkswus, 7, 5), +GEN_VXFORM_207(vpksdus, 7, 21), +GEN_VXFORM(vpkshss, 7, 6), +GEN_VXFORM(vpkswss, 7, 7), +GEN_VXFORM_207(vpksdss, 7, 23), +GEN_VXFORM(vpkpx, 7, 12), +GEN_VXFORM(vsum4ubs, 4, 24), +GEN_VXFORM(vsum4sbs, 4, 28), +GEN_VXFORM(vsum4shs, 4, 25), +GEN_VXFORM(vsum2sws, 4, 26), +GEN_VXFORM(vsumsws, 4, 30), +GEN_VXFORM(vaddfp, 5, 0), +GEN_VXFORM(vsubfp, 5, 1), +GEN_VXFORM(vmaxfp, 5, 16), +GEN_VXFORM(vminfp, 5, 17), + +#define GEN_VXRFORM1(opname, name, str, opc2, opc3) \ + GEN_HANDLER2(name, str, 0x4, opc2, opc3, 0x00000000, PPC_ALTIVEC), +#define GEN_VXRFORM1_300(opname, name, str, opc2, opc3) \ +GEN_HANDLER2_E(name, str, 0x4, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ISA300), +#define GEN_VXRFORM(name, opc2, opc3) \ + GEN_VXRFORM1(name, name, #name, opc2, opc3) \ + GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4))) +#define GEN_VXRFORM_300(name, opc2, opc3) \ + GEN_VXRFORM1_300(name, name, #name, opc2, opc3) \ + GEN_VXRFORM1_300(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4))) + +GEN_VXRFORM_300(vcmpnezb, 3, 4) +GEN_VXRFORM_300(vcmpnezh, 3, 5) +GEN_VXRFORM_300(vcmpnezw, 3, 6) +GEN_VXRFORM(vcmpgtsb, 3, 12) +GEN_VXRFORM(vcmpgtsh, 3, 13) +GEN_VXRFORM(vcmpgtsw, 3, 14) +GEN_VXRFORM(vcmpgtub, 3, 8) +GEN_VXRFORM(vcmpgtuh, 3, 9) +GEN_VXRFORM(vcmpgtuw, 3, 10) +GEN_VXRFORM_DUAL(vcmpeqfp, vcmpequd, 3, 3, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM(vcmpgefp, 3, 7) +GEN_VXRFORM_DUAL(vcmpgtfp, vcmpgtud, 3, 11, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM_DUAL(vcmpbfp, vcmpgtsd, 3, 15, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM_DUAL(vcmpequb, vcmpneb, 3, 0, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM_DUAL(vcmpequh, vcmpneh, 3, 1, PPC_ALTIVEC, PPC_NONE) +GEN_VXRFORM_DUAL(vcmpequw, vcmpnew, 3, 2, PPC_ALTIVEC, PPC_NONE) + +#define GEN_VXFORM_DUAL_INV(name0, name1, opc2, opc3, inval0, inval1, type) \ +GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, \ + PPC_NONE) +GEN_VXFORM_DUAL_INV(vspltb, vextractub, 6, 8, 0x00000000, 0x100000, + PPC_ALTIVEC), +GEN_VXFORM_DUAL_INV(vsplth, vextractuh, 6, 9, 0x00000000, 0x100000, + PPC_ALTIVEC), +GEN_VXFORM_DUAL_INV(vspltw, vextractuw, 6, 10, 0x00000000, 0x100000, + PPC_ALTIVEC), +GEN_VXFORM_300_EXT(vextractd, 6, 11, 0x100000), +GEN_VXFORM_DUAL_INV(vspltisb, vinsertb, 6, 12, 0x00000000, 0x100000, + PPC_ALTIVEC), +GEN_VXFORM_DUAL_INV(vspltish, vinserth, 6, 13, 0x00000000, 0x100000, + PPC_ALTIVEC), +GEN_VXFORM_DUAL_INV(vspltisw, vinsertw, 6, 14, 0x00000000, 0x100000, + PPC_ALTIVEC), +GEN_VXFORM_300_EXT(vinsertd, 6, 15, 0x100000), +GEN_VXFORM_300_EO(vnegw, 0x01, 0x18, 0x06), +GEN_VXFORM_300_EO(vnegd, 0x01, 0x18, 0x07), +GEN_VXFORM_300_EO(vextsb2w, 0x01, 0x18, 0x10), +GEN_VXFORM_300_EO(vextsh2w, 0x01, 0x18, 0x11), +GEN_VXFORM_300_EO(vextsb2d, 0x01, 0x18, 0x18), +GEN_VXFORM_300_EO(vextsh2d, 0x01, 0x18, 0x19), +GEN_VXFORM_300_EO(vextsw2d, 0x01, 0x18, 0x1A), +GEN_VXFORM_300_EO(vctzb, 0x01, 0x18, 0x1C), +GEN_VXFORM_300_EO(vctzh, 0x01, 0x18, 0x1D), +GEN_VXFORM_300_EO(vctzw, 0x01, 0x18, 0x1E), +GEN_VXFORM_300_EO(vctzd, 0x01, 0x18, 0x1F), +GEN_VXFORM_300_EO(vclzlsbb, 0x01, 0x18, 0x0), +GEN_VXFORM_300_EO(vctzlsbb, 0x01, 0x18, 0x1), +GEN_VXFORM_300(vpermr, 0x1D, 0xFF), + +#define GEN_VXFORM_NOA(name, opc2, opc3) \ + GEN_HANDLER(name, 0x04, opc2, opc3, 0x001f0000, PPC_ALTIVEC) 
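One detail of this table-side GEN_VXFORM_NOA variant that is easy to miss (an interpretation, not spelled out in the patch): the 0x001f0000 inval mask covers instruction bits 11-15, i.e. the register field these unary "no rA" forms leave reserved, so encodings with a non-zero value there decode as invalid. A minimal hand expansion of the first entry below, using only the #define above:

    /* GEN_VXFORM_NOA(vupkhsb, 7, 8) becomes an opcode-table entry of the form */
    GEN_HANDLER(vupkhsb, 0x04, 7, 8, 0x001f0000, PPC_ALTIVEC),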
+GEN_VXFORM_NOA(vupkhsb, 7, 8), +GEN_VXFORM_NOA(vupkhsh, 7, 9), +GEN_VXFORM_207(vupkhsw, 7, 25), +GEN_VXFORM_NOA(vupklsb, 7, 10), +GEN_VXFORM_NOA(vupklsh, 7, 11), +GEN_VXFORM_207(vupklsw, 7, 27), +GEN_VXFORM_NOA(vupkhpx, 7, 13), +GEN_VXFORM_NOA(vupklpx, 7, 15), +GEN_VXFORM_NOA(vrefp, 5, 4), +GEN_VXFORM_NOA(vrsqrtefp, 5, 5), +GEN_VXFORM_NOA(vexptefp, 5, 6), +GEN_VXFORM_NOA(vlogefp, 5, 7), +GEN_VXFORM_NOA(vrfim, 5, 11), +GEN_VXFORM_NOA(vrfin, 5, 8), +GEN_VXFORM_NOA(vrfip, 5, 10), +GEN_VXFORM_NOA(vrfiz, 5, 9), + +#define GEN_VXFORM_UIMM(name, opc2, opc3) \ + GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) +GEN_VXFORM_UIMM(vcfux, 5, 12), +GEN_VXFORM_UIMM(vcfsx, 5, 13), +GEN_VXFORM_UIMM(vctuxs, 5, 14), +GEN_VXFORM_UIMM(vctsxs, 5, 15), + + +#define GEN_VAFORM_PAIRED(name0, name1, opc2) \ + GEN_HANDLER(name0##_##name1, 0x04, opc2, 0xFF, 0x00000000, PPC_ALTIVEC) +GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16), +GEN_VAFORM_PAIRED(vmsumubm, vmsummbm, 18), +GEN_VAFORM_PAIRED(vmsumuhm, vmsumuhs, 19), +GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20), +GEN_VAFORM_PAIRED(vsel, vperm, 21), +GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), + +GEN_VXFORM_DUAL(vclzb, vpopcntb, 1, 28, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_DUAL(vclzh, vpopcnth, 1, 29, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_DUAL(vclzw, vpopcntw, 1, 30, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_DUAL(vclzd, vpopcntd, 1, 31, PPC_NONE, PPC2_ALTIVEC_207), + +GEN_VXFORM_300(vbpermd, 6, 23), +GEN_VXFORM_207(vbpermq, 6, 21), +GEN_VXFORM_207(vgbbd, 6, 20), +GEN_VXFORM_207(vpmsumb, 4, 16), +GEN_VXFORM_207(vpmsumh, 4, 17), +GEN_VXFORM_207(vpmsumw, 4, 18), +GEN_VXFORM_207(vpmsumd, 4, 19), + +GEN_VXFORM_207(vsbox, 4, 23), + +GEN_VXFORM_DUAL(vcipher, vcipherlast, 4, 20, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_DUAL(vncipher, vncipherlast, 4, 21, PPC_NONE, PPC2_ALTIVEC_207), + +GEN_VXFORM_207(vshasigmaw, 1, 26), +GEN_VXFORM_207(vshasigmad, 1, 27), + +GEN_VXFORM_DUAL(vsldoi, vpermxor, 22, 0xFF, PPC_ALTIVEC, PPC_NONE), diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c new file mode 100644 index 0000000000..5a27be4bd4 --- /dev/null +++ b/target-ppc/translate/vsx-impl.inc.c @@ -0,0 +1,1009 @@ +/*** VSX extension ***/ + +static inline TCGv_i64 cpu_vsrh(int n) +{ + if (n < 32) { + return cpu_fpr[n]; + } else { + return cpu_avrh[n-32]; + } +} + +static inline TCGv_i64 cpu_vsrl(int n) +{ + if (n < 32) { + return cpu_vsr[n]; + } else { + return cpu_avrl[n-32]; + } +} + +#define VSX_LOAD_SCALAR(name, operation) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##operation(ctx, cpu_vsrh(xT(ctx->opcode)), EA); \ + /* NOTE: cpu_vsrl is undefined */ \ + tcg_temp_free(EA); \ +} + +VSX_LOAD_SCALAR(lxsdx, ld64_i64) +VSX_LOAD_SCALAR(lxsiwax, ld32s_i64) +VSX_LOAD_SCALAR(lxsibzx, ld8u_i64) +VSX_LOAD_SCALAR(lxsihzx, ld16u_i64) +VSX_LOAD_SCALAR(lxsiwzx, ld32u_i64) +VSX_LOAD_SCALAR(lxsspx, ld32fs) + +static void gen_lxvd2x(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + gen_qemu_ld64_i64(ctx, cpu_vsrh(xT(ctx->opcode)), EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_ld64_i64(ctx, cpu_vsrl(xT(ctx->opcode)), EA); + tcg_temp_free(EA); +} + +static void 
gen_lxvdsx(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + gen_qemu_ld64_i64(ctx, cpu_vsrh(xT(ctx->opcode)), EA); + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xT(ctx->opcode))); + tcg_temp_free(EA); +} + +static void gen_lxvw4x(DisasContext *ctx) +{ + TCGv EA; + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + + gen_addr_reg_index(ctx, EA); + if (ctx->le_mode) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ); + tcg_gen_shri_i64(t1, t0, 32); + tcg_gen_deposit_i64(xth, t1, t0, 32, 32); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(t0, EA, ctx->mem_idx, MO_LEQ); + tcg_gen_shri_i64(t1, t0, 32); + tcg_gen_deposit_i64(xtl, t1, t0, 32, 32); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } else { + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); + } + tcg_temp_free(EA); +} + +static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl, + TCGv_i64 inh, TCGv_i64 inl) +{ + TCGv_i64 mask = tcg_const_i64(0x00FF00FF00FF00FF); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + /* outh = ((inh & mask) << 8) | ((inh >> 8) & mask) */ + tcg_gen_and_i64(t0, inh, mask); + tcg_gen_shli_i64(t0, t0, 8); + tcg_gen_shri_i64(t1, inh, 8); + tcg_gen_and_i64(t1, t1, mask); + tcg_gen_or_i64(outh, t0, t1); + + /* outl = ((inl & mask) << 8) | ((inl >> 8) & mask) */ + tcg_gen_and_i64(t0, inl, mask); + tcg_gen_shli_i64(t0, t0, 8); + tcg_gen_shri_i64(t1, inl, 8); + tcg_gen_and_i64(t1, t1, mask); + tcg_gen_or_i64(outl, t0, t1); + + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(mask); +} + +static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl, + TCGv_i64 inh, TCGv_i64 inl) +{ + TCGv_i64 hi = tcg_temp_new_i64(); + TCGv_i64 lo = tcg_temp_new_i64(); + + tcg_gen_bswap64_i64(hi, inh); + tcg_gen_bswap64_i64(lo, inl); + tcg_gen_shri_i64(outh, hi, 32); + tcg_gen_deposit_i64(outh, outh, hi, 32, 32); + tcg_gen_shri_i64(outl, lo, 32); + tcg_gen_deposit_i64(outl, outl, lo, 32, 32); + + tcg_temp_free_i64(hi); + tcg_temp_free_i64(lo); +} +static void gen_lxvh8x(DisasContext *ctx) +{ + TCGv EA; + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); + if (ctx->le_mode) { + gen_bswap16x8(xth, xtl, xth, xtl); + } + tcg_temp_free(EA); +} + +static void gen_lxvb16x(DisasContext *ctx) +{ + TCGv EA; + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); + 
tcg_temp_free(EA); +} + +#define VSX_STORE_SCALAR(name, operation) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + TCGv EA; \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + gen_set_access_type(ctx, ACCESS_INT); \ + EA = tcg_temp_new(); \ + gen_addr_reg_index(ctx, EA); \ + gen_qemu_##operation(ctx, cpu_vsrh(xS(ctx->opcode)), EA); \ + tcg_temp_free(EA); \ +} + +VSX_STORE_SCALAR(stxsdx, st64_i64) + +VSX_STORE_SCALAR(stxsibx, st8_i64) +VSX_STORE_SCALAR(stxsihx, st16_i64) +VSX_STORE_SCALAR(stxsiwx, st32_i64) +VSX_STORE_SCALAR(stxsspx, st32fs) + +static void gen_stxvd2x(DisasContext *ctx) +{ + TCGv EA; + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + gen_qemu_st64_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_st64_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); + tcg_temp_free(EA); +} + +static void gen_stxvw4x(DisasContext *ctx) +{ + TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode)); + TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode)); + TCGv EA; + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + if (ctx->le_mode) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t0, xsh, 32); + tcg_gen_deposit_i64(t1, t0, xsh, 32, 32); + tcg_gen_qemu_st_i64(t1, EA, ctx->mem_idx, MO_LEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_shri_i64(t0, xsl, 32); + tcg_gen_deposit_i64(t1, t0, xsl, 32, 32); + tcg_gen_qemu_st_i64(t1, EA, ctx->mem_idx, MO_LEQ); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } else { + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ); + } + tcg_temp_free(EA); +} + +static void gen_stxvh8x(DisasContext *ctx) +{ + TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode)); + TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode)); + TCGv EA; + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + if (ctx->le_mode) { + TCGv_i64 outh = tcg_temp_new_i64(); + TCGv_i64 outl = tcg_temp_new_i64(); + + gen_bswap16x8(outh, outl, xsh, xsl); + tcg_gen_qemu_st_i64(outh, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_st_i64(outl, EA, ctx->mem_idx, MO_BEQ); + tcg_temp_free_i64(outh); + tcg_temp_free_i64(outl); + } else { + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ); + } + tcg_temp_free(EA); +} + +static void gen_stxvb16x(DisasContext *ctx) +{ + TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode)); + TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode)); + TCGv EA; + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_set_access_type(ctx, ACCESS_INT); + EA = tcg_temp_new(); + gen_addr_reg_index(ctx, EA); + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ); + tcg_temp_free(EA); +} + +#define MV_VSRW(name, tcgop1, tcgop2, target, source) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + if (xS(ctx->opcode) < 32) { \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + } 
else { \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + } \ + TCGv_i64 tmp = tcg_temp_new_i64(); \ + tcg_gen_##tcgop1(tmp, source); \ + tcg_gen_##tcgop2(target, tmp); \ + tcg_temp_free_i64(tmp); \ +} + + +MV_VSRW(mfvsrwz, ext32u_i64, trunc_i64_tl, cpu_gpr[rA(ctx->opcode)], \ + cpu_vsrh(xS(ctx->opcode))) +MV_VSRW(mtvsrwa, extu_tl_i64, ext32s_i64, cpu_vsrh(xT(ctx->opcode)), \ + cpu_gpr[rA(ctx->opcode)]) +MV_VSRW(mtvsrwz, extu_tl_i64, ext32u_i64, cpu_vsrh(xT(ctx->opcode)), \ + cpu_gpr[rA(ctx->opcode)]) + +#if defined(TARGET_PPC64) +#define MV_VSRD(name, target, source) \ +static void gen_##name(DisasContext *ctx) \ +{ \ + if (xS(ctx->opcode) < 32) { \ + if (unlikely(!ctx->fpu_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_FPU); \ + return; \ + } \ + } else { \ + if (unlikely(!ctx->altivec_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VPU); \ + return; \ + } \ + } \ + tcg_gen_mov_i64(target, source); \ +} + +MV_VSRD(mfvsrd, cpu_gpr[rA(ctx->opcode)], cpu_vsrh(xS(ctx->opcode))) +MV_VSRD(mtvsrd, cpu_vsrh(xT(ctx->opcode)), cpu_gpr[rA(ctx->opcode)]) + +static void gen_mfvsrld(DisasContext *ctx) +{ + if (xS(ctx->opcode) < 32) { + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + } else { + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + } + + tcg_gen_mov_i64(cpu_gpr[rA(ctx->opcode)], cpu_vsrl(xS(ctx->opcode))); +} + +static void gen_mtvsrdd(DisasContext *ctx) +{ + if (xT(ctx->opcode) < 32) { + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + } else { + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + } + + if (!rA(ctx->opcode)) { + tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0); + } else { + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_gpr[rA(ctx->opcode)]); + } + + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_gpr[rB(ctx->opcode)]); +} + +static void gen_mtvsrws(DisasContext *ctx) +{ + if (xT(ctx->opcode) < 32) { + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + } else { + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + } + + tcg_gen_deposit_i64(cpu_vsrl(xT(ctx->opcode)), cpu_gpr[rA(ctx->opcode)], + cpu_gpr[rA(ctx->opcode)], 32, 32); + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xT(ctx->opcode))); +} + +#endif + +static void gen_xxpermdi(DisasContext *ctx) +{ + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + + if (unlikely((xT(ctx->opcode) == xA(ctx->opcode)) || + (xT(ctx->opcode) == xB(ctx->opcode)))) { + TCGv_i64 xh, xl; + + xh = tcg_temp_new_i64(); + xl = tcg_temp_new_i64(); + + if ((DM(ctx->opcode) & 2) == 0) { + tcg_gen_mov_i64(xh, cpu_vsrh(xA(ctx->opcode))); + } else { + tcg_gen_mov_i64(xh, cpu_vsrl(xA(ctx->opcode))); + } + if ((DM(ctx->opcode) & 1) == 0) { + tcg_gen_mov_i64(xl, cpu_vsrh(xB(ctx->opcode))); + } else { + tcg_gen_mov_i64(xl, cpu_vsrl(xB(ctx->opcode))); + } + + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xh); + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xl); + + tcg_temp_free_i64(xh); + tcg_temp_free_i64(xl); + } else { + if ((DM(ctx->opcode) & 2) == 0) { + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode))); + } else { + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode))); + } + if ((DM(ctx->opcode) & 1) == 0) { + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), 
cpu_vsrh(xB(ctx->opcode))); + } else { + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xB(ctx->opcode))); + } + } +} + +#define OP_ABS 1 +#define OP_NABS 2 +#define OP_NEG 3 +#define OP_CPSGN 4 +#define SGN_MASK_DP 0x8000000000000000ull +#define SGN_MASK_SP 0x8000000080000000ull + +#define VSX_SCALAR_MOVE(name, op, sgn_mask) \ +static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGv_i64 xb, sgm; \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + xb = tcg_temp_new_i64(); \ + sgm = tcg_temp_new_i64(); \ + tcg_gen_mov_i64(xb, cpu_vsrh(xB(ctx->opcode))); \ + tcg_gen_movi_i64(sgm, sgn_mask); \ + switch (op) { \ + case OP_ABS: { \ + tcg_gen_andc_i64(xb, xb, sgm); \ + break; \ + } \ + case OP_NABS: { \ + tcg_gen_or_i64(xb, xb, sgm); \ + break; \ + } \ + case OP_NEG: { \ + tcg_gen_xor_i64(xb, xb, sgm); \ + break; \ + } \ + case OP_CPSGN: { \ + TCGv_i64 xa = tcg_temp_new_i64(); \ + tcg_gen_mov_i64(xa, cpu_vsrh(xA(ctx->opcode))); \ + tcg_gen_and_i64(xa, xa, sgm); \ + tcg_gen_andc_i64(xb, xb, sgm); \ + tcg_gen_or_i64(xb, xb, xa); \ + tcg_temp_free_i64(xa); \ + break; \ + } \ + } \ + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xb); \ + tcg_temp_free_i64(xb); \ + tcg_temp_free_i64(sgm); \ + } + +VSX_SCALAR_MOVE(xsabsdp, OP_ABS, SGN_MASK_DP) +VSX_SCALAR_MOVE(xsnabsdp, OP_NABS, SGN_MASK_DP) +VSX_SCALAR_MOVE(xsnegdp, OP_NEG, SGN_MASK_DP) +VSX_SCALAR_MOVE(xscpsgndp, OP_CPSGN, SGN_MASK_DP) + +#define VSX_VECTOR_MOVE(name, op, sgn_mask) \ +static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGv_i64 xbh, xbl, sgm; \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + xbh = tcg_temp_new_i64(); \ + xbl = tcg_temp_new_i64(); \ + sgm = tcg_temp_new_i64(); \ + tcg_gen_mov_i64(xbh, cpu_vsrh(xB(ctx->opcode))); \ + tcg_gen_mov_i64(xbl, cpu_vsrl(xB(ctx->opcode))); \ + tcg_gen_movi_i64(sgm, sgn_mask); \ + switch (op) { \ + case OP_ABS: { \ + tcg_gen_andc_i64(xbh, xbh, sgm); \ + tcg_gen_andc_i64(xbl, xbl, sgm); \ + break; \ + } \ + case OP_NABS: { \ + tcg_gen_or_i64(xbh, xbh, sgm); \ + tcg_gen_or_i64(xbl, xbl, sgm); \ + break; \ + } \ + case OP_NEG: { \ + tcg_gen_xor_i64(xbh, xbh, sgm); \ + tcg_gen_xor_i64(xbl, xbl, sgm); \ + break; \ + } \ + case OP_CPSGN: { \ + TCGv_i64 xah = tcg_temp_new_i64(); \ + TCGv_i64 xal = tcg_temp_new_i64(); \ + tcg_gen_mov_i64(xah, cpu_vsrh(xA(ctx->opcode))); \ + tcg_gen_mov_i64(xal, cpu_vsrl(xA(ctx->opcode))); \ + tcg_gen_and_i64(xah, xah, sgm); \ + tcg_gen_and_i64(xal, xal, sgm); \ + tcg_gen_andc_i64(xbh, xbh, sgm); \ + tcg_gen_andc_i64(xbl, xbl, sgm); \ + tcg_gen_or_i64(xbh, xbh, xah); \ + tcg_gen_or_i64(xbl, xbl, xal); \ + tcg_temp_free_i64(xah); \ + tcg_temp_free_i64(xal); \ + break; \ + } \ + } \ + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xbh); \ + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xbl); \ + tcg_temp_free_i64(xbh); \ + tcg_temp_free_i64(xbl); \ + tcg_temp_free_i64(sgm); \ + } + +VSX_VECTOR_MOVE(xvabsdp, OP_ABS, SGN_MASK_DP) +VSX_VECTOR_MOVE(xvnabsdp, OP_NABS, SGN_MASK_DP) +VSX_VECTOR_MOVE(xvnegdp, OP_NEG, SGN_MASK_DP) +VSX_VECTOR_MOVE(xvcpsgndp, OP_CPSGN, SGN_MASK_DP) +VSX_VECTOR_MOVE(xvabssp, OP_ABS, SGN_MASK_SP) +VSX_VECTOR_MOVE(xvnabssp, OP_NABS, SGN_MASK_SP) +VSX_VECTOR_MOVE(xvnegsp, OP_NEG, SGN_MASK_SP) +VSX_VECTOR_MOVE(xvcpsgnsp, OP_CPSGN, SGN_MASK_SP) + +#define GEN_VSX_HELPER_2(name, op1, op2, inval, type) \ +static void gen_##name(DisasContext * ctx) \ +{ \ + TCGv_i32 opc; \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, 
POWERPC_EXCP_VSXU); \ + return; \ + } \ + opc = tcg_const_i32(ctx->opcode); \ + gen_helper_##name(cpu_env, opc); \ + tcg_temp_free_i32(opc); \ +} + +#define GEN_VSX_HELPER_XT_XB_ENV(name, op1, op2, inval, type) \ +static void gen_##name(DisasContext * ctx) \ +{ \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + gen_helper_##name(cpu_vsrh(xT(ctx->opcode)), cpu_env, \ + cpu_vsrh(xB(ctx->opcode))); \ +} + +GEN_VSX_HELPER_2(xsadddp, 0x00, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xssubdp, 0x00, 0x05, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmuldp, 0x00, 0x06, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsdivdp, 0x00, 0x07, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsredp, 0x14, 0x05, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xssqrtdp, 0x16, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsrsqrtedp, 0x14, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xstdivdp, 0x14, 0x07, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xstsqrtdp, 0x14, 0x06, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmaddadp, 0x04, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmaddmdp, 0x04, 0x05, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmsubadp, 0x04, 0x06, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmsubmdp, 0x04, 0x07, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsnmaddadp, 0x04, 0x14, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsnmaddmdp, 0x04, 0x15, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscmpeqdp, 0x0C, 0x00, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xscmpgtdp, 0x0C, 0x01, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xscmpgedp, 0x0C, 0x02, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xscmpnedp, 0x0C, 0x03, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscvdpsp, 0x12, 0x10, 0, PPC2_VSX) +GEN_VSX_HELPER_XT_XB_ENV(xscvdpspn, 0x16, 0x10, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xscvspdp, 0x12, 0x14, 0, PPC2_VSX) +GEN_VSX_HELPER_XT_XB_ENV(xscvspdpn, 0x16, 0x14, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xscvdpsxds, 0x10, 0x15, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscvdpsxws, 0x10, 0x05, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscvdpuxds, 0x10, 0x14, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscvdpuxws, 0x10, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscvsxddp, 0x10, 0x17, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xscvuxddp, 0x10, 0x16, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsrdpi, 0x12, 0x04, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsrdpic, 0x16, 0x06, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsrdpim, 0x12, 0x07, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsrdpip, 0x12, 0x06, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xsrdpiz, 0x12, 0x05, 0, PPC2_VSX) +GEN_VSX_HELPER_XT_XB_ENV(xsrsp, 0x12, 0x11, 0, PPC2_VSX207) + +GEN_VSX_HELPER_2(xsaddsp, 0x00, 0x00, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xssubsp, 0x00, 0x01, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsmulsp, 0x00, 0x02, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsdivsp, 0x00, 0x03, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsresp, 0x14, 0x01, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xssqrtsp, 0x16, 0x00, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsrsqrtesp, 0x14, 0x00, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsmaddasp, 0x04, 0x00, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsmaddmsp, 0x04, 0x01, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsmsubasp, 0x04, 0x02, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsmsubmsp, 0x04, 0x03, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsnmaddasp, 0x04, 0x10, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsnmaddmsp, 0x04, 0x11, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsnmsubasp, 0x04, 0x12, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xsnmsubmsp, 0x04, 0x13, 0, PPC2_VSX207) 
+GEN_VSX_HELPER_2(xscvsxdsp, 0x10, 0x13, 0, PPC2_VSX207) +GEN_VSX_HELPER_2(xscvuxdsp, 0x10, 0x12, 0, PPC2_VSX207) + +GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmuldp, 0x00, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvdivdp, 0x00, 0x0F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvredp, 0x14, 0x0D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvsqrtdp, 0x16, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrsqrtedp, 0x14, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvtdivdp, 0x14, 0x0F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvtsqrtdp, 0x14, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmaddadp, 0x04, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmaddmdp, 0x04, 0x0D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmsubadp, 0x04, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmsubmdp, 0x04, 0x0F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmaddadp, 0x04, 0x1C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmaddmdp, 0x04, 0x1D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpeqdp, 0x0C, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpgtdp, 0x0C, 0x0D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpgedp, 0x0C, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpnedp, 0x0C, 0x0F, 0, PPC2_ISA300) +GEN_VSX_HELPER_2(xvcvdpsp, 0x12, 0x18, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvdpsxds, 0x10, 0x1D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvdpsxws, 0x10, 0x0D, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvdpuxds, 0x10, 0x1C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvdpuxws, 0x10, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvsxddp, 0x10, 0x1F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvuxddp, 0x10, 0x1E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvsxwdp, 0x10, 0x0F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvuxwdp, 0x10, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrdpi, 0x12, 0x0C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrdpic, 0x16, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrdpim, 0x12, 0x0F, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrdpip, 0x12, 0x0E, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrdpiz, 0x12, 0x0D, 0, PPC2_VSX) + +GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmulsp, 0x00, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvdivsp, 0x00, 0x0B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvresp, 0x14, 0x09, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvsqrtsp, 0x16, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrsqrtesp, 0x14, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvtdivsp, 0x14, 0x0B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvtsqrtsp, 0x14, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmaddasp, 0x04, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmaddmsp, 0x04, 0x09, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmsubasp, 0x04, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmsubmsp, 0x04, 0x0B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmaddasp, 0x04, 0x18, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmaddmsp, 0x04, 0x19, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpeqsp, 0x0C, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpgtsp, 0x0C, 0x09, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpgesp, 0x0C, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcmpnesp, 0x0C, 0x0B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvspdp, 0x12, 0x1C, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvspsxds, 0x10, 0x19, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvspsxws, 0x10, 0x09, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvspuxds, 0x10, 0x18, 0, PPC2_VSX) 
+GEN_VSX_HELPER_2(xvcvspuxws, 0x10, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvsxdsp, 0x10, 0x1B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvuxdsp, 0x10, 0x1A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvsxwsp, 0x10, 0x0B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvcvuxwsp, 0x10, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrspi, 0x12, 0x08, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrspic, 0x16, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX) +GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX) + +static void gen_xxbrd(DisasContext *ctx) +{ + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode)); + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + tcg_gen_bswap64_i64(xth, xbh); + tcg_gen_bswap64_i64(xtl, xbl); +} + +static void gen_xxbrh(DisasContext *ctx) +{ + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode)); + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_bswap16x8(xth, xtl, xbh, xbl); +} + +static void gen_xxbrq(DisasContext *ctx) +{ + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode)); + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode)); + TCGv_i64 t0 = tcg_temp_new_i64(); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + tcg_gen_bswap64_i64(t0, xbl); + tcg_gen_bswap64_i64(xtl, xbh); + tcg_gen_mov_i64(xth, t0); + tcg_temp_free_i64(t0); +} + +static void gen_xxbrw(DisasContext *ctx) +{ + TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode)); + TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode)); + TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode)); + TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + gen_bswap32x4(xth, xtl, xbh, xbl); +} + +#define VSX_LOGICAL(name, tcg_op) \ +static void glue(gen_, name)(DisasContext * ctx) \ + { \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + tcg_op(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode)), \ + cpu_vsrh(xB(ctx->opcode))); \ + tcg_op(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode)), \ + cpu_vsrl(xB(ctx->opcode))); \ + } + +VSX_LOGICAL(xxland, tcg_gen_and_i64) +VSX_LOGICAL(xxlandc, tcg_gen_andc_i64) +VSX_LOGICAL(xxlor, tcg_gen_or_i64) +VSX_LOGICAL(xxlxor, tcg_gen_xor_i64) +VSX_LOGICAL(xxlnor, tcg_gen_nor_i64) +VSX_LOGICAL(xxleqv, tcg_gen_eqv_i64) +VSX_LOGICAL(xxlnand, tcg_gen_nand_i64) +VSX_LOGICAL(xxlorc, tcg_gen_orc_i64) + +#define VSX_XXMRG(name, high) \ +static void glue(gen_, name)(DisasContext * ctx) \ + { \ + TCGv_i64 a0, a1, b0, b1; \ + if (unlikely(!ctx->vsx_enabled)) { \ + gen_exception(ctx, POWERPC_EXCP_VSXU); \ + return; \ + } \ + a0 = tcg_temp_new_i64(); \ + a1 = tcg_temp_new_i64(); \ + b0 = tcg_temp_new_i64(); \ + b1 = tcg_temp_new_i64(); \ + if (high) { \ + tcg_gen_mov_i64(a0, cpu_vsrh(xA(ctx->opcode))); \ + tcg_gen_mov_i64(a1, cpu_vsrh(xA(ctx->opcode))); \ + tcg_gen_mov_i64(b0, cpu_vsrh(xB(ctx->opcode))); \ + tcg_gen_mov_i64(b1, cpu_vsrh(xB(ctx->opcode))); \ + } else { \ + tcg_gen_mov_i64(a0, cpu_vsrl(xA(ctx->opcode))); \ + tcg_gen_mov_i64(a1, cpu_vsrl(xA(ctx->opcode))); \ + tcg_gen_mov_i64(b0, cpu_vsrl(xB(ctx->opcode))); \ + 
tcg_gen_mov_i64(b1, cpu_vsrl(xB(ctx->opcode))); \ + } \ + tcg_gen_shri_i64(a0, a0, 32); \ + tcg_gen_shri_i64(b0, b0, 32); \ + tcg_gen_deposit_i64(cpu_vsrh(xT(ctx->opcode)), \ + b0, a0, 32, 32); \ + tcg_gen_deposit_i64(cpu_vsrl(xT(ctx->opcode)), \ + b1, a1, 32, 32); \ + tcg_temp_free_i64(a0); \ + tcg_temp_free_i64(a1); \ + tcg_temp_free_i64(b0); \ + tcg_temp_free_i64(b1); \ + } + +VSX_XXMRG(xxmrghw, 1) +VSX_XXMRG(xxmrglw, 0) + +static void gen_xxsel(DisasContext * ctx) +{ + TCGv_i64 a, b, c; + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + a = tcg_temp_new_i64(); + b = tcg_temp_new_i64(); + c = tcg_temp_new_i64(); + + tcg_gen_mov_i64(a, cpu_vsrh(xA(ctx->opcode))); + tcg_gen_mov_i64(b, cpu_vsrh(xB(ctx->opcode))); + tcg_gen_mov_i64(c, cpu_vsrh(xC(ctx->opcode))); + + tcg_gen_and_i64(b, b, c); + tcg_gen_andc_i64(a, a, c); + tcg_gen_or_i64(cpu_vsrh(xT(ctx->opcode)), a, b); + + tcg_gen_mov_i64(a, cpu_vsrl(xA(ctx->opcode))); + tcg_gen_mov_i64(b, cpu_vsrl(xB(ctx->opcode))); + tcg_gen_mov_i64(c, cpu_vsrl(xC(ctx->opcode))); + + tcg_gen_and_i64(b, b, c); + tcg_gen_andc_i64(a, a, c); + tcg_gen_or_i64(cpu_vsrl(xT(ctx->opcode)), a, b); + + tcg_temp_free_i64(a); + tcg_temp_free_i64(b); + tcg_temp_free_i64(c); +} + +static void gen_xxspltw(DisasContext *ctx) +{ + TCGv_i64 b, b2; + TCGv_i64 vsr = (UIM(ctx->opcode) & 2) ? + cpu_vsrl(xB(ctx->opcode)) : + cpu_vsrh(xB(ctx->opcode)); + + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + + b = tcg_temp_new_i64(); + b2 = tcg_temp_new_i64(); + + if (UIM(ctx->opcode) & 1) { + tcg_gen_ext32u_i64(b, vsr); + } else { + tcg_gen_shri_i64(b, vsr, 32); + } + + tcg_gen_shli_i64(b2, b, 32); + tcg_gen_or_i64(cpu_vsrh(xT(ctx->opcode)), b, b2); + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xT(ctx->opcode))); + + tcg_temp_free_i64(b); + tcg_temp_free_i64(b2); +} + +#define pattern(x) (((x) & 0xff) * (~(uint64_t)0 / 0xff)) + +static void gen_xxspltib(DisasContext *ctx) +{ + unsigned char uim8 = IMM8(ctx->opcode); + if (xS(ctx->opcode) < 32) { + if (unlikely(!ctx->altivec_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VPU); + return; + } + } else { + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + } + tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), pattern(uim8)); + tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), pattern(uim8)); +} + +static void gen_xxsldwi(DisasContext *ctx) +{ + TCGv_i64 xth, xtl; + if (unlikely(!ctx->vsx_enabled)) { + gen_exception(ctx, POWERPC_EXCP_VSXU); + return; + } + xth = tcg_temp_new_i64(); + xtl = tcg_temp_new_i64(); + + switch (SHW(ctx->opcode)) { + case 0: { + tcg_gen_mov_i64(xth, cpu_vsrh(xA(ctx->opcode))); + tcg_gen_mov_i64(xtl, cpu_vsrl(xA(ctx->opcode))); + break; + } + case 1: { + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_mov_i64(xth, cpu_vsrh(xA(ctx->opcode))); + tcg_gen_shli_i64(xth, xth, 32); + tcg_gen_mov_i64(t0, cpu_vsrl(xA(ctx->opcode))); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_or_i64(xth, xth, t0); + tcg_gen_mov_i64(xtl, cpu_vsrl(xA(ctx->opcode))); + tcg_gen_shli_i64(xtl, xtl, 32); + tcg_gen_mov_i64(t0, cpu_vsrh(xB(ctx->opcode))); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_or_i64(xtl, xtl, t0); + tcg_temp_free_i64(t0); + break; + } + case 2: { + tcg_gen_mov_i64(xth, cpu_vsrl(xA(ctx->opcode))); + tcg_gen_mov_i64(xtl, cpu_vsrh(xB(ctx->opcode))); + break; + } + case 3: { + TCGv_i64 t0 = tcg_temp_new_i64(); + tcg_gen_mov_i64(xth, cpu_vsrl(xA(ctx->opcode))); + tcg_gen_shli_i64(xth, xth, 32); + tcg_gen_mov_i64(t0, 
cpu_vsrh(xB(ctx->opcode))); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_or_i64(xth, xth, t0); + tcg_gen_mov_i64(xtl, cpu_vsrh(xB(ctx->opcode))); + tcg_gen_shli_i64(xtl, xtl, 32); + tcg_gen_mov_i64(t0, cpu_vsrl(xB(ctx->opcode))); + tcg_gen_shri_i64(t0, t0, 32); + tcg_gen_or_i64(xtl, xtl, t0); + tcg_temp_free_i64(t0); + break; + } + } + + tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), xth); + tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), xtl); + + tcg_temp_free_i64(xth); + tcg_temp_free_i64(xtl); +} + +#undef GEN_XX2FORM +#undef GEN_XX3FORM +#undef GEN_XX2IFORM +#undef GEN_XX3_RC_FORM +#undef GEN_XX3FORM_DM +#undef VSX_LOGICAL diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c new file mode 100644 index 0000000000..3d9104155a --- /dev/null +++ b/target-ppc/translate/vsx-ops.inc.c @@ -0,0 +1,300 @@ +GEN_HANDLER_E(lxsdx, 0x1F, 0x0C, 0x12, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(lxsiwax, 0x1F, 0x0C, 0x02, 0, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(lxsiwzx, 0x1F, 0x0C, 0x00, 0, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(lxsibzx, 0x1F, 0x0D, 0x18, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(lxsihzx, 0x1F, 0x0D, 0x19, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(lxsspx, 0x1F, 0x0C, 0x10, 0, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(lxvdsx, 0x1F, 0x0C, 0x0A, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(lxvw4x, 0x1F, 0x0C, 0x18, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(lxvh8x, 0x1F, 0x0C, 0x19, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(lxvb16x, 0x1F, 0x0C, 0x1B, 0, PPC_NONE, PPC2_ISA300), + +GEN_HANDLER_E(stxsdx, 0x1F, 0xC, 0x16, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(stxsibx, 0x1F, 0xD, 0x1C, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(stxsihx, 0x1F, 0xD, 0x1D, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(stxsiwx, 0x1F, 0xC, 0x04, 0, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(stxsspx, 0x1F, 0xC, 0x14, 0, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(stxvw4x, 0x1F, 0xC, 0x1C, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(stxvh8x, 0x1F, 0x0C, 0x1D, 0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(stxvb16x, 0x1F, 0x0C, 0x1F, 0, PPC_NONE, PPC2_ISA300), + +GEN_HANDLER_E(mfvsrwz, 0x1F, 0x13, 0x03, 0x0000F800, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(mtvsrwa, 0x1F, 0x13, 0x06, 0x0000F800, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(mtvsrwz, 0x1F, 0x13, 0x07, 0x0000F800, PPC_NONE, PPC2_VSX207), +#if defined(TARGET_PPC64) +GEN_HANDLER_E(mfvsrd, 0x1F, 0x13, 0x01, 0x0000F800, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(mtvsrd, 0x1F, 0x13, 0x05, 0x0000F800, PPC_NONE, PPC2_VSX207), +GEN_HANDLER_E(mfvsrld, 0X1F, 0x13, 0x09, 0x0000F800, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(mtvsrdd, 0X1F, 0x13, 0x0D, 0x0, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(mtvsrws, 0x1F, 0x13, 0x0C, 0x0000F800, PPC_NONE, PPC2_ISA300), +#endif + +#define GEN_XX1FORM(name, opc2, opc3, fl2) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2) + +#define GEN_XX2FORM(name, opc2, opc3, fl2) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2) + +#define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2) \ +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2) + +#define GEN_XX3FORM(name, opc2, opc3, fl2) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \ 
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 0, PPC_NONE, fl2) + +#define GEN_XX2IFORM(name, opc2, opc3, fl2) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 1, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 1, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 1, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 1, PPC_NONE, fl2) + +#define GEN_XX3_RC_FORM(name, opc2, opc3, fl2) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x00, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x00, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x00, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x10, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x01, opc3 | 0x10, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x02, opc3 | 0x10, 0, PPC_NONE, fl2), \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x03, opc3 | 0x10, 0, PPC_NONE, fl2) + +#define GEN_XX3FORM_DM(name, opc2, opc3) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x0C, 0, PPC_NONE, PPC2_VSX) + +GEN_XX2FORM(xsabsdp, 0x12, 0x15, PPC2_VSX), +GEN_XX2FORM(xsnabsdp, 0x12, 0x16, PPC2_VSX), +GEN_XX2FORM(xsnegdp, 0x12, 0x17, PPC2_VSX), +GEN_XX3FORM(xscpsgndp, 0x00, 0x16, PPC2_VSX), + +GEN_XX2FORM(xvabsdp, 0x12, 0x1D, PPC2_VSX), +GEN_XX2FORM(xvnabsdp, 0x12, 0x1E, PPC2_VSX), +GEN_XX2FORM(xvnegdp, 0x12, 0x1F, PPC2_VSX), +GEN_XX3FORM(xvcpsgndp, 0x00, 0x1E, PPC2_VSX), +GEN_XX2FORM(xvabssp, 0x12, 0x19, PPC2_VSX), +GEN_XX2FORM(xvnabssp, 0x12, 0x1A, PPC2_VSX), +GEN_XX2FORM(xvnegsp, 0x12, 0x1B, PPC2_VSX), +GEN_XX3FORM(xvcpsgnsp, 0x00, 0x1A, PPC2_VSX), + +GEN_XX3FORM(xsadddp, 0x00, 0x04, PPC2_VSX), +GEN_XX3FORM(xssubdp, 0x00, 0x05, PPC2_VSX), +GEN_XX3FORM(xsmuldp, 0x00, 0x06, PPC2_VSX), +GEN_XX3FORM(xsdivdp, 0x00, 0x07, PPC2_VSX), +GEN_XX2FORM(xsredp, 0x14, 0x05, PPC2_VSX), +GEN_XX2FORM(xssqrtdp, 0x16, 0x04, PPC2_VSX), +GEN_XX2FORM(xsrsqrtedp, 0x14, 0x04, PPC2_VSX), +GEN_XX3FORM(xstdivdp, 0x14, 0x07, PPC2_VSX), +GEN_XX2FORM(xstsqrtdp, 
0x14, 0x06, PPC2_VSX), +GEN_XX3FORM(xsmaddadp, 0x04, 0x04, PPC2_VSX), +GEN_XX3FORM(xsmaddmdp, 0x04, 0x05, PPC2_VSX), +GEN_XX3FORM(xsmsubadp, 0x04, 0x06, PPC2_VSX), +GEN_XX3FORM(xsmsubmdp, 0x04, 0x07, PPC2_VSX), +GEN_XX3FORM(xsnmaddadp, 0x04, 0x14, PPC2_VSX), +GEN_XX3FORM(xsnmaddmdp, 0x04, 0x15, PPC2_VSX), +GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX), +GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX), +GEN_XX3FORM(xscmpeqdp, 0x0C, 0x00, PPC2_ISA300), +GEN_XX3FORM(xscmpgtdp, 0x0C, 0x01, PPC2_ISA300), +GEN_XX3FORM(xscmpgedp, 0x0C, 0x02, PPC2_ISA300), +GEN_XX3FORM(xscmpnedp, 0x0C, 0x03, PPC2_ISA300), +GEN_XX2IFORM(xscmpodp, 0x0C, 0x05, PPC2_VSX), +GEN_XX2IFORM(xscmpudp, 0x0C, 0x04, PPC2_VSX), +GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX), +GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX), +GEN_XX2FORM(xscvdpsp, 0x12, 0x10, PPC2_VSX), +GEN_XX2FORM(xscvdpspn, 0x16, 0x10, PPC2_VSX207), +GEN_XX2FORM(xscvspdp, 0x12, 0x14, PPC2_VSX), +GEN_XX2FORM(xscvspdpn, 0x16, 0x14, PPC2_VSX207), +GEN_XX2FORM(xscvdpsxds, 0x10, 0x15, PPC2_VSX), +GEN_XX2FORM(xscvdpsxws, 0x10, 0x05, PPC2_VSX), +GEN_XX2FORM(xscvdpuxds, 0x10, 0x14, PPC2_VSX), +GEN_XX2FORM(xscvdpuxws, 0x10, 0x04, PPC2_VSX), +GEN_XX2FORM(xscvsxddp, 0x10, 0x17, PPC2_VSX), +GEN_XX2FORM(xscvuxddp, 0x10, 0x16, PPC2_VSX), +GEN_XX2FORM(xsrdpi, 0x12, 0x04, PPC2_VSX), +GEN_XX2FORM(xsrdpic, 0x16, 0x06, PPC2_VSX), +GEN_XX2FORM(xsrdpim, 0x12, 0x07, PPC2_VSX), +GEN_XX2FORM(xsrdpip, 0x12, 0x06, PPC2_VSX), +GEN_XX2FORM(xsrdpiz, 0x12, 0x05, PPC2_VSX), + +GEN_XX3FORM(xsaddsp, 0x00, 0x00, PPC2_VSX207), +GEN_XX3FORM(xssubsp, 0x00, 0x01, PPC2_VSX207), +GEN_XX3FORM(xsmulsp, 0x00, 0x02, PPC2_VSX207), +GEN_XX3FORM(xsdivsp, 0x00, 0x03, PPC2_VSX207), +GEN_XX2FORM(xsresp, 0x14, 0x01, PPC2_VSX207), +GEN_XX2FORM(xsrsp, 0x12, 0x11, PPC2_VSX207), +GEN_XX2FORM(xssqrtsp, 0x16, 0x00, PPC2_VSX207), +GEN_XX2FORM(xsrsqrtesp, 0x14, 0x00, PPC2_VSX207), +GEN_XX3FORM(xsmaddasp, 0x04, 0x00, PPC2_VSX207), +GEN_XX3FORM(xsmaddmsp, 0x04, 0x01, PPC2_VSX207), +GEN_XX3FORM(xsmsubasp, 0x04, 0x02, PPC2_VSX207), +GEN_XX3FORM(xsmsubmsp, 0x04, 0x03, PPC2_VSX207), +GEN_XX3FORM(xsnmaddasp, 0x04, 0x10, PPC2_VSX207), +GEN_XX3FORM(xsnmaddmsp, 0x04, 0x11, PPC2_VSX207), +GEN_XX3FORM(xsnmsubasp, 0x04, 0x12, PPC2_VSX207), +GEN_XX3FORM(xsnmsubmsp, 0x04, 0x13, PPC2_VSX207), +GEN_XX2FORM(xscvsxdsp, 0x10, 0x13, PPC2_VSX207), +GEN_XX2FORM(xscvuxdsp, 0x10, 0x12, PPC2_VSX207), + +GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX), +GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX), +GEN_XX3FORM(xvmuldp, 0x00, 0x0E, PPC2_VSX), +GEN_XX3FORM(xvdivdp, 0x00, 0x0F, PPC2_VSX), +GEN_XX2FORM(xvredp, 0x14, 0x0D, PPC2_VSX), +GEN_XX2FORM(xvsqrtdp, 0x16, 0x0C, PPC2_VSX), +GEN_XX2FORM(xvrsqrtedp, 0x14, 0x0C, PPC2_VSX), +GEN_XX3FORM(xvtdivdp, 0x14, 0x0F, PPC2_VSX), +GEN_XX2FORM(xvtsqrtdp, 0x14, 0x0E, PPC2_VSX), +GEN_XX3FORM(xvmaddadp, 0x04, 0x0C, PPC2_VSX), +GEN_XX3FORM(xvmaddmdp, 0x04, 0x0D, PPC2_VSX), +GEN_XX3FORM(xvmsubadp, 0x04, 0x0E, PPC2_VSX), +GEN_XX3FORM(xvmsubmdp, 0x04, 0x0F, PPC2_VSX), +GEN_XX3FORM(xvnmaddadp, 0x04, 0x1C, PPC2_VSX), +GEN_XX3FORM(xvnmaddmdp, 0x04, 0x1D, PPC2_VSX), +GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX), +GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX), +GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX), +GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpeqdp, 0x0C, 0x0C, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpgtdp, 0x0C, 0x0D, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpgedp, 0x0C, 0x0E, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpnedp, 0x0C, 0x0F, PPC2_ISA300), +GEN_XX2FORM(xvcvdpsp, 0x12, 0x18, PPC2_VSX), +GEN_XX2FORM(xvcvdpsxds, 0x10, 0x1D, 
PPC2_VSX), +GEN_XX2FORM(xvcvdpsxws, 0x10, 0x0D, PPC2_VSX), +GEN_XX2FORM(xvcvdpuxds, 0x10, 0x1C, PPC2_VSX), +GEN_XX2FORM(xvcvdpuxws, 0x10, 0x0C, PPC2_VSX), +GEN_XX2FORM(xvcvsxddp, 0x10, 0x1F, PPC2_VSX), +GEN_XX2FORM(xvcvuxddp, 0x10, 0x1E, PPC2_VSX), +GEN_XX2FORM(xvcvsxwdp, 0x10, 0x0F, PPC2_VSX), +GEN_XX2FORM(xvcvuxwdp, 0x10, 0x0E, PPC2_VSX), +GEN_XX2FORM(xvrdpi, 0x12, 0x0C, PPC2_VSX), +GEN_XX2FORM(xvrdpic, 0x16, 0x0E, PPC2_VSX), +GEN_XX2FORM(xvrdpim, 0x12, 0x0F, PPC2_VSX), +GEN_XX2FORM(xvrdpip, 0x12, 0x0E, PPC2_VSX), +GEN_XX2FORM(xvrdpiz, 0x12, 0x0D, PPC2_VSX), + +GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX), +GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX), +GEN_XX3FORM(xvmulsp, 0x00, 0x0A, PPC2_VSX), +GEN_XX3FORM(xvdivsp, 0x00, 0x0B, PPC2_VSX), +GEN_XX2FORM(xvresp, 0x14, 0x09, PPC2_VSX), +GEN_XX2FORM(xvsqrtsp, 0x16, 0x08, PPC2_VSX), +GEN_XX2FORM(xvrsqrtesp, 0x14, 0x08, PPC2_VSX), +GEN_XX3FORM(xvtdivsp, 0x14, 0x0B, PPC2_VSX), +GEN_XX2FORM(xvtsqrtsp, 0x14, 0x0A, PPC2_VSX), +GEN_XX3FORM(xvmaddasp, 0x04, 0x08, PPC2_VSX), +GEN_XX3FORM(xvmaddmsp, 0x04, 0x09, PPC2_VSX), +GEN_XX3FORM(xvmsubasp, 0x04, 0x0A, PPC2_VSX), +GEN_XX3FORM(xvmsubmsp, 0x04, 0x0B, PPC2_VSX), +GEN_XX3FORM(xvnmaddasp, 0x04, 0x18, PPC2_VSX), +GEN_XX3FORM(xvnmaddmsp, 0x04, 0x19, PPC2_VSX), +GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX), +GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX), +GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX), +GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpeqsp, 0x0C, 0x08, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpgtsp, 0x0C, 0x09, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpgesp, 0x0C, 0x0A, PPC2_VSX), +GEN_XX3_RC_FORM(xvcmpnesp, 0x0C, 0x0B, PPC2_ISA300), +GEN_XX2FORM(xvcvspdp, 0x12, 0x1C, PPC2_VSX), +GEN_XX2FORM(xvcvspsxds, 0x10, 0x19, PPC2_VSX), +GEN_XX2FORM(xvcvspsxws, 0x10, 0x09, PPC2_VSX), +GEN_XX2FORM(xvcvspuxds, 0x10, 0x18, PPC2_VSX), +GEN_XX2FORM(xvcvspuxws, 0x10, 0x08, PPC2_VSX), +GEN_XX2FORM(xvcvsxdsp, 0x10, 0x1B, PPC2_VSX), +GEN_XX2FORM(xvcvuxdsp, 0x10, 0x1A, PPC2_VSX), +GEN_XX2FORM(xvcvsxwsp, 0x10, 0x0B, PPC2_VSX), +GEN_XX2FORM(xvcvuxwsp, 0x10, 0x0A, PPC2_VSX), +GEN_XX2FORM(xvrspi, 0x12, 0x08, PPC2_VSX), +GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX), +GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX), +GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX), +GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX), +GEN_XX2FORM_EO(xxbrh, 0x16, 0x1D, 0x07, PPC2_ISA300), +GEN_XX2FORM_EO(xxbrw, 0x16, 0x1D, 0x0F, PPC2_ISA300), +GEN_XX2FORM_EO(xxbrd, 0x16, 0x1D, 0x17, PPC2_ISA300), +GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300), + +#define VSX_LOGICAL(name, opc2, opc3, fl2) \ +GEN_XX3FORM(name, opc2, opc3, fl2) + +VSX_LOGICAL(xxland, 0x8, 0x10, PPC2_VSX), +VSX_LOGICAL(xxlandc, 0x8, 0x11, PPC2_VSX), +VSX_LOGICAL(xxlor, 0x8, 0x12, PPC2_VSX), +VSX_LOGICAL(xxlxor, 0x8, 0x13, PPC2_VSX), +VSX_LOGICAL(xxlnor, 0x8, 0x14, PPC2_VSX), +VSX_LOGICAL(xxleqv, 0x8, 0x17, PPC2_VSX207), +VSX_LOGICAL(xxlnand, 0x8, 0x16, PPC2_VSX207), +VSX_LOGICAL(xxlorc, 0x8, 0x15, PPC2_VSX207), +GEN_XX3FORM(xxmrghw, 0x08, 0x02, PPC2_VSX), +GEN_XX3FORM(xxmrglw, 0x08, 0x06, PPC2_VSX), +GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX), +GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300), +GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00), + +#define GEN_XXSEL_ROW(opc3) \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x19, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1A, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1B, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, 
"xxsel", 0x3C, 0x1C, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1D, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1E, opc3, 0, PPC_NONE, PPC2_VSX), \ +GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x1F, opc3, 0, PPC_NONE, PPC2_VSX), \ + +GEN_XXSEL_ROW(0x00) +GEN_XXSEL_ROW(0x01) +GEN_XXSEL_ROW(0x02) +GEN_XXSEL_ROW(0x03) +GEN_XXSEL_ROW(0x04) +GEN_XXSEL_ROW(0x05) +GEN_XXSEL_ROW(0x06) +GEN_XXSEL_ROW(0x07) +GEN_XXSEL_ROW(0x08) +GEN_XXSEL_ROW(0x09) +GEN_XXSEL_ROW(0x0A) +GEN_XXSEL_ROW(0x0B) +GEN_XXSEL_ROW(0x0C) +GEN_XXSEL_ROW(0x0D) +GEN_XXSEL_ROW(0x0E) +GEN_XXSEL_ROW(0x0F) +GEN_XXSEL_ROW(0x10) +GEN_XXSEL_ROW(0x11) +GEN_XXSEL_ROW(0x12) +GEN_XXSEL_ROW(0x13) +GEN_XXSEL_ROW(0x14) +GEN_XXSEL_ROW(0x15) +GEN_XXSEL_ROW(0x16) +GEN_XXSEL_ROW(0x17) +GEN_XXSEL_ROW(0x18) +GEN_XXSEL_ROW(0x19) +GEN_XXSEL_ROW(0x1A) +GEN_XXSEL_ROW(0x1B) +GEN_XXSEL_ROW(0x1C) +GEN_XXSEL_ROW(0x1D) +GEN_XXSEL_ROW(0x1E) +GEN_XXSEL_ROW(0x1F) + +GEN_XX3FORM_DM(xxpermdi, 0x08, 0x01), diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 7a9b15e7e1..626e03186c 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7459,7 +7459,8 @@ enum BOOK3S_CPU_TYPE { BOOK3S_CPU_POWER5PLUS, BOOK3S_CPU_POWER6, BOOK3S_CPU_POWER7, - BOOK3S_CPU_POWER8 + BOOK3S_CPU_POWER8, + BOOK3S_CPU_POWER9 }; static void gen_fscr_facility_check(DisasContext *ctx, int facility_sprn, @@ -7469,7 +7470,6 @@ static void gen_fscr_facility_check(DisasContext *ctx, int facility_sprn, TCGv_i32 t2 = tcg_const_i32(sprn); TCGv_i32 t3 = tcg_const_i32(cause); - gen_update_current_nip(ctx); gen_helper_fscr_facility_check(cpu_env, t1, t2, t3); tcg_temp_free_i32(t3); @@ -7484,7 +7484,6 @@ static void gen_msr_facility_check(DisasContext *ctx, int facility_sprn, TCGv_i32 t2 = tcg_const_i32(sprn); TCGv_i32 t3 = tcg_const_i32(cause); - gen_update_current_nip(ctx); gen_helper_msr_facility_check(cpu_env, t1, t2, t3); tcg_temp_free_i32(t3); @@ -8241,6 +8240,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) break; case BOOK3S_CPU_POWER7: case BOOK3S_CPU_POWER8: + case BOOK3S_CPU_POWER9: gen_spr_book3s_ids(env); gen_spr_amr(env, version >= BOOK3S_CPU_POWER8); gen_spr_book3s_purr(env); @@ -8293,6 +8293,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) break; case BOOK3S_CPU_POWER7: case BOOK3S_CPU_POWER8: + case BOOK3S_CPU_POWER9: default: env->slb_nr = 32; break; @@ -8310,6 +8311,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int version) ppcPOWER7_irq_init(ppc_env_get_cpu(env)); break; case BOOK3S_CPU_POWER8: + case BOOK3S_CPU_POWER9: init_excp_POWER8(env); ppcPOWER7_irq_init(ppc_env_get_cpu(env)); break; @@ -8772,6 +8774,86 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->l1_icache_size = 0x8000; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; } +static void init_proc_POWER9(CPUPPCState *env) +{ + init_proc_book3s_64(env, BOOK3S_CPU_POWER9); +} + +static bool ppc_pvr_match_power9(PowerPCCPUClass *pcc, uint32_t pvr) +{ + if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER9_BASE) { + return true; + } + return false; +} + +POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); + + dc->fw_name = "PowerPC,POWER9"; + dc->desc = "POWER9"; + dc->props = powerpc_servercpu_properties; + pcc->pvr_match = ppc_pvr_match_power9; + pcc->pcr_mask = PCR_COMPAT_2_05 | PCR_COMPAT_2_06 | PCR_COMPAT_2_07; + pcc->init_proc = init_proc_POWER9; + pcc->check_pow 
= check_pow_nocheck; + pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | + PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | + PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | + PPC_FLOAT_FRSQRTES | + PPC_FLOAT_STFIWX | + PPC_FLOAT_EXT | + PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ | + PPC_MEM_SYNC | PPC_MEM_EIEIO | + PPC_MEM_TLBIE | PPC_MEM_TLBSYNC | + PPC_64B | PPC_64BX | PPC_ALTIVEC | + PPC_SEGMENT_64B | PPC_SLBI | + PPC_POPCNTB | PPC_POPCNTWD | + PPC_CILDST; + pcc->insns_flags2 = PPC2_VSX | PPC2_VSX207 | PPC2_DFP | PPC2_DBRX | + PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | + PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | + PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | + PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | + PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 | + PPC2_TM | PPC2_PM_ISA206 | PPC2_ISA300; + pcc->msr_mask = (1ull << MSR_SF) | + (1ull << MSR_TM) | + (1ull << MSR_VR) | + (1ull << MSR_VSX) | + (1ull << MSR_EE) | + (1ull << MSR_PR) | + (1ull << MSR_FP) | + (1ull << MSR_ME) | + (1ull << MSR_FE0) | + (1ull << MSR_SE) | + (1ull << MSR_DE) | + (1ull << MSR_FE1) | + (1ull << MSR_IR) | + (1ull << MSR_DR) | + (1ull << MSR_PMM) | + (1ull << MSR_RI) | + (1ull << MSR_LE); + /* Using 2.07 defines until new radix model is added. */ + pcc->mmu_model = POWERPC_MMU_2_07; +#if defined(CONFIG_SOFTMMU) + pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; + /* segment page size remain the same */ + pcc->sps = &POWER7_POWER8_sps; +#endif + pcc->excp_model = POWERPC_EXCP_POWER8; + pcc->bus_model = PPC_FLAGS_INPUT_POWER7; + pcc->bfd_mach = bfd_mach_ppc64; + pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | + POWERPC_FLAG_BE | POWERPC_FLAG_PMM | + POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | + POWERPC_FLAG_VSX | POWERPC_FLAG_TM; + pcc->l1_dcache_size = 0x8000; + pcc->l1_icache_size = 0x8000; + pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr; +} #if !defined(CONFIG_USER_ONLY) @@ -9169,13 +9251,47 @@ static int register_dblind_insn (opc_handler_t **ppc_opcodes, return 0; } +static int register_trplind_insn(opc_handler_t **ppc_opcodes, + unsigned char idx1, unsigned char idx2, + unsigned char idx3, unsigned char idx4, + opc_handler_t *handler) +{ + opc_handler_t **table; + + if (register_ind_in_table(ppc_opcodes, idx1, idx2, NULL) < 0) { + printf("*** ERROR: unable to join indirect table idx " + "[%02x-%02x]\n", idx1, idx2); + return -1; + } + table = ind_table(ppc_opcodes[idx1]); + if (register_ind_in_table(table, idx2, idx3, NULL) < 0) { + printf("*** ERROR: unable to join 2nd-level indirect table idx " + "[%02x-%02x-%02x]\n", idx1, idx2, idx3); + return -1; + } + table = ind_table(table[idx2]); + if (register_ind_in_table(table, idx3, idx4, handler) < 0) { + printf("*** ERROR: unable to insert opcode " + "[%02x-%02x-%02x-%02x]\n", idx1, idx2, idx3, idx4); + return -1; + } + return 0; +} static int register_insn (opc_handler_t **ppc_opcodes, opcode_t *insn) { if (insn->opc2 != 0xFF) { if (insn->opc3 != 0xFF) { - if (register_dblind_insn(ppc_opcodes, insn->opc1, insn->opc2, - insn->opc3, &insn->handler) < 0) - return -1; + if (insn->opc4 != 0xFF) { + if (register_trplind_insn(ppc_opcodes, insn->opc1, insn->opc2, + insn->opc3, insn->opc4, + &insn->handler) < 0) { + return -1; + } + } else { + if (register_dblind_insn(ppc_opcodes, insn->opc1, insn->opc2, + insn->opc3, &insn->handler) < 0) + return -1; + } } else { if (register_ind_insn(ppc_opcodes, insn->opc1, insn->opc2, &insn->handler) < 0) @@ -9251,7 +9367,7 @@ static void dump_ppc_insns (CPUPPCState *env) { opc_handler_t **table, *handler; const char 
*p, *q; - uint8_t opc1, opc2, opc3; + uint8_t opc1, opc2, opc3, opc4; printf("Instructions set:\n"); /* opc1 is 6 bits long */ @@ -9271,34 +9387,51 @@ static void dump_ppc_insns (CPUPPCState *env) for (opc3 = 0; opc3 < PPC_CPU_INDIRECT_OPCODES_LEN; opc3++) { handler = table[opc3]; - if (handler->handler != &gen_invalid) { - /* Special hack to properly dump SPE insns */ - p = strchr(handler->oname, '_'); - if (p == NULL) { - printf("INSN: %02x %02x %02x (%02d %04d) : " - "%s\n", - opc1, opc2, opc3, opc1, - (opc3 << 5) | opc2, - handler->oname); - } else { - q = "speundef"; - if ((p - handler->oname) != strlen(q) || - memcmp(handler->oname, q, strlen(q)) != 0) { - /* First instruction */ - printf("INSN: %02x %02x %02x (%02d %04d) : " - "%.*s\n", - opc1, opc2 << 1, opc3, opc1, - (opc3 << 6) | (opc2 << 1), - (int)(p - handler->oname), + if (is_indirect_opcode(handler)) { + table = ind_table(handler); + /* opc4 is 5 bits long */ + for (opc4 = 0; opc4 < PPC_CPU_INDIRECT_OPCODES_LEN; + opc4++) { + handler = table[opc4]; + if (handler->handler != &gen_invalid) { + printf("INSN: %02x %02x %02x %02x -- " + "(%02d %04d %02d) : %s\n", + opc1, opc2, opc3, opc4, + opc1, (opc3 << 5) | opc2, opc4, handler->oname); } - if (strcmp(p + 1, q) != 0) { - /* Second instruction */ + } + } else { + if (handler->handler != &gen_invalid) { + /* Special hack to properly dump SPE insns */ + p = strchr(handler->oname, '_'); + if (p == NULL) { printf("INSN: %02x %02x %02x (%02d %04d) : " "%s\n", - opc1, (opc2 << 1) | 1, opc3, opc1, - (opc3 << 6) | (opc2 << 1) | 1, - p + 1); + opc1, opc2, opc3, opc1, + (opc3 << 5) | opc2, + handler->oname); + } else { + q = "speundef"; + if ((p - handler->oname) != strlen(q) + || (memcmp(handler->oname, q, strlen(q)) + != 0)) { + /* First instruction */ + printf("INSN: %02x %02x %02x" + "(%02d %04d) : %.*s\n", + opc1, opc2 << 1, opc3, opc1, + (opc3 << 6) | (opc2 << 1), + (int)(p - handler->oname), + handler->oname); + } + if (strcmp(p + 1, q) != 0) { + /* Second instruction */ + printf("INSN: %02x %02x %02x " + "(%02d %04d) : %s\n", opc1, + (opc2 << 1) | 1, opc3, opc1, + (opc3 << 6) | (opc2 << 1) | 1, + p + 1); + } } } } @@ -9545,7 +9678,7 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) } #endif - cpu_exec_init(cs, &local_err); + cpu_exec_realizefn(cs, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); return; @@ -9773,11 +9906,17 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp) { PowerPCCPU *cpu = POWERPC_CPU(dev); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); CPUPPCState *env = &cpu->env; - opc_handler_t **table; - int i, j; + Error *local_err = NULL; + opc_handler_t **table, **table_2; + int i, j, k; - cpu_exec_exit(CPU(dev)); + pcc->parent_unrealize(dev, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } for (i = 0; i < PPC_CPU_OPCODES_LEN; i++) { if (env->opcodes[i] == &invalid_handler) { @@ -9786,10 +9925,20 @@ static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp) if (is_indirect_opcode(env->opcodes[i])) { table = ind_table(env->opcodes[i]); for (j = 0; j < PPC_CPU_INDIRECT_OPCODES_LEN; j++) { - if (table[j] != &invalid_handler && - is_indirect_opcode(table[j])) { + if (table[j] == &invalid_handler) { + continue; + } + if (is_indirect_opcode(table[j])) { + table_2 = ind_table(table[j]); + for (k = 0; k < PPC_CPU_INDIRECT_OPCODES_LEN; k++) { + if (table_2[k] != &invalid_handler && + 
is_indirect_opcode(table_2[k])) { + g_free((opc_handler_t *)((uintptr_t)table_2[k] & + ~PPC_INDIRECT)); + } + } g_free((opc_handler_t *)((uintptr_t)table[j] & - ~PPC_INDIRECT)); + ~PPC_INDIRECT)); } } g_free((opc_handler_t *)((uintptr_t)env->opcodes[i] & @@ -9800,7 +9949,8 @@ static void ppc_cpu_unrealizefn(DeviceState *dev, Error **errp) int ppc_get_compat_smt_threads(PowerPCCPU *cpu) { - int ret = MIN(smp_threads, kvmppc_smt_threads()); + CPUState *cs = CPU(cpu); + int ret = MIN(cs->nr_threads, kvmppc_smt_threads()); switch (cpu->cpu_version) { case CPU_POWERPC_LOGICAL_2_05: @@ -10370,6 +10520,11 @@ static gchar *ppc_gdb_arch_name(CPUState *cs) #endif } +static Property ppc_cpu_properties[] = { + DEFINE_PROP_BOOL("pre-2.8-migration", PowerPCCPU, pre_2_8_migration, false), + DEFINE_PROP_END_OF_LIST(), +}; + static void ppc_cpu_class_init(ObjectClass *oc, void *data) { PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc); @@ -10377,10 +10532,12 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) DeviceClass *dc = DEVICE_CLASS(oc); pcc->parent_realize = dc->realize; + pcc->parent_unrealize = dc->unrealize; pcc->pvr_match = ppc_pvr_match_default; pcc->interrupts_big_endian = ppc_cpu_interrupts_big_endian_always; dc->realize = ppc_cpu_realizefn; dc->unrealize = ppc_cpu_unrealizefn; + dc->props = ppc_cpu_properties; pcc->parent_reset = cc->reset; cc->reset = ppc_cpu_reset; diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs index dd62cbda83..6b02b1794c 100644 --- a/target-s390x/Makefile.objs +++ b/target-s390x/Makefile.objs @@ -1,5 +1,25 @@ obj-y += translate.o helper.o cpu.o interrupt.o obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o -obj-y += gdbstub.o +obj-y += gdbstub.o cpu_models.o cpu_features.o obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o obj-$(CONFIG_KVM) += kvm.o + +# build and run feature list generator +feat-src = $(SRC_PATH)/target-$(TARGET_BASE_ARCH)/ +feat-dst = $(BUILD_DIR)/$(TARGET_DIR) +ifneq ($(MAKECMDGOALS),clean) +GENERATED_HEADERS += $(feat-dst)gen-features.h +endif + +$(feat-dst)gen-features.h: $(feat-dst)gen-features.h-timestamp + @cmp $< $@ >/dev/null 2>&1 || cp $< $@ +$(feat-dst)gen-features.h-timestamp: $(feat-dst)gen-features + $(call quiet-command,$< >$@,"GEN","$(TARGET_DIR)gen-features.h") + +$(feat-dst)gen-features: $(feat-src)gen-features.c + $(call quiet-command,$(HOST_CC) $(QEMU_INCLUDES) -o $@ $<,"CC","$(TARGET_DIR)gen-features") + +clean-target: + rm -f gen-features.h-timestamp + rm -f gen-features.h + rm -f gen-features diff --git a/target-s390x/cpu-qom.h b/target-s390x/cpu-qom.h index 66b5d1808f..4e936e7788 100644 --- a/target-s390x/cpu-qom.h +++ b/target-s390x/cpu-qom.h @@ -21,6 +21,7 @@ #define QEMU_S390_CPU_QOM_H #include "qom/cpu.h" +#include "cpu_models.h" #define TYPE_S390_CPU "s390-cpu" @@ -45,6 +46,11 @@ typedef struct S390CPUClass { /*< private >*/ CPUClass parent_class; /*< public >*/ + const S390CPUDef *cpu_def; + bool kvm_required; + bool is_static; + bool is_migration_safe; + const char *desc; int64_t next_cpu_id; diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c index e43e2d6155..0a39d31237 100644 --- a/target-s390x/cpu.c +++ b/target-s390x/cpu.c @@ -44,30 +44,6 @@ #define CR0_RESET 0xE0UL #define CR14_RESET 0xC2000000UL; -/* generate CPU information for cpu -? 
*/ -void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf) -{ -#ifdef CONFIG_KVM - (*cpu_fprintf)(f, "s390 %16s\n", "host"); -#endif -} - -#ifndef CONFIG_USER_ONLY -CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) -{ - CpuDefinitionInfoList *entry; - CpuDefinitionInfo *info; - - info = g_malloc0(sizeof(*info)); - info->name = g_strdup("host"); - - entry = g_malloc0(sizeof(*entry)); - entry->value = info; - - return entry; -} -#endif - static void s390_cpu_set_pc(CPUState *cs, vaddr value) { S390CPU *cpu = S390_CPU(cs); @@ -188,7 +164,7 @@ static void s390_cpu_machine_reset_cb(void *opaque) { S390CPU *cpu = opaque; - run_on_cpu(CPU(cpu), s390_do_cpu_full_reset, CPU(cpu)); + run_on_cpu(CPU(cpu), s390_do_cpu_full_reset, RUN_ON_CPU_NULL); } #endif @@ -206,6 +182,12 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) CPUS390XState *env = &cpu->env; Error *err = NULL; + /* the model has to be realized before qemu_init_vcpu() due to kvm */ + s390_realize_cpu_model(cs, &err); + if (err) { + goto out; + } + #if !defined(CONFIG_USER_ONLY) if (cpu->id >= max_cpus) { error_setg(&err, "Unable to add CPU: %" PRIi64 @@ -225,7 +207,7 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } - cpu_exec_init(cs, &err); + cpu_exec_realizefn(cs, &err); if (err != NULL) { goto out; } @@ -238,7 +220,7 @@ static void s390_cpu_realizefn(DeviceState *dev, Error **errp) s390_cpu_gdb_init(cs); qemu_init_vcpu(cs); #if !defined(CONFIG_USER_ONLY) - run_on_cpu(cs, s390_do_cpu_full_reset, cs); + run_on_cpu(cs, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); #else cpu_reset(cs); #endif @@ -309,6 +291,7 @@ static void s390_cpu_initfn(Object *obj) cs->exception_index = EXCP_HLT; object_property_add(OBJECT(cpu), "id", "int64_t", s390x_cpu_get_id, s390x_cpu_set_id, NULL, NULL, NULL); + s390_cpu_model_register_props(obj); #if !defined(CONFIG_USER_ONLY) qemu_get_timedate(&tm, 0); env->tod_offset = TOD_UNIX_EPOCH + @@ -435,6 +418,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) scc->cpu_reset = s390_cpu_reset; scc->initial_cpu_reset = s390_cpu_initial_reset; cc->reset = s390_cpu_full_reset; + cc->class_by_name = s390_cpu_class_by_name, cc->has_work = s390_cpu_has_work; cc->do_interrupt = s390_cpu_do_interrupt; cc->dump_state = s390_cpu_dump_state; @@ -456,12 +440,7 @@ static void s390_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_core_xml_file = "s390x-core64.xml"; cc->gdb_arch_name = s390_gdb_arch_name; - /* - * Reason: s390_cpu_realizefn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; + s390_cpu_model_class_register_props(oc); } static const TypeInfo s390_cpu_type_info = { @@ -470,7 +449,7 @@ static const TypeInfo s390_cpu_type_info = { .instance_size = sizeof(S390CPU), .instance_init = s390_cpu_initfn, .instance_finalize = s390_cpu_finalize, - .abstract = false, + .abstract = true, .class_size = sizeof(S390CPUClass), .class_init = s390_cpu_class_init, }; diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index c216bdacef..fd36a25cf5 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -188,6 +188,7 @@ struct S390CPU { CPUS390XState env; int64_t id; + S390CPUModel *model; /* needed for live migration */ void *irqstate; uint32_t irqstate_saved_size; @@ -394,6 +395,8 @@ static inline void cpu_get_tb_cpu_state(CPUS390XState* env, target_ulong *pc, ((env->psw.mask & PSW_MASK_32) ? 
FLAG_MASK_32 : 0); } +#define MAX_ILEN 6 + /* While the PoO talks about ILC (a number between 1-3) what is actually stored in LowCore is shifted left one bit (an even between 2-6). As this is the actual length of the insn and therefore more useful, that @@ -499,17 +502,14 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb, #define decode_basedisp_rs decode_basedisp_s /* helper functions for run_on_cpu() */ -static inline void s390_do_cpu_reset(void *arg) +static inline void s390_do_cpu_reset(CPUState *cs, run_on_cpu_data arg) { - CPUState *cs = arg; S390CPUClass *scc = S390_CPU_GET_CLASS(cs); scc->cpu_reset(cs); } -static inline void s390_do_cpu_full_reset(void *arg) +static inline void s390_do_cpu_full_reset(CPUState *cs, run_on_cpu_data arg) { - CPUState *cs = arg; - cpu_reset(cs); } @@ -621,8 +621,6 @@ static inline unsigned int s390_cpu_set_state(uint8_t cpu_state, S390CPU *cpu) return 0; } #endif -void cpu_lock(void); -void cpu_unlock(void); extern void subsystem_reset(void); @@ -631,6 +629,10 @@ extern void subsystem_reset(void); void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf); #define cpu_list s390_cpu_list +void s390_cpu_model_register_props(Object *obj); +void s390_cpu_model_class_register_props(ObjectClass *oc); +void s390_realize_cpu_model(CPUState *cs, Error **errp); +ObjectClass *s390_cpu_class_by_name(const char *name); #define EXCP_EXT 1 /* external interrupt */ #define EXCP_SVC 2 /* supervisor call (syscall) */ @@ -669,6 +671,13 @@ void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf); /* CC optimization */ +/* Instead of computing the condition codes after each x86 instruction, + * QEMU just stores the result (called CC_DST), the type of operation + * (called CC_OP) and whatever operands are needed (CC_SRC and possibly + * CC_VR). When the condition codes are needed, the condition codes can + * be calculated using this information. Condition codes are not generated + * if they are only needed for conditional branches. + */ enum cc_op { CC_OP_CONST0 = 0, /* CC is 0 */ CC_OP_CONST1, /* CC is 1 */ diff --git a/target-s390x/cpu_features.c b/target-s390x/cpu_features.c new file mode 100644 index 0000000000..42fd9d792b --- /dev/null +++ b/target-s390x/cpu_features.c @@ -0,0 +1,404 @@ +/* + * CPU features/facilities for s390x + * + * Copyright 2016 IBM Corp. + * + * Author(s): David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "cpu_features.h" +#include "gen-features.h" + +#define FEAT_INIT(_name, _type, _bit, _desc) \ + { \ + .name = _name, \ + .type = _type, \ + .bit = _bit, \ + .desc = _desc, \ + } + +/* indexed by feature number for easy lookup */ +static const S390FeatDef s390_features[] = { + FEAT_INIT("esan3", S390_FEAT_TYPE_STFL, 0, "Instructions marked as n3"), + FEAT_INIT("zarch", S390_FEAT_TYPE_STFL, 1, "z/Architecture architectural mode"), + FEAT_INIT("dateh", S390_FEAT_TYPE_STFL, 3, "DAT-enhancement facility"), + FEAT_INIT("idtes", S390_FEAT_TYPE_STFL, 4, "IDTE selective TLB segment-table clearing"), + FEAT_INIT("idter", S390_FEAT_TYPE_STFL, 5, "IDTE selective TLB region-table clearing"), + FEAT_INIT("asnlxr", S390_FEAT_TYPE_STFL, 6, "ASN-and-LX reuse facility"), + FEAT_INIT("stfle", S390_FEAT_TYPE_STFL, 7, "Store-facility-list-extended facility"), + FEAT_INIT("edat", S390_FEAT_TYPE_STFL, 8, "Enhanced-DAT facility"), + FEAT_INIT("srs", S390_FEAT_TYPE_STFL, 9, "Sense-running-status facility"), + FEAT_INIT("csske", S390_FEAT_TYPE_STFL, 10, "Conditional-SSKE facility"), + FEAT_INIT("ctop", S390_FEAT_TYPE_STFL, 11, "Configuration-topology facility"), + FEAT_INIT("ipter", S390_FEAT_TYPE_STFL, 13, "IPTE-range facility"), + FEAT_INIT("nonqks", S390_FEAT_TYPE_STFL, 14, "Nonquiescing key-setting facility"), + FEAT_INIT("etf2", S390_FEAT_TYPE_STFL, 16, "Extended-translation facility 2"), + FEAT_INIT("msa-base", S390_FEAT_TYPE_STFL, 17, "Message-security-assist facility (excluding subfunctions)"), + FEAT_INIT("ldisp", S390_FEAT_TYPE_STFL, 18, "Long-displacement facility"), + FEAT_INIT("ldisphp", S390_FEAT_TYPE_STFL, 19, "Long-displacement facility has high performance"), + FEAT_INIT("hfpm", S390_FEAT_TYPE_STFL, 20, "HFP-multiply-add/subtract facility"), + FEAT_INIT("eimm", S390_FEAT_TYPE_STFL, 21, "Extended-immediate facility"), + FEAT_INIT("etf3", S390_FEAT_TYPE_STFL, 22, "Extended-translation facility 3"), + FEAT_INIT("hfpue", S390_FEAT_TYPE_STFL, 23, "HFP-unnormalized-extension facility"), + FEAT_INIT("etf2eh", S390_FEAT_TYPE_STFL, 24, "ETF2-enhancement facility"), + FEAT_INIT("stckf", S390_FEAT_TYPE_STFL, 25, "Store-clock-fast facility"), + FEAT_INIT("parseh", S390_FEAT_TYPE_STFL, 26, "Parsing-enhancement facility"), + FEAT_INIT("mvcos", S390_FEAT_TYPE_STFL, 27, "Move-with-optional-specification facility"), + FEAT_INIT("tods-base", S390_FEAT_TYPE_STFL, 28, "TOD-clock-steering facility (excluding subfunctions)"), + FEAT_INIT("etf3eh", S390_FEAT_TYPE_STFL, 30, "ETF3-enhancement facility"), + FEAT_INIT("ectg", S390_FEAT_TYPE_STFL, 31, "Extract-CPU-time facility"), + FEAT_INIT("csst", S390_FEAT_TYPE_STFL, 32, "Compare-and-swap-and-store facility"), + FEAT_INIT("csst2", S390_FEAT_TYPE_STFL, 33, "Compare-and-swap-and-store facility 2"), + FEAT_INIT("ginste", S390_FEAT_TYPE_STFL, 34, "General-instructions-extension facility"), + FEAT_INIT("exrl", S390_FEAT_TYPE_STFL, 35, "Execute-extensions facility"), + FEAT_INIT("emon", S390_FEAT_TYPE_STFL, 36, "Enhanced-monitor facility"), + FEAT_INIT("fpe", S390_FEAT_TYPE_STFL, 37, "Floating-point extension facility"), + FEAT_INIT("sprogp", S390_FEAT_TYPE_STFL, 40, "Set-program-parameters facility"), + FEAT_INIT("fpseh", S390_FEAT_TYPE_STFL, 41, "Floating-point-support-enhancement facilities"), + FEAT_INIT("dfp", S390_FEAT_TYPE_STFL, 42, "DFP (decimal-floating-point) facility"), + FEAT_INIT("dfphp", S390_FEAT_TYPE_STFL, 43, "DFP (decimal-floating-point) facility has high performance"), + FEAT_INIT("pfpo", 
S390_FEAT_TYPE_STFL, 44, "PFPO instruction"), + FEAT_INIT("stfle45", S390_FEAT_TYPE_STFL, 45, "Various facilities introduced with z196"), + FEAT_INIT("cmpsceh", S390_FEAT_TYPE_STFL, 47, "CMPSC-enhancement facility"), + FEAT_INIT("dfpzc", S390_FEAT_TYPE_STFL, 48, "Decimal-floating-point zoned-conversion facility"), + FEAT_INIT("stfle49", S390_FEAT_TYPE_STFL, 49, "Various facilities introduced with zEC12"), + FEAT_INIT("cte", S390_FEAT_TYPE_STFL, 50, "Constrained transactional-execution facility"), + FEAT_INIT("ltlbc", S390_FEAT_TYPE_STFL, 51, "Local-TLB-clearing facility"), + FEAT_INIT("iacc2", S390_FEAT_TYPE_STFL, 52, "Interlocked-access facility 2"), + FEAT_INIT("stfle53", S390_FEAT_TYPE_STFL, 53, "Various facilities introduced with z13"), + FEAT_INIT("msa5-base", S390_FEAT_TYPE_STFL, 57, "Message-security-assist-extension-5 facility (excluding subfunctions)"), + FEAT_INIT("ri", S390_FEAT_TYPE_STFL, 64, "CPU runtime-instrumentation facility"), + FEAT_INIT("te", S390_FEAT_TYPE_STFL, 73, "Transactional-execution facility"), + FEAT_INIT("sthyi", S390_FEAT_TYPE_STFL, 74, "Store-hypervisor-information facility"), + FEAT_INIT("aefsi", S390_FEAT_TYPE_STFL, 75, "Access-exception-fetch/store-indication facility"), + FEAT_INIT("msa3-base", S390_FEAT_TYPE_STFL, 76, "Message-security-assist-extension-3 facility (excluding subfunctions)"), + FEAT_INIT("msa4-base", S390_FEAT_TYPE_STFL, 77, "Message-security-assist-extension-4 facility (excluding subfunctions)"), + FEAT_INIT("edat2", S390_FEAT_TYPE_STFL, 78, "Enhanced-DAT facility 2"), + FEAT_INIT("dfppc", S390_FEAT_TYPE_STFL, 80, "Decimal-floating-point packed-conversion facility"), + FEAT_INIT("vx", S390_FEAT_TYPE_STFL, 129, "Vector facility"), + + FEAT_INIT("gsls", S390_FEAT_TYPE_SCLP_CONF_CHAR, 40, "SIE: Guest-storage-limit-suppression facility"), + FEAT_INIT("esop", S390_FEAT_TYPE_SCLP_CONF_CHAR, 46, "Enhanced-suppression-on-protection facility"), + + FEAT_INIT("64bscao", S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, 0, "SIE: 64-bit-SCAO facility"), + FEAT_INIT("cmma", S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, 1, "SIE: Collaborative-memory-management assist"), + FEAT_INIT("pfmfi", S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, 9, "SIE: PFMF interpretation facility"), + FEAT_INIT("ibs", S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, 10, "SIE: Interlock-and-broadcast-suppression facility"), + + FEAT_INIT("sief2", S390_FEAT_TYPE_SCLP_CPU, 4, "SIE: interception format 2 (Virtual SIE)"), + FEAT_INIT("skey", S390_FEAT_TYPE_SCLP_CPU, 5, "SIE: Storage-key facility"), + FEAT_INIT("gpereh", S390_FEAT_TYPE_SCLP_CPU, 10, "SIE: Guest-PER enhancement facility"), + FEAT_INIT("siif", S390_FEAT_TYPE_SCLP_CPU, 11, "SIE: Shared IPTE-interlock facility"), + FEAT_INIT("sigpif", S390_FEAT_TYPE_SCLP_CPU, 12, "SIE: SIGP interpretation facility"), + FEAT_INIT("ib", S390_FEAT_TYPE_SCLP_CPU, 42, "SIE: Intervention bypass facility"), + FEAT_INIT("cei", S390_FEAT_TYPE_SCLP_CPU, 43, "SIE: Conditional-external-interception facility"), + + FEAT_INIT("dateh2", S390_FEAT_TYPE_MISC, 0, "DAT-enhancement facility 2"), + FEAT_INIT("cmm", S390_FEAT_TYPE_MISC, 0, "Collaborative-memory-management facility"), + + FEAT_INIT("plo-cl", S390_FEAT_TYPE_PLO, 0, "PLO Compare and load (32 bit in general registers)"), + FEAT_INIT("plo-clg", S390_FEAT_TYPE_PLO, 1, "PLO Compare and load (64 bit in parameter list)"), + FEAT_INIT("plo-clgr", S390_FEAT_TYPE_PLO, 2, "PLO Compare and load (32 bit in general registers)"), + FEAT_INIT("plo-clx", S390_FEAT_TYPE_PLO, 3, "PLO Compare and load (128 bit in parameter list)"), + FEAT_INIT("plo-cs", 
S390_FEAT_TYPE_PLO, 4, "PLO Compare and swap (32 bit in general registers)"), + FEAT_INIT("plo-csg", S390_FEAT_TYPE_PLO, 5, "PLO Compare and swap (64 bit in parameter list)"), + FEAT_INIT("plo-csgr", S390_FEAT_TYPE_PLO, 6, "PLO Compare and swap (32 bit in general registers)"), + FEAT_INIT("plo-csx", S390_FEAT_TYPE_PLO, 7, "PLO Compare and swap (128 bit in parameter list)"), + FEAT_INIT("plo-dcs", S390_FEAT_TYPE_PLO, 8, "PLO Double compare and swap (32 bit in general registers)"), + FEAT_INIT("plo-dcsg", S390_FEAT_TYPE_PLO, 9, "PLO Double compare and swap (64 bit in parameter list)"), + FEAT_INIT("plo-dcsgr", S390_FEAT_TYPE_PLO, 10, "PLO Double compare and swap (32 bit in general registers)"), + FEAT_INIT("plo-dcsx", S390_FEAT_TYPE_PLO, 11, "PLO Double compare and swap (128 bit in parameter list)"), + FEAT_INIT("plo-csst", S390_FEAT_TYPE_PLO, 12, "PLO Compare and swap and store (32 bit in general registers)"), + FEAT_INIT("plo-csstg", S390_FEAT_TYPE_PLO, 13, "PLO Compare and swap and store (64 bit in parameter list)"), + FEAT_INIT("plo-csstgr", S390_FEAT_TYPE_PLO, 14, "PLO Compare and swap and store (32 bit in general registers)"), + FEAT_INIT("plo-csstx", S390_FEAT_TYPE_PLO, 15, "PLO Compare and swap and store (128 bit in parameter list)"), + FEAT_INIT("plo-csdst", S390_FEAT_TYPE_PLO, 16, "PLO Compare and swap and double store (32 bit in general registers)"), + FEAT_INIT("plo-csdstg", S390_FEAT_TYPE_PLO, 17, "PLO Compare and swap and double store (64 bit in parameter list)"), + FEAT_INIT("plo-csdstgr", S390_FEAT_TYPE_PLO, 18, "PLO Compare and swap and double store (32 bit in general registers)"), + FEAT_INIT("plo-csdstx", S390_FEAT_TYPE_PLO, 19, "PLO Compare and swap and double store (128 bit in parameter list)"), + FEAT_INIT("plo-cstst", S390_FEAT_TYPE_PLO, 20, "PLO Compare and swap and triple store (32 bit in general registers)"), + FEAT_INIT("plo-cststg", S390_FEAT_TYPE_PLO, 21, "PLO Compare and swap and triple store (64 bit in parameter list)"), + FEAT_INIT("plo-cststgr", S390_FEAT_TYPE_PLO, 22, "PLO Compare and swap and triple store (32 bit in general registers)"), + FEAT_INIT("plo-cststx", S390_FEAT_TYPE_PLO, 23, "PLO Compare and swap and triple store (128 bit in parameter list)"), + + FEAT_INIT("ptff-qto", S390_FEAT_TYPE_PTFF, 1, "PTFF Query TOD Offset"), + FEAT_INIT("ptff-qsi", S390_FEAT_TYPE_PTFF, 2, "PTFF Query Steering Information"), + FEAT_INIT("ptff-qpc", S390_FEAT_TYPE_PTFF, 3, "PTFF Query Physical Clock"), + FEAT_INIT("ptff-qui", S390_FEAT_TYPE_PTFF, 4, "PTFF Query UTC Information"), + FEAT_INIT("ptff-qtou", S390_FEAT_TYPE_PTFF, 5, "PTFF Query TOD Offset User"), + FEAT_INIT("ptff-sto", S390_FEAT_TYPE_PTFF, 65, "PTFF Set TOD Offset"), + FEAT_INIT("ptff-stou", S390_FEAT_TYPE_PTFF, 69, "PTFF Set TOD Offset User"), + + FEAT_INIT("kmac-dea", S390_FEAT_TYPE_KMAC, 1, "KMAC DEA"), + FEAT_INIT("kmac-tdea-128", S390_FEAT_TYPE_KMAC, 2, "KMAC TDEA-128"), + FEAT_INIT("kmac-tdea-192", S390_FEAT_TYPE_KMAC, 3, "KMAC TDEA-192"), + FEAT_INIT("kmac-edea", S390_FEAT_TYPE_KMAC, 9, "KMAC Encrypted-DEA"), + FEAT_INIT("kmac-etdea-128", S390_FEAT_TYPE_KMAC, 10, "KMAC Encrypted-TDEA-128"), + FEAT_INIT("kmac-etdea-192", S390_FEAT_TYPE_KMAC, 11, "KMAC Encrypted-TDEA-192"), + FEAT_INIT("kmac-aes-128", S390_FEAT_TYPE_KMAC, 18, "KMAC AES-128"), + FEAT_INIT("kmac-aes-192", S390_FEAT_TYPE_KMAC, 19, "KMAC AES-192"), + FEAT_INIT("kmac-aes-256", S390_FEAT_TYPE_KMAC, 20, "KMAC AES-256"), + FEAT_INIT("kmac-eaes-128", S390_FEAT_TYPE_KMAC, 26, "KMAC Encrypted-AES-128"), + FEAT_INIT("kmac-eaes-192", 
S390_FEAT_TYPE_KMAC, 27, "KMAC Encrypted-AES-192"), + FEAT_INIT("kmac-eaes-256", S390_FEAT_TYPE_KMAC, 28, "KMAC Encrypted-AES-256"), + + FEAT_INIT("kmc-dea", S390_FEAT_TYPE_KMC, 1, "KMC DEA"), + FEAT_INIT("kmc-tdea-128", S390_FEAT_TYPE_KMC, 2, "KMC TDEA-128"), + FEAT_INIT("kmc-tdea-192", S390_FEAT_TYPE_KMC, 3, "KMC TDEA-192"), + FEAT_INIT("kmc-edea", S390_FEAT_TYPE_KMC, 9, "KMC Encrypted-DEA"), + FEAT_INIT("kmc-etdea-128", S390_FEAT_TYPE_KMC, 10, "KMC Encrypted-TDEA-128"), + FEAT_INIT("kmc-etdea-192", S390_FEAT_TYPE_KMC, 11, "KMC Encrypted-TDEA-192"), + FEAT_INIT("kmc-aes-128", S390_FEAT_TYPE_KMC, 18, "KMC AES-128"), + FEAT_INIT("kmc-aes-192", S390_FEAT_TYPE_KMC, 19, "KMC AES-192"), + FEAT_INIT("kmc-aes-256", S390_FEAT_TYPE_KMC, 20, "KMC AES-256"), + FEAT_INIT("kmc-eaes-128", S390_FEAT_TYPE_KMC, 26, "KMC Encrypted-AES-128"), + FEAT_INIT("kmc-eaes-192", S390_FEAT_TYPE_KMC, 27, "KMC Encrypted-AES-192"), + FEAT_INIT("kmc-eaes-256", S390_FEAT_TYPE_KMC, 28, "KMC Encrypted-AES-256"), + FEAT_INIT("kmc-prng", S390_FEAT_TYPE_KMC, 67, "KMC PRNG"), + + FEAT_INIT("km-dea", S390_FEAT_TYPE_KM, 1, "KM DEA"), + FEAT_INIT("km-tdea-128", S390_FEAT_TYPE_KM, 2, "KM TDEA-128"), + FEAT_INIT("km-tdea-192", S390_FEAT_TYPE_KM, 3, "KM TDEA-192"), + FEAT_INIT("km-edea", S390_FEAT_TYPE_KM, 9, "KM Encrypted-DEA"), + FEAT_INIT("km-etdea-128", S390_FEAT_TYPE_KM, 10, "KM Encrypted-TDEA-128"), + FEAT_INIT("km-etdea-192", S390_FEAT_TYPE_KM, 11, "KM Encrypted-TDEA-192"), + FEAT_INIT("km-aes-128", S390_FEAT_TYPE_KM, 18, "KM AES-128"), + FEAT_INIT("km-aes-192", S390_FEAT_TYPE_KM, 19, "KM AES-192"), + FEAT_INIT("km-aes-256", S390_FEAT_TYPE_KM, 20, "KM AES-256"), + FEAT_INIT("km-eaes-128", S390_FEAT_TYPE_KM, 26, "KM Encrypted-AES-128"), + FEAT_INIT("km-eaes-192", S390_FEAT_TYPE_KM, 27, "KM Encrypted-AES-192"), + FEAT_INIT("km-eaes-256", S390_FEAT_TYPE_KM, 28, "KM Encrypted-AES-256"), + FEAT_INIT("km-xts-aes-128", S390_FEAT_TYPE_KM, 50, "KM XTS-AES-128"), + FEAT_INIT("km-xts-aes-256", S390_FEAT_TYPE_KM, 52, "KM XTS-AES-256"), + FEAT_INIT("km-xts-eaes-128", S390_FEAT_TYPE_KM, 58, "KM XTS-Encrypted-AES-128"), + FEAT_INIT("km-xts-eaes-256", S390_FEAT_TYPE_KM, 60, "KM XTS-Encrypted-AES-256"), + + FEAT_INIT("kimd-sha-1", S390_FEAT_TYPE_KIMD, 1, "KIMD SHA-1"), + FEAT_INIT("kimd-sha-256", S390_FEAT_TYPE_KIMD, 2, "KIMD SHA-256"), + FEAT_INIT("kimd-sha-512", S390_FEAT_TYPE_KIMD, 3, "KIMD SHA-512"), + FEAT_INIT("kimd-ghash", S390_FEAT_TYPE_KIMD, 65, "KIMD GHASH"), + FEAT_INIT("klmd-sha-1", S390_FEAT_TYPE_KLMD, 1, "KLMD SHA-1"), + FEAT_INIT("klmd-sha-256", S390_FEAT_TYPE_KLMD, 2, "KLMD SHA-256"), + FEAT_INIT("klmd-sha-512", S390_FEAT_TYPE_KLMD, 3, "KLMD SHA-512"), + + FEAT_INIT("pckmo-edea", S390_FEAT_TYPE_PCKMO, 1, "PCKMO Encrypted-DEA-Key"), + FEAT_INIT("pckmo-etdea-128", S390_FEAT_TYPE_PCKMO, 2, "PCKMO Encrypted-TDEA-128-Key"), + FEAT_INIT("pckmo-etdea-192", S390_FEAT_TYPE_PCKMO, 3, "PCKMO Encrypted-TDEA-192-Key"), + FEAT_INIT("pckmo-aes-128", S390_FEAT_TYPE_PCKMO, 18, "PCKMO Encrypted-AES-128-Key"), + FEAT_INIT("pckmo-aes-192", S390_FEAT_TYPE_PCKMO, 19, "PCKMO Encrypted-AES-192-Key"), + FEAT_INIT("pckmo-aes-256", S390_FEAT_TYPE_PCKMO, 20, "PCKMO Encrypted-AES-256-Key"), + + FEAT_INIT("kmctr-dea", S390_FEAT_TYPE_KMCTR, 1, "KMCTR DEA"), + FEAT_INIT("kmctr-tdea-128", S390_FEAT_TYPE_KMCTR, 2, "KMCTR TDEA-128"), + FEAT_INIT("kmctr-tdea-192", S390_FEAT_TYPE_KMCTR, 3, "KMCTR TDEA-192"), + FEAT_INIT("kmctr-edea", S390_FEAT_TYPE_KMCTR, 9, "KMCTR Encrypted-DEA"), + FEAT_INIT("kmctr-etdea-128", S390_FEAT_TYPE_KMCTR, 10, "KMCTR 
Encrypted-TDEA-128"), + FEAT_INIT("kmctr-etdea-192", S390_FEAT_TYPE_KMCTR, 11, "KMCTR Encrypted-TDEA-192"), + FEAT_INIT("kmctr-aes-128", S390_FEAT_TYPE_KMCTR, 18, "KMCTR AES-128"), + FEAT_INIT("kmctr-aes-192", S390_FEAT_TYPE_KMCTR, 19, "KMCTR AES-192"), + FEAT_INIT("kmctr-aes-256", S390_FEAT_TYPE_KMCTR, 20, "KMCTR AES-256"), + FEAT_INIT("kmctr-eaes-128", S390_FEAT_TYPE_KMCTR, 26, "KMCTR Encrypted-AES-128"), + FEAT_INIT("kmctr-eaes-192", S390_FEAT_TYPE_KMCTR, 27, "KMCTR Encrypted-AES-192"), + FEAT_INIT("kmctr-eaes-256", S390_FEAT_TYPE_KMCTR, 28, "KMCTR Encrypted-AES-256"), + + FEAT_INIT("kmf-dea", S390_FEAT_TYPE_KMF, 1, "KMF DEA"), + FEAT_INIT("kmf-tdea-128", S390_FEAT_TYPE_KMF, 2, "KMF TDEA-128"), + FEAT_INIT("kmf-tdea-192", S390_FEAT_TYPE_KMF, 3, "KMF TDEA-192"), + FEAT_INIT("kmf-edea", S390_FEAT_TYPE_KMF, 9, "KMF Encrypted-DEA"), + FEAT_INIT("kmf-etdea-128", S390_FEAT_TYPE_KMF, 10, "KMF Encrypted-TDEA-128"), + FEAT_INIT("kmf-etdea-192", S390_FEAT_TYPE_KMF, 11, "KMF Encrypted-TDEA-192"), + FEAT_INIT("kmf-aes-128", S390_FEAT_TYPE_KMF, 18, "KMF AES-128"), + FEAT_INIT("kmf-aes-192", S390_FEAT_TYPE_KMF, 19, "KMF AES-192"), + FEAT_INIT("kmf-aes-256", S390_FEAT_TYPE_KMF, 20, "KMF AES-256"), + FEAT_INIT("kmf-eaes-128", S390_FEAT_TYPE_KMF, 26, "KMF Encrypted-AES-128"), + FEAT_INIT("kmf-eaes-192", S390_FEAT_TYPE_KMF, 27, "KMF Encrypted-AES-192"), + FEAT_INIT("kmf-eaes-256", S390_FEAT_TYPE_KMF, 28, "KMF Encrypted-AES-256"), + + FEAT_INIT("kmo-dea", S390_FEAT_TYPE_KMO, 1, "KMO DEA"), + FEAT_INIT("kmo-tdea-128", S390_FEAT_TYPE_KMO, 2, "KMO TDEA-128"), + FEAT_INIT("kmo-tdea-192", S390_FEAT_TYPE_KMO, 3, "KMO TDEA-192"), + FEAT_INIT("kmo-edea", S390_FEAT_TYPE_KMO, 9, "KMO Encrypted-DEA"), + FEAT_INIT("kmo-etdea-128", S390_FEAT_TYPE_KMO, 10, "KMO Encrypted-TDEA-128"), + FEAT_INIT("kmo-etdea-192", S390_FEAT_TYPE_KMO, 11, "KMO Encrypted-TDEA-192"), + FEAT_INIT("kmo-aes-128", S390_FEAT_TYPE_KMO, 18, "KMO AES-128"), + FEAT_INIT("kmo-aes-192", S390_FEAT_TYPE_KMO, 19, "KMO AES-192"), + FEAT_INIT("kmo-aes-256", S390_FEAT_TYPE_KMO, 20, "KMO AES-256"), + FEAT_INIT("kmo-eaes-128", S390_FEAT_TYPE_KMO, 26, "KMO Encrypted-AES-128"), + FEAT_INIT("kmo-eaes-192", S390_FEAT_TYPE_KMO, 27, "KMO Encrypted-AES-192"), + FEAT_INIT("kmo-eaes-256", S390_FEAT_TYPE_KMO, 28, "KMO Encrypted-AES-256"), + + FEAT_INIT("pcc-cmac-dea", S390_FEAT_TYPE_PCC, 1, "PCC Compute-Last-Block-CMAC-Using-DEA"), + FEAT_INIT("pcc-cmac-tdea-128", S390_FEAT_TYPE_PCC, 2, "PCC Compute-Last-Block-CMAC-Using-TDEA-128"), + FEAT_INIT("pcc-cmac-tdea-192", S390_FEAT_TYPE_PCC, 3, "PCC Compute-Last-Block-CMAC-Using-TDEA-192"), + FEAT_INIT("pcc-cmac-edea", S390_FEAT_TYPE_PCC, 9, "PCC Compute-Last-Block-CMAC-Using-Encrypted-DEA"), + FEAT_INIT("pcc-cmac-etdea-128", S390_FEAT_TYPE_PCC, 10, "PCC Compute-Last-Block-CMAC-Using-Encrypted-TDEA-128"), + FEAT_INIT("pcc-cmac-etdea-192", S390_FEAT_TYPE_PCC, 11, "PCC Compute-Last-Block-CMAC-Using-EncryptedTDEA-192"), + FEAT_INIT("pcc-cmac-aes-128", S390_FEAT_TYPE_PCC, 18, "PCC Compute-Last-Block-CMAC-Using-AES-128"), + FEAT_INIT("pcc-cmac-aes-192", S390_FEAT_TYPE_PCC, 19, "PCC Compute-Last-Block-CMAC-Using-AES-192"), + FEAT_INIT("pcc-cmac-eaes-256", S390_FEAT_TYPE_PCC, 20, "PCC Compute-Last-Block-CMAC-Using-AES-256"), + FEAT_INIT("pcc-cmac-eaes-128", S390_FEAT_TYPE_PCC, 26, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-128"), + FEAT_INIT("pcc-cmac-eaes-192", S390_FEAT_TYPE_PCC, 27, "PCC Compute-Last-Block-CMAC-Using-Encrypted-AES-192"), + FEAT_INIT("pcc-cmac-eaes-256", S390_FEAT_TYPE_PCC, 28, "PCC 
Compute-Last-Block-CMAC-Using-Encrypted-AES-256"), + FEAT_INIT("pcc-xts-aes-128", S390_FEAT_TYPE_PCC, 50, "PCC Compute-XTS-Parameter-Using-AES-128"), + FEAT_INIT("pcc-xts-aes-256", S390_FEAT_TYPE_PCC, 52, "PCC Compute-XTS-Parameter-Using-AES-256"), + FEAT_INIT("pcc-xts-eaes-128", S390_FEAT_TYPE_PCC, 58, "PCC Compute-XTS-Parameter-Using-Encrypted-AES-128"), + FEAT_INIT("pcc-xts-eaes-256", S390_FEAT_TYPE_PCC, 60, "PCC Compute-XTS-Parameter-Using-Encrypted-AES-256"), + + FEAT_INIT("ppno-sha-512-drng", S390_FEAT_TYPE_PPNO, 3, "PPNO SHA-512-DRNG"), +}; + +const S390FeatDef *s390_feat_def(S390Feat feat) +{ + return &s390_features[feat]; +} + +S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit) +{ + S390Feat feat; + + for (feat = 0; feat < ARRAY_SIZE(s390_features); feat++) { + if (s390_features[feat].type == type && + s390_features[feat].bit == bit) { + return feat; + } + } + return S390_FEAT_MAX; +} + +void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap) +{ + int i, j; + + for (i = 0; i < (S390_FEAT_MAX / 64 + 1); i++) { + if (init[i]) { + for (j = 0; j < 64; j++) { + if (init[i] & 1ULL << j) { + set_bit(i * 64 + j, bitmap); + } + } + } + } +} + +void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type, + uint8_t *data) +{ + S390Feat feat; + int bit_nr; + + if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) { + /* z/Architecture is always active if around */ + data[0] |= 0x20; + } + + feat = find_first_bit(features, S390_FEAT_MAX); + while (feat < S390_FEAT_MAX) { + if (s390_features[feat].type == type) { + bit_nr = s390_features[feat].bit; + /* big endian on uint8_t array */ + data[bit_nr / 8] |= 0x80 >> (bit_nr % 8); + } + feat = find_next_bit(features, S390_FEAT_MAX, feat + 1); + } +} + +void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type, + uint8_t *data) +{ + int nr_bits, le_bit; + + switch (type) { + case S390_FEAT_TYPE_STFL: + nr_bits = 2048; + break; + case S390_FEAT_TYPE_PLO: + nr_bits = 256; + break; + default: + /* all cpu subfunctions have 128 bit */ + nr_bits = 128; + }; + + le_bit = find_first_bit((unsigned long *) data, nr_bits); + while (le_bit < nr_bits) { + /* convert the bit number to a big endian bit nr */ + S390Feat feat = s390_feat_by_type_and_bit(type, BE_BIT_NR(le_bit)); + /* ignore unknown bits */ + if (feat < S390_FEAT_MAX) { + set_bit(feat, features); + } + le_bit = find_next_bit((unsigned long *) data, nr_bits, le_bit + 1); + } +} + +void s390_feat_bitmap_to_ascii(const S390FeatBitmap features, void *opaque, + void (*fn)(const char *name, void *opaque)) +{ + S390FeatBitmap bitmap, tmp; + S390FeatGroup group; + S390Feat feat; + + bitmap_copy(bitmap, features, S390_FEAT_MAX); + + /* process whole groups first */ + for (group = 0; group < S390_FEAT_GROUP_MAX; group++) { + const S390FeatGroupDef *def = s390_feat_group_def(group); + + bitmap_and(tmp, bitmap, def->feat, S390_FEAT_MAX); + if (bitmap_equal(tmp, def->feat, S390_FEAT_MAX)) { + bitmap_andnot(bitmap, bitmap, def->feat, S390_FEAT_MAX); + fn(def->name, opaque); + } + } + + /* report leftovers as separate features */ + feat = find_first_bit(bitmap, S390_FEAT_MAX); + while (feat < S390_FEAT_MAX) { + fn(s390_feat_def(feat)->name, opaque); + feat = find_next_bit(bitmap, S390_FEAT_MAX, feat + 1); + }; +} + +#define FEAT_GROUP_INIT(_name, _group, _desc) \ + { \ + .name = _name, \ + .desc = _desc, \ + .init = { S390_FEAT_GROUP_LIST_ ## _group }, \ + } + +/* indexed by feature group number for easy lookup */ +static 
S390FeatGroupDef s390_feature_groups[] = { + FEAT_GROUP_INIT("plo", PLO, "Perform-locked-operation facility"), + FEAT_GROUP_INIT("tods", TOD_CLOCK_STEERING, "Tod-clock-steering facility"), + FEAT_GROUP_INIT("gen13ptff", GEN13_PTFF, "PTFF enhancements introduced with z13"), + FEAT_GROUP_INIT("msa", MSA, "Message-security-assist facility"), + FEAT_GROUP_INIT("msa1", MSA_EXT_1, "Message-security-assist-extension 1 facility"), + FEAT_GROUP_INIT("msa2", MSA_EXT_2, "Message-security-assist-extension 2 facility"), + FEAT_GROUP_INIT("msa3", MSA_EXT_3, "Message-security-assist-extension 3 facility"), + FEAT_GROUP_INIT("msa4", MSA_EXT_4, "Message-security-assist-extension 4 facility"), + FEAT_GROUP_INIT("msa5", MSA_EXT_5, "Message-security-assist-extension 5 facility"), +}; + +const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup group) +{ + return &s390_feature_groups[group]; +} + +static void init_groups(void) +{ + int i; + + /* init all bitmaps from gnerated data initially */ + for (i = 0; i < ARRAY_SIZE(s390_feature_groups); i++) { + s390_init_feat_bitmap(s390_feature_groups[i].init, + s390_feature_groups[i].feat); + } +} + +type_init(init_groups) diff --git a/target-s390x/cpu_features.h b/target-s390x/cpu_features.h new file mode 100644 index 0000000000..d669121786 --- /dev/null +++ b/target-s390x/cpu_features.h @@ -0,0 +1,93 @@ +/* + * CPU features/facilities helper structs and utility functions for s390 + * + * Copyright 2016 IBM Corp. + * + * Author(s): Michael Mueller + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef TARGET_S390X_CPU_FEATURES_H +#define TARGET_S390X_CPU_FEATURES_H + +#include "qemu/bitmap.h" +#include "cpu_features_def.h" + +/* CPU features are announced via different ways */ +typedef enum { + S390_FEAT_TYPE_STFL, + S390_FEAT_TYPE_SCLP_CONF_CHAR, + S390_FEAT_TYPE_SCLP_CONF_CHAR_EXT, + S390_FEAT_TYPE_SCLP_CPU, + S390_FEAT_TYPE_MISC, + S390_FEAT_TYPE_PLO, + S390_FEAT_TYPE_PTFF, + S390_FEAT_TYPE_KMAC, + S390_FEAT_TYPE_KMC, + S390_FEAT_TYPE_KM, + S390_FEAT_TYPE_KIMD, + S390_FEAT_TYPE_KLMD, + S390_FEAT_TYPE_PCKMO, + S390_FEAT_TYPE_KMCTR, + S390_FEAT_TYPE_KMF, + S390_FEAT_TYPE_KMO, + S390_FEAT_TYPE_PCC, + S390_FEAT_TYPE_PPNO, +} S390FeatType; + +/* Definition of a CPU feature */ +typedef struct { + const char *name; /* name exposed to the user */ + const char *desc; /* description exposed to the user */ + S390FeatType type; /* feature type (way of indication)*/ + int bit; /* bit within the feature type area (fixed) */ +} S390FeatDef; + +/* use ordinary bitmap operations to work with features */ +typedef unsigned long S390FeatBitmap[BITS_TO_LONGS(S390_FEAT_MAX)]; + +/* 64bit based bitmap used to init S390FeatBitmap from generated data */ +typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1]; + +const S390FeatDef *s390_feat_def(S390Feat feat); +S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit); +void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap); +void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type, + uint8_t *data); +void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type, + uint8_t *data); +void s390_feat_bitmap_to_ascii(const S390FeatBitmap features, void *opaque, + void (*fn)(const char *name, void *opaque)); + +/* static groups that will never change */ +typedef enum { + S390_FEAT_GROUP_PLO, + S390_FEAT_GROUP_TOD_CLOCK_STEERING, + 
S390_FEAT_GROUP_GEN13_PTFF_ENH, + S390_FEAT_GROUP_MSA, + S390_FEAT_GROUP_MSA_EXT_1, + S390_FEAT_GROUP_MSA_EXT_2, + S390_FEAT_GROUP_MSA_EXT_3, + S390_FEAT_GROUP_MSA_EXT_4, + S390_FEAT_GROUP_MSA_EXT_5, + S390_FEAT_GROUP_MAX, +} S390FeatGroup; + +/* Definition of a CPU feature group */ +typedef struct { + const char *name; /* name exposed to the user */ + const char *desc; /* description exposed to the user */ + S390FeatBitmap feat; /* features contained in the group */ + S390FeatInit init; /* used to init feat from generated data */ +} S390FeatGroupDef; + +const S390FeatGroupDef *s390_feat_group_def(S390FeatGroup group); + +#define BE_BIT_NR(BIT) (BIT ^ (BITS_PER_LONG - 1)) +#define BE_BIT(BIT) (1ULL << BE_BIT_NR(BIT)) + +#endif /* TARGET_S390X_CPU_FEATURES_H */ diff --git a/target-s390x/cpu_features_def.h b/target-s390x/cpu_features_def.h new file mode 100644 index 0000000000..aa5ab8d371 --- /dev/null +++ b/target-s390x/cpu_features_def.h @@ -0,0 +1,231 @@ +/* + * CPU features/facilities for s390 + * + * Copyright 2016 IBM Corp. + * + * Author(s): Michael Mueller + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef TARGET_S390X_CPU_FEATURES_DEF_H +#define TARGET_S390X_CPU_FEATURES_DEF_H + +typedef enum { + S390_FEAT_ESAN3 = 0, + S390_FEAT_ZARCH, + S390_FEAT_DAT_ENH, + S390_FEAT_IDTE_SEGMENT, + S390_FEAT_IDTE_REGION, + S390_FEAT_ASN_LX_REUSE, + S390_FEAT_STFLE, + S390_FEAT_EDAT, + S390_FEAT_SENSE_RUNNING_STATUS, + S390_FEAT_CONDITIONAL_SSKE, + S390_FEAT_CONFIGURATION_TOPOLOGY, + S390_FEAT_IPTE_RANGE, + S390_FEAT_NONQ_KEY_SETTING, + S390_FEAT_EXTENDED_TRANSLATION_2, + S390_FEAT_MSA, + S390_FEAT_LONG_DISPLACEMENT, + S390_FEAT_LONG_DISPLACEMENT_FAST, + S390_FEAT_HFP_MADDSUB, + S390_FEAT_EXTENDED_IMMEDIATE, + S390_FEAT_EXTENDED_TRANSLATION_3, + S390_FEAT_HFP_UNNORMALIZED_EXT, + S390_FEAT_ETF2_ENH, + S390_FEAT_STORE_CLOCK_FAST, + S390_FEAT_PARSING_ENH, + S390_FEAT_MOVE_WITH_OPTIONAL_SPEC, + S390_FEAT_TOD_CLOCK_STEERING, + S390_FEAT_ETF3_ENH, + S390_FEAT_EXTRACT_CPU_TIME, + S390_FEAT_COMPARE_AND_SWAP_AND_STORE, + S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2, + S390_FEAT_GENERAL_INSTRUCTIONS_EXT, + S390_FEAT_EXECUTE_EXT, + S390_FEAT_ENHANCED_MONITOR, + S390_FEAT_FLOATING_POINT_EXT, + S390_FEAT_SET_PROGRAM_PARAMETERS, + S390_FEAT_FLOATING_POINT_SUPPPORT_ENH, + S390_FEAT_DFP, + S390_FEAT_DFP_FAST, + S390_FEAT_PFPO, + S390_FEAT_STFLE_45, + S390_FEAT_CMPSC_ENH, + S390_FEAT_DFP_ZONED_CONVERSION, + S390_FEAT_STFLE_49, + S390_FEAT_CONSTRAINT_TRANSACTIONAL_EXE, + S390_FEAT_LOCAL_TLB_CLEARING, + S390_FEAT_INTERLOCKED_ACCESS_2, + S390_FEAT_STFLE_53, + S390_FEAT_MSA_EXT_5, + S390_FEAT_RUNTIME_INSTRUMENTATION, + S390_FEAT_TRANSACTIONAL_EXE, + S390_FEAT_STORE_HYPERVISOR_INFO, + S390_FEAT_ACCESS_EXCEPTION_FS_INDICATION, + S390_FEAT_MSA_EXT_3, + S390_FEAT_MSA_EXT_4, + S390_FEAT_EDAT_2, + S390_FEAT_DFP_PACKED_CONVERSION, + S390_FEAT_VECTOR, + S390_FEAT_SIE_GSLS, + S390_FEAT_ESOP, + S390_FEAT_SIE_64BSCAO, + S390_FEAT_SIE_CMMA, + S390_FEAT_SIE_PFMFI, + S390_FEAT_SIE_IBS, + S390_FEAT_SIE_F2, + S390_FEAT_SIE_SKEY, + S390_FEAT_SIE_GPERE, + S390_FEAT_SIE_SIIF, + S390_FEAT_SIE_SIGPIF, + S390_FEAT_SIE_IB, + S390_FEAT_SIE_CEI, + S390_FEAT_DAT_ENH_2, + S390_FEAT_CMM, + S390_FEAT_PLO_CL, + S390_FEAT_PLO_CLG, + S390_FEAT_PLO_CLGR, + S390_FEAT_PLO_CLX, + S390_FEAT_PLO_CS, + S390_FEAT_PLO_CSG, + S390_FEAT_PLO_CSGR, + S390_FEAT_PLO_CSX, + S390_FEAT_PLO_DCS, + S390_FEAT_PLO_DCSG, 
S390_FEAT_PLO_DCSGR, + S390_FEAT_PLO_DCSX, + S390_FEAT_PLO_CSST, + S390_FEAT_PLO_CSSTG, + S390_FEAT_PLO_CSSTGR, + S390_FEAT_PLO_CSSTX, + S390_FEAT_PLO_CSDST, + S390_FEAT_PLO_CSDSTG, + S390_FEAT_PLO_CSDSTGR, + S390_FEAT_PLO_CSDSTX, + S390_FEAT_PLO_CSTST, + S390_FEAT_PLO_CSTSTG, + S390_FEAT_PLO_CSTSTGR, + S390_FEAT_PLO_CSTSTX, + S390_FEAT_PTFF_QTO, + S390_FEAT_PTFF_QSI, + S390_FEAT_PTFF_QPT, + S390_FEAT_PTFF_QUI, + S390_FEAT_PTFF_QTOU, + S390_FEAT_PTFF_STO, + S390_FEAT_PTFF_STOU, + S390_FEAT_KMAC_DEA, + S390_FEAT_KMAC_TDEA_128, + S390_FEAT_KMAC_TDEA_192, + S390_FEAT_KMAC_EDEA, + S390_FEAT_KMAC_ETDEA_128, + S390_FEAT_KMAC_ETDEA_192, + S390_FEAT_KMAC_AES_128, + S390_FEAT_KMAC_AES_192, + S390_FEAT_KMAC_AES_256, + S390_FEAT_KMAC_EAES_128, + S390_FEAT_KMAC_EAES_192, + S390_FEAT_KMAC_EAES_256, + S390_FEAT_KMC_DEA, + S390_FEAT_KMC_TDEA_128, + S390_FEAT_KMC_TDEA_192, + S390_FEAT_KMC_EDEA, + S390_FEAT_KMC_ETDEA_128, + S390_FEAT_KMC_ETDEA_192, + S390_FEAT_KMC_AES_128, + S390_FEAT_KMC_AES_192, + S390_FEAT_KMC_AES_256, + S390_FEAT_KMC_EAES_128, + S390_FEAT_KMC_EAES_192, + S390_FEAT_KMC_EAES_256, + S390_FEAT_KMC_PRNG, + S390_FEAT_KM_DEA, + S390_FEAT_KM_TDEA_128, + S390_FEAT_KM_TDEA_192, + S390_FEAT_KM_EDEA, + S390_FEAT_KM_ETDEA_128, + S390_FEAT_KM_ETDEA_192, + S390_FEAT_KM_AES_128, + S390_FEAT_KM_AES_192, + S390_FEAT_KM_AES_256, + S390_FEAT_KM_EAES_128, + S390_FEAT_KM_EAES_192, + S390_FEAT_KM_EAES_256, + S390_FEAT_KM_XTS_AES_128, + S390_FEAT_KM_XTS_AES_256, + S390_FEAT_KM_XTS_EAES_128, + S390_FEAT_KM_XTS_EAES_256, + S390_FEAT_KIMD_SHA_1, + S390_FEAT_KIMD_SHA_256, + S390_FEAT_KIMD_SHA_512, + S390_FEAT_KIMD_GHASH, + S390_FEAT_KLMD_SHA_1, + S390_FEAT_KLMD_SHA_256, + S390_FEAT_KLMD_SHA_512, + S390_FEAT_PCKMO_EDEA, + S390_FEAT_PCKMO_ETDEA_128, + S390_FEAT_PCKMO_ETDEA_256, + S390_FEAT_PCKMO_AES_128, + S390_FEAT_PCKMO_AES_192, + S390_FEAT_PCKMO_AES_256, + S390_FEAT_KMCTR_DEA, + S390_FEAT_KMCTR_TDEA_128, + S390_FEAT_KMCTR_TDEA_192, + S390_FEAT_KMCTR_EDEA, + S390_FEAT_KMCTR_ETDEA_128, + S390_FEAT_KMCTR_ETDEA_192, + S390_FEAT_KMCTR_AES_128, + S390_FEAT_KMCTR_AES_192, + S390_FEAT_KMCTR_AES_256, + S390_FEAT_KMCTR_EAES_128, + S390_FEAT_KMCTR_EAES_192, + S390_FEAT_KMCTR_EAES_256, + S390_FEAT_KMF_DEA, + S390_FEAT_KMF_TDEA_128, + S390_FEAT_KMF_TDEA_192, + S390_FEAT_KMF_EDEA, + S390_FEAT_KMF_ETDEA_128, + S390_FEAT_KMF_ETDEA_192, + S390_FEAT_KMF_AES_128, + S390_FEAT_KMF_AES_192, + S390_FEAT_KMF_AES_256, + S390_FEAT_KMF_EAES_128, + S390_FEAT_KMF_EAES_192, + S390_FEAT_KMF_EAES_256, + S390_FEAT_KMO_DEA, + S390_FEAT_KMO_TDEA_128, + S390_FEAT_KMO_TDEA_192, + S390_FEAT_KMO_EDEA, + S390_FEAT_KMO_ETDEA_128, + S390_FEAT_KMO_ETDEA_192, + S390_FEAT_KMO_AES_128, + S390_FEAT_KMO_AES_192, + S390_FEAT_KMO_AES_256, + S390_FEAT_KMO_EAES_128, + S390_FEAT_KMO_EAES_192, + S390_FEAT_KMO_EAES_256, + S390_FEAT_PCC_CMAC_DEA, + S390_FEAT_PCC_CMAC_TDEA_128, + S390_FEAT_PCC_CMAC_TDEA_192, + S390_FEAT_PCC_CMAC_ETDEA_128, + S390_FEAT_PCC_CMAC_ETDEA_192, + S390_FEAT_PCC_CMAC_TDEA, + S390_FEAT_PCC_CMAC_AES_128, + S390_FEAT_PCC_CMAC_AES_192, + S390_FEAT_PCC_CMAC_AES_256, + S390_FEAT_PCC_CMAC_EAES_128, + S390_FEAT_PCC_CMAC_EAES_192, + S390_FEAT_PCC_CMAC_EAES_256, + S390_FEAT_PCC_XTS_AES_128, + S390_FEAT_PCC_XTS_AES_256, + S390_FEAT_PCC_XTS_EAES_128, + S390_FEAT_PCC_XTS_EAES_256, + S390_FEAT_PPNO_SHA_512_DRNG, + S390_FEAT_MAX, +} S390Feat; + +#endif /* TARGET_S390X_CPU_FEATURES_DEF_H */ diff --git a/target-s390x/cpu_models.c b/target-s390x/cpu_models.c new file mode 100644 index 0000000000..c1e729df5e --- /dev/null +++ b/target-s390x/cpu_models.c @@ -0,0 
+1,1100 @@ +/* + * CPU models for s390x + * + * Copyright 2016 IBM Corp. + * + * Author(s): David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "gen-features.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" +#include "qapi/qmp/qerror.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qmp/qbool.h" +#ifndef CONFIG_USER_ONLY +#include "sysemu/arch_init.h" +#endif + +#define CPUDEF_INIT(_type, _gen, _ec_ga, _mha_pow, _hmfai, _name, _desc) \ + { \ + .name = _name, \ + .type = _type, \ + .gen = _gen, \ + .ec_ga = _ec_ga, \ + .mha_pow = _mha_pow, \ + .hmfai = _hmfai, \ + .desc = _desc, \ + .base_init = { S390_FEAT_LIST_GEN ## _gen ## _GA ## _ec_ga ## _BASE }, \ + .default_init = { S390_FEAT_LIST_GEN ## _gen ## _GA ## _ec_ga ## _DEFAULT }, \ + .full_init = { S390_FEAT_LIST_GEN ## _gen ## _GA ## _ec_ga ## _FULL }, \ + } + +/* + * CPU definiton list in order of release. For now, base features of a + * following release are always a subset of base features of the previous + * release. Same is correct for the other feature sets. + * A BC release always follows the corresponding EC release. + */ +static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), + CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), + CPUDEF_INIT(0x2064, 7, 3, 38, 0x00000000U, "z900.3", "IBM zSeries 900 GA3"), + CPUDEF_INIT(0x2066, 7, 3, 38, 0x00000000U, "z800", "IBM zSeries 800 GA1"), + CPUDEF_INIT(0x2084, 8, 1, 38, 0x00000000U, "z990", "IBM zSeries 990 GA1"), + CPUDEF_INIT(0x2084, 8, 2, 38, 0x00000000U, "z990.2", "IBM zSeries 990 GA2"), + CPUDEF_INIT(0x2084, 8, 3, 38, 0x00000000U, "z990.3", "IBM zSeries 990 GA3"), + CPUDEF_INIT(0x2086, 8, 3, 38, 0x00000000U, "z890", "IBM zSeries 880 GA1"), + CPUDEF_INIT(0x2084, 8, 4, 38, 0x00000000U, "z990.4", "IBM zSeries 990 GA4"), + CPUDEF_INIT(0x2086, 8, 4, 38, 0x00000000U, "z890.2", "IBM zSeries 880 GA2"), + CPUDEF_INIT(0x2084, 8, 5, 38, 0x00000000U, "z990.5", "IBM zSeries 990 GA5"), + CPUDEF_INIT(0x2086, 8, 5, 38, 0x00000000U, "z890.3", "IBM zSeries 880 GA3"), + CPUDEF_INIT(0x2094, 9, 1, 40, 0x00000000U, "z9EC", "IBM System z9 EC GA1"), + CPUDEF_INIT(0x2094, 9, 2, 40, 0x00000000U, "z9EC.2", "IBM System z9 EC GA2"), + CPUDEF_INIT(0x2096, 9, 2, 40, 0x00000000U, "z9BC", "IBM System z9 BC GA1"), + CPUDEF_INIT(0x2094, 9, 3, 40, 0x00000000U, "z9EC.3", "IBM System z9 EC GA3"), + CPUDEF_INIT(0x2096, 9, 3, 40, 0x00000000U, "z9BC.2", "IBM System z9 BC GA2"), + CPUDEF_INIT(0x2097, 10, 1, 43, 0x00000000U, "z10EC", "IBM System z10 EC GA1"), + CPUDEF_INIT(0x2097, 10, 2, 43, 0x00000000U, "z10EC.2", "IBM System z10 EC GA2"), + CPUDEF_INIT(0x2098, 10, 2, 43, 0x00000000U, "z10BC", "IBM System z10 BC GA1"), + CPUDEF_INIT(0x2097, 10, 3, 43, 0x00000000U, "z10EC.3", "IBM System z10 EC GA3"), + CPUDEF_INIT(0x2098, 10, 3, 43, 0x00000000U, "z10BC.2", "IBM System z10 BC GA2"), + CPUDEF_INIT(0x2817, 11, 1, 44, 0x08000000U, "z196", "IBM zEnterprise 196 GA1"), + CPUDEF_INIT(0x2817, 11, 2, 44, 0x08000000U, "z196.2", "IBM zEnterprise 196 GA2"), + CPUDEF_INIT(0x2818, 11, 2, 44, 0x08000000U, "z114", "IBM zEnterprise 114 GA1"), + CPUDEF_INIT(0x2827, 12, 1, 44, 0x08000000U, "zEC12", "IBM zEnterprise EC12 GA1"), + CPUDEF_INIT(0x2827, 12, 2, 44, 0x08000000U, "zEC12.2", "IBM zEnterprise EC12 GA2"), + 
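/* a BC model is always based on the GA level of its EC counterpart, e.g. zBC12 on zEC12 GA2 (see the comment above) */ +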
CPUDEF_INIT(0x2828, 12, 2, 44, 0x08000000U, "zBC12", "IBM zEnterprise BC12 GA1"), + CPUDEF_INIT(0x2964, 13, 1, 47, 0x08000000U, "z13", "IBM z13 GA1"), + CPUDEF_INIT(0x2964, 13, 2, 47, 0x08000000U, "z13.2", "IBM z13 GA2"), + CPUDEF_INIT(0x2965, 13, 2, 47, 0x08000000U, "z13s", "IBM z13s GA1"), +}; + +uint32_t s390_get_hmfai(void) +{ + static S390CPU *cpu; + + if (!cpu) { + cpu = S390_CPU(qemu_get_cpu(0)); + } + + if (!cpu || !cpu->model) { + return 0; + } + return cpu->model->def->hmfai; +} + +uint8_t s390_get_mha_pow(void) +{ + static S390CPU *cpu; + + if (!cpu) { + cpu = S390_CPU(qemu_get_cpu(0)); + } + + if (!cpu || !cpu->model) { + return 0; + } + return cpu->model->def->mha_pow; +} + +uint32_t s390_get_ibc_val(void) +{ + uint16_t unblocked_ibc, lowest_ibc; + static S390CPU *cpu; + + if (!cpu) { + cpu = S390_CPU(qemu_get_cpu(0)); + } + + if (!cpu || !cpu->model) { + return 0; + } + unblocked_ibc = s390_ibc_from_cpu_model(cpu->model); + lowest_ibc = cpu->model->lowest_ibc; + /* the lowest_ibc always has to be <= unblocked_ibc */ + if (!lowest_ibc || lowest_ibc > unblocked_ibc) { + return 0; + } + return ((uint32_t) lowest_ibc << 16) | unblocked_ibc; +} + +void s390_get_feat_block(S390FeatType type, uint8_t *data) +{ + static S390CPU *cpu; + + if (!cpu) { + cpu = S390_CPU(qemu_get_cpu(0)); + } + + if (!cpu || !cpu->model) { + return; + } + s390_fill_feat_block(cpu->model->features, type, data); +} + +bool s390_has_feat(S390Feat feat) +{ + static S390CPU *cpu; + + if (!cpu) { + cpu = S390_CPU(qemu_get_cpu(0)); + } + + if (!cpu || !cpu->model) { +#ifdef CONFIG_KVM + if (kvm_enabled()) { + if (feat == S390_FEAT_VECTOR) { + return kvm_check_extension(kvm_state, + KVM_CAP_S390_VECTOR_REGISTERS); + } + if (feat == S390_FEAT_RUNTIME_INSTRUMENTATION) { + return kvm_s390_get_ri(); + } + if (feat == S390_FEAT_MSA_EXT_3) { + return true; + } + } +#endif + return 0; + } + return test_bit(feat, cpu->model->features); +} + +uint8_t s390_get_gen_for_cpu_type(uint16_t type) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) { + if (s390_cpu_defs[i].type == type) { + return s390_cpu_defs[i].gen; + } + } + return 0; +} + +const S390CPUDef *s390_find_cpu_def(uint16_t type, uint8_t gen, uint8_t ec_ga, + S390FeatBitmap features) +{ + const S390CPUDef *last_compatible = NULL; + int i; + + if (!gen) { + ec_ga = 0; + } + if (!gen && type) { + gen = s390_get_gen_for_cpu_type(type); + } + + for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) { + const S390CPUDef *def = &s390_cpu_defs[i]; + S390FeatBitmap missing; + + /* don't even try newer generations if we know the generation */ + if (gen) { + if (def->gen > gen) { + break; + } else if (def->gen == gen && ec_ga && def->ec_ga > ec_ga) { + break; + } + } + + if (features) { + /* see if the model satisfies the minimum features */ + bitmap_andnot(missing, def->base_feat, features, S390_FEAT_MAX); + if (!bitmap_empty(missing, S390_FEAT_MAX)) { + break; + } + } + + /* stop the search if we found the exact model */ + if (def->type == type && def->ec_ga == ec_ga) { + return def; + } + last_compatible = def; + } + return last_compatible; +} + +struct S390PrintCpuListInfo { + FILE *f; + fprintf_function print; +}; + +static void print_cpu_model_list(ObjectClass *klass, void *opaque) +{ + struct S390PrintCpuListInfo *info = opaque; + S390CPUClass *scc = S390_CPU_CLASS(klass); + char *name = g_strdup(object_class_get_name(klass)); + const char *details = ""; + + if (scc->is_static) { + details = "(static, migration-safe)"; + } else if (scc->is_migration_safe) { + 
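/* non-static models: migration-safe, but the feature set may change between QEMU versions */ +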
details = "(migration-safe)"; + } + + /* strip off the -s390-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; + (*info->print)(info->f, "s390 %-15s %-35s %s\n", name, scc->desc, + details); + g_free(name); +} + +void s390_cpu_list(FILE *f, fprintf_function print) +{ + struct S390PrintCpuListInfo info = { + .f = f, + .print = print, + }; + S390FeatGroup group; + S390Feat feat; + + object_class_foreach(print_cpu_model_list, TYPE_S390_CPU, false, &info); + + (*print)(f, "\nRecognized feature flags:\n"); + for (feat = 0; feat < S390_FEAT_MAX; feat++) { + const S390FeatDef *def = s390_feat_def(feat); + + (*print)(f, "%-20s %-50s\n", def->name, def->desc); + } + + (*print)(f, "\nRecognized feature groups:\n"); + for (group = 0; group < S390_FEAT_GROUP_MAX; group++) { + const S390FeatGroupDef *def = s390_feat_group_def(group); + + (*print)(f, "%-20s %-50s\n", def->name, def->desc); + } +} + +#ifndef CONFIG_USER_ONLY +static void create_cpu_model_list(ObjectClass *klass, void *opaque) +{ + CpuDefinitionInfoList **cpu_list = opaque; + CpuDefinitionInfoList *entry; + CpuDefinitionInfo *info; + char *name = g_strdup(object_class_get_name(klass)); + S390CPUClass *scc = S390_CPU_CLASS(klass); + + /* strip off the -s390-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; + info = g_malloc0(sizeof(*info)); + info->name = name; + info->has_migration_safe = true; + info->migration_safe = scc->is_migration_safe; + info->q_static = scc->is_static; + + + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + entry->next = *cpu_list; + *cpu_list = entry; +} + +CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) +{ + CpuDefinitionInfoList *list = NULL; + + object_class_foreach(create_cpu_model_list, TYPE_S390_CPU, false, &list); + + return list; +} + +static void cpu_model_from_info(S390CPUModel *model, const CpuModelInfo *info, + Error **errp) +{ + const QDict *qdict = NULL; + const QDictEntry *e; + Visitor *visitor; + ObjectClass *oc; + S390CPU *cpu; + Object *obj; + + if (info->props) { + qdict = qobject_to_qdict(info->props); + if (!qdict) { + error_setg(errp, QERR_INVALID_PARAMETER_TYPE, "props", "dict"); + return; + } + } + + oc = cpu_class_by_name(TYPE_S390_CPU, info->name); + if (!oc) { + error_setg(errp, "The CPU definition \'%s\' is unknown.", info->name); + return; + } + if (S390_CPU_CLASS(oc)->kvm_required && !kvm_enabled()) { + error_setg(errp, "The CPU definition '%s' requires KVM", info->name); + return; + } + obj = object_new(object_class_get_name(oc)); + cpu = S390_CPU(obj); + + if (!cpu->model) { + error_setg(errp, "Details about the host CPU model are not available, " + "it cannot be used."); + object_unref(obj); + return; + } + + if (qdict) { + visitor = qobject_input_visitor_new(info->props, true); + visit_start_struct(visitor, NULL, NULL, 0, errp); + if (*errp) { + object_unref(obj); + return; + } + for (e = qdict_first(qdict); e; e = qdict_next(qdict, e)) { + object_property_set(obj, visitor, e->key, errp); + if (*errp) { + break; + } + } + if (!*errp) { + visit_check_struct(visitor, errp); + } + visit_end_struct(visitor, NULL); + visit_free(visitor); + if (*errp) { + object_unref(obj); + return; + } + } + + /* copy the model and throw the cpu away */ + memcpy(model, cpu->model, sizeof(*model)); + object_unref(obj); +} + +static void qdict_add_disabled_feat(const char *name, void *opaque) +{ + qdict_put((QDict *) opaque, name, qbool_from_bool(false)); +} + +static void qdict_add_enabled_feat(const char *name, void *opaque) +{ + qdict_put((QDict *) opaque, name, 
qbool_from_bool(true)); +} + +/* convert S390CPUDef into a static CpuModelInfo */ +static void cpu_info_from_model(CpuModelInfo *info, const S390CPUModel *model, + bool delta_changes) +{ + QDict *qdict = qdict_new(); + S390FeatBitmap bitmap; + + /* always fallback to the static base model */ + info->name = g_strdup_printf("%s-base", model->def->name); + + if (delta_changes) { + /* features deleted from the base feature set */ + bitmap_andnot(bitmap, model->def->base_feat, model->features, + S390_FEAT_MAX); + if (!bitmap_empty(bitmap, S390_FEAT_MAX)) { + s390_feat_bitmap_to_ascii(bitmap, qdict, qdict_add_disabled_feat); + } + + /* features added to the base feature set */ + bitmap_andnot(bitmap, model->features, model->def->base_feat, + S390_FEAT_MAX); + if (!bitmap_empty(bitmap, S390_FEAT_MAX)) { + s390_feat_bitmap_to_ascii(bitmap, qdict, qdict_add_enabled_feat); + } + } else { + /* expand all features */ + s390_feat_bitmap_to_ascii(model->features, qdict, + qdict_add_enabled_feat); + bitmap_complement(bitmap, model->features, S390_FEAT_MAX); + s390_feat_bitmap_to_ascii(bitmap, qdict, qdict_add_disabled_feat); + } + + if (!qdict_size(qdict)) { + QDECREF(qdict); + } else { + info->props = QOBJECT(qdict); + info->has_props = true; + } +} + +CpuModelExpansionInfo *arch_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + CpuModelExpansionInfo *expansion_info = NULL; + S390CPUModel s390_model; + bool delta_changes = false; + + /* convert it to our internal representation */ + cpu_model_from_info(&s390_model, model, errp); + if (*errp) { + return NULL; + } + + if (type == CPU_MODEL_EXPANSION_TYPE_STATIC) { + delta_changes = true; + } else if (type != CPU_MODEL_EXPANSION_TYPE_FULL) { + error_setg(errp, "The requested expansion type is not supported."); + return NULL; + } + + /* convert it back to a static representation */ + expansion_info = g_malloc0(sizeof(*expansion_info)); + expansion_info->model = g_malloc0(sizeof(*expansion_info->model)); + cpu_info_from_model(expansion_info->model, &s390_model, delta_changes); + return expansion_info; +} + +static void list_add_feat(const char *name, void *opaque) +{ + strList **last = (strList **) opaque; + strList *entry; + + entry = g_malloc0(sizeof(*entry)); + entry->value = g_strdup(name); + entry->next = *last; + *last = entry; +} + +CpuModelCompareInfo *arch_query_cpu_model_comparison(CpuModelInfo *infoa, + CpuModelInfo *infob, + Error **errp) +{ + CpuModelCompareResult feat_result, gen_result; + CpuModelCompareInfo *compare_info; + S390FeatBitmap missing, added; + S390CPUModel modela, modelb; + + /* convert both models to our internal representation */ + cpu_model_from_info(&modela, infoa, errp); + if (*errp) { + return NULL; + } + cpu_model_from_info(&modelb, infob, errp); + if (*errp) { + return NULL; + } + compare_info = g_malloc0(sizeof(*compare_info)); + + /* check the cpu generation and ga level */ + if (modela.def->gen == modelb.def->gen) { + if (modela.def->ec_ga == modelb.def->ec_ga) { + /* ec and corresponding bc are identical */ + gen_result = CPU_MODEL_COMPARE_RESULT_IDENTICAL; + } else if (modela.def->ec_ga < modelb.def->ec_ga) { + gen_result = CPU_MODEL_COMPARE_RESULT_SUBSET; + } else { + gen_result = CPU_MODEL_COMPARE_RESULT_SUPERSET; + } + } else if (modela.def->gen < modelb.def->gen) { + gen_result = CPU_MODEL_COMPARE_RESULT_SUBSET; + } else { + gen_result = CPU_MODEL_COMPARE_RESULT_SUPERSET; + } + if (gen_result != CPU_MODEL_COMPARE_RESULT_IDENTICAL) { + /* both models cannot be made 
identical */ + list_add_feat("type", &compare_info->responsible_properties); + } + + /* check the feature set */ + if (bitmap_equal(modela.features, modelb.features, S390_FEAT_MAX)) { + feat_result = CPU_MODEL_COMPARE_RESULT_IDENTICAL; + } else { + bitmap_andnot(missing, modela.features, modelb.features, S390_FEAT_MAX); + s390_feat_bitmap_to_ascii(missing, + &compare_info->responsible_properties, + list_add_feat); + bitmap_andnot(added, modelb.features, modela.features, S390_FEAT_MAX); + s390_feat_bitmap_to_ascii(added, &compare_info->responsible_properties, + list_add_feat); + if (bitmap_empty(missing, S390_FEAT_MAX)) { + feat_result = CPU_MODEL_COMPARE_RESULT_SUBSET; + } else if (bitmap_empty(added, S390_FEAT_MAX)) { + feat_result = CPU_MODEL_COMPARE_RESULT_SUPERSET; + } else { + feat_result = CPU_MODEL_COMPARE_RESULT_INCOMPATIBLE; + } + } + + /* combine the results */ + if (gen_result == feat_result) { + compare_info->result = gen_result; + } else if (feat_result == CPU_MODEL_COMPARE_RESULT_IDENTICAL) { + compare_info->result = gen_result; + } else if (gen_result == CPU_MODEL_COMPARE_RESULT_IDENTICAL) { + compare_info->result = feat_result; + } else { + compare_info->result = CPU_MODEL_COMPARE_RESULT_INCOMPATIBLE; + } + return compare_info; +} + +CpuModelBaselineInfo *arch_query_cpu_model_baseline(CpuModelInfo *infoa, + CpuModelInfo *infob, + Error **errp) +{ + CpuModelBaselineInfo *baseline_info; + S390CPUModel modela, modelb, model; + uint16_t cpu_type; + uint8_t max_gen_ga; + uint8_t max_gen; + + /* convert both models to our internal representation */ + cpu_model_from_info(&modela, infoa, errp); + if (*errp) { + return NULL; + } + + cpu_model_from_info(&modelb, infob, errp); + if (*errp) { + return NULL; + } + + /* features both models support */ + bitmap_and(model.features, modela.features, modelb.features, S390_FEAT_MAX); + + /* detect the maximum model not regarding features */ + if (modela.def->gen == modelb.def->gen) { + if (modela.def->type == modelb.def->type) { + cpu_type = modela.def->type; + } else { + cpu_type = 0; + } + max_gen = modela.def->gen; + max_gen_ga = MIN(modela.def->ec_ga, modelb.def->ec_ga); + } else if (modela.def->gen > modelb.def->gen) { + cpu_type = modelb.def->type; + max_gen = modelb.def->gen; + max_gen_ga = modelb.def->ec_ga; + } else { + cpu_type = modela.def->type; + max_gen = modela.def->gen; + max_gen_ga = modela.def->ec_ga; + } + + model.def = s390_find_cpu_def(cpu_type, max_gen, max_gen_ga, + model.features); + /* strip off features not part of the max model */ + bitmap_and(model.features, model.features, model.def->full_feat, + S390_FEAT_MAX); + + baseline_info = g_malloc0(sizeof(*baseline_info)); + baseline_info->model = g_malloc0(sizeof(*baseline_info->model)); + cpu_info_from_model(baseline_info->model, &model, true); + return baseline_info; +} +#endif + +static void check_consistency(const S390CPUModel *model) +{ + static int dep[][2] = { + { S390_FEAT_IPTE_RANGE, S390_FEAT_DAT_ENH }, + { S390_FEAT_IDTE_SEGMENT, S390_FEAT_DAT_ENH }, + { S390_FEAT_IDTE_REGION, S390_FEAT_DAT_ENH }, + { S390_FEAT_IDTE_REGION, S390_FEAT_IDTE_SEGMENT }, + { S390_FEAT_LOCAL_TLB_CLEARING, S390_FEAT_DAT_ENH}, + { S390_FEAT_LONG_DISPLACEMENT_FAST, S390_FEAT_LONG_DISPLACEMENT }, + { S390_FEAT_DFP_FAST, S390_FEAT_DFP }, + { S390_FEAT_TRANSACTIONAL_EXE, S390_FEAT_STFLE_49 }, + { S390_FEAT_EDAT_2, S390_FEAT_EDAT}, + { S390_FEAT_MSA_EXT_5, S390_FEAT_KIMD_SHA_512 }, + { S390_FEAT_MSA_EXT_5, S390_FEAT_KLMD_SHA_512 }, + { S390_FEAT_MSA_EXT_4, S390_FEAT_MSA_EXT_3 }, + { 
S390_FEAT_SIE_CMMA, S390_FEAT_CMM }, + { S390_FEAT_SIE_CMMA, S390_FEAT_SIE_GSLS }, + { S390_FEAT_SIE_PFMFI, S390_FEAT_EDAT }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(dep); i++) { + if (test_bit(dep[i][0], model->features) && + !test_bit(dep[i][1], model->features)) { + error_report("Warning: \'%s\' requires \'%s\'.", + s390_feat_def(dep[i][0])->name, + s390_feat_def(dep[i][1])->name); + } + } +} + +static void error_prepend_missing_feat(const char *name, void *opaque) +{ + error_prepend((Error **) opaque, "%s ", name); +} + +static void check_compatibility(const S390CPUModel *max_model, + const S390CPUModel *model, Error **errp) +{ + S390FeatBitmap missing; + + if (model->def->gen > max_model->def->gen) { + error_setg(errp, "Selected CPU generation is too new. Maximum " + "supported model in the configuration: \'%s\'", + max_model->def->name); + return; + } else if (model->def->gen == max_model->def->gen && + model->def->ec_ga > max_model->def->ec_ga) { + error_setg(errp, "Selected CPU GA level is too new. Maximum " + "supported model in the configuration: \'%s\'", + max_model->def->name); + return; + } + + /* detect the missing features to properly report them */ + bitmap_andnot(missing, model->features, max_model->features, S390_FEAT_MAX); + if (bitmap_empty(missing, S390_FEAT_MAX)) { + return; + } + + error_setg(errp, " "); + s390_feat_bitmap_to_ascii(missing, errp, error_prepend_missing_feat); + error_prepend(errp, "Some features requested in the CPU model are not " + "available in the configuration: "); +} + +static S390CPUModel *get_max_cpu_model(Error **errp) +{ +#ifndef CONFIG_USER_ONLY + static S390CPUModel max_model; + static bool cached; + + if (cached) { + return &max_model; + } + + if (kvm_enabled()) { + kvm_s390_get_host_cpu_model(&max_model, errp); + } else { + /* TCG emulates a z900 */ + max_model.def = &s390_cpu_defs[0]; + bitmap_copy(max_model.features, max_model.def->default_feat, + S390_FEAT_MAX); + } + if (!*errp) { + cached = true; + return &max_model; + } +#endif + return NULL; +} + +static inline void apply_cpu_model(const S390CPUModel *model, Error **errp) +{ +#ifndef CONFIG_USER_ONLY + static S390CPUModel applied_model; + static bool applied; + + /* + * We have the same model for all VCPUs. KVM can only be configured before + * any VCPUs are defined in KVM.
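+ * The model is therefore applied only once and remembered; any CPU + * created afterwards (e.g. via hotplug) has to specify the very same model.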
+ */ + if (applied) { + if (model && memcmp(&applied_model, model, sizeof(S390CPUModel))) { + error_setg(errp, "Mixed CPU models are not supported on s390x."); + } + return; + } + + if (kvm_enabled()) { + kvm_s390_apply_cpu_model(model, errp); + } else if (model) { + /* FIXME TCG - use data for stdip/stfl */ + } + + if (!*errp) { + applied = true; + if (model) { + applied_model = *model; + } + } +#endif +} + +void s390_realize_cpu_model(CPUState *cs, Error **errp) +{ + S390CPUClass *xcc = S390_CPU_GET_CLASS(cs); + S390CPU *cpu = S390_CPU(cs); + const S390CPUModel *max_model; + + if (xcc->kvm_required && !kvm_enabled()) { + error_setg(errp, "CPU definition requires KVM"); + return; + } + + if (!cpu->model) { + /* no host model support -> perform compatibility stuff */ + apply_cpu_model(NULL, errp); + return; + } + + max_model = get_max_cpu_model(errp); + if (*errp) { + error_prepend(errp, "CPU models are not available: "); + return; + } + + /* copy over properties that can vary */ + cpu->model->lowest_ibc = max_model->lowest_ibc; + cpu->model->cpu_id = max_model->cpu_id; + cpu->model->cpu_ver = max_model->cpu_ver; + + check_consistency(cpu->model); + check_compatibility(max_model, cpu->model, errp); + if (*errp) { + return; + } + + apply_cpu_model(cpu->model, errp); +} + +static void get_feature(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390Feat feat = (S390Feat) opaque; + S390CPU *cpu = S390_CPU(obj); + bool value; + + if (!cpu->model) { + error_setg(errp, "Details about the host CPU model are not available, " + "features cannot be queried."); + return; + } + + value = test_bit(feat, cpu->model->features); + visit_type_bool(v, name, &value, errp); +} + +static void set_feature(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390Feat feat = (S390Feat) opaque; + DeviceState *dev = DEVICE(obj); + S390CPU *cpu = S390_CPU(obj); + bool value; + + if (dev->realized) { + error_setg(errp, "Attempt to set property '%s' on '%s' after " + "it was realized", name, object_get_typename(obj)); + return; + } else if (!cpu->model) { + error_setg(errp, "Details about the host CPU model are not available, " + "features cannot be changed."); + return; + } + + visit_type_bool(v, name, &value, errp); + if (*errp) { + return; + } + if (value) { + if (!test_bit(feat, cpu->model->def->full_feat)) { + error_setg(errp, "Feature '%s' is not available for CPU model '%s'," + " it was introduced with later models.", + name, cpu->model->def->name); + return; + } + set_bit(feat, cpu->model->features); + } else { + clear_bit(feat, cpu->model->features); + } +} + +static void get_feature_group(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390FeatGroup group = (S390FeatGroup) opaque; + const S390FeatGroupDef *def = s390_feat_group_def(group); + S390CPU *cpu = S390_CPU(obj); + S390FeatBitmap tmp; + bool value; + + if (!cpu->model) { + error_setg(errp, "Details about the host CPU model are not available, " + "features cannot be queried."); + return; + } + + /* a group is enabled if all features are enabled */ + bitmap_and(tmp, cpu->model->features, def->feat, S390_FEAT_MAX); + value = bitmap_equal(tmp, def->feat, S390_FEAT_MAX); + visit_type_bool(v, name, &value, errp); +} + +static void set_feature_group(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + S390FeatGroup group = (S390FeatGroup) opaque; + const S390FeatGroupDef *def = s390_feat_group_def(group); + DeviceState *dev = DEVICE(obj); + S390CPU 
*cpu = S390_CPU(obj); + bool value; + + if (dev->realized) { + error_setg(errp, "Attempt to set property '%s' on '%s' after " + "it was realized", name, object_get_typename(obj)); + return; + } else if (!cpu->model) { + error_setg(errp, "Details about the host CPU model are not available, " + "features cannot be changed."); + return; + } + + visit_type_bool(v, name, &value, errp); + if (*errp) { + return; + } + if (value) { + /* groups are added in one shot, so an intersect is sufficient */ + if (!bitmap_intersects(def->feat, cpu->model->def->full_feat, + S390_FEAT_MAX)) { + error_setg(errp, "Group '%s' is not available for CPU model '%s'," + " it was introduced with later models.", + name, cpu->model->def->name); + return; + } + bitmap_or(cpu->model->features, cpu->model->features, def->feat, + S390_FEAT_MAX); + } else { + bitmap_andnot(cpu->model->features, cpu->model->features, def->feat, + S390_FEAT_MAX); + } +} + +void s390_cpu_model_register_props(Object *obj) +{ + S390FeatGroup group; + S390Feat feat; + + for (feat = 0; feat < S390_FEAT_MAX; feat++) { + const S390FeatDef *def = s390_feat_def(feat); + object_property_add(obj, def->name, "bool", get_feature, + set_feature, NULL, (void *) feat, NULL); + object_property_set_description(obj, def->name, def->desc , NULL); + } + for (group = 0; group < S390_FEAT_GROUP_MAX; group++) { + const S390FeatGroupDef *def = s390_feat_group_def(group); + object_property_add(obj, def->name, "bool", get_feature_group, + set_feature_group, NULL, (void *) group, NULL); + object_property_set_description(obj, def->name, def->desc , NULL); + } +} + +static void s390_cpu_model_initfn(Object *obj) +{ + S390CPU *cpu = S390_CPU(obj); + S390CPUClass *xcc = S390_CPU_GET_CLASS(cpu); + + cpu->model = g_malloc0(sizeof(*cpu->model)); + /* copy the model, so we can modify it */ + cpu->model->def = xcc->cpu_def; + if (xcc->is_static) { + /* base model - features will never change */ + bitmap_copy(cpu->model->features, cpu->model->def->base_feat, + S390_FEAT_MAX); + } else { + /* latest model - features can change */ + bitmap_copy(cpu->model->features, + cpu->model->def->default_feat, S390_FEAT_MAX); + } +} + +#ifdef CONFIG_KVM +static void s390_host_cpu_model_initfn(Object *obj) +{ + S390CPU *cpu = S390_CPU(obj); + Error *err = NULL; + + if (!kvm_enabled() || !kvm_s390_cpu_models_supported()) { + return; + } + + cpu->model = g_malloc0(sizeof(*cpu->model)); + kvm_s390_get_host_cpu_model(cpu->model, &err); + if (err) { + error_report_err(err); + g_free(cpu->model); + /* fallback to unsupported cpu models */ + cpu->model = NULL; + } +} +#endif + +static void s390_qemu_cpu_model_initfn(Object *obj) +{ + S390CPU *cpu = S390_CPU(obj); + + cpu->model = g_malloc0(sizeof(*cpu->model)); + /* TCG emulates a z900 */ + cpu->model->def = &s390_cpu_defs[0]; + bitmap_copy(cpu->model->features, cpu->model->def->default_feat, + S390_FEAT_MAX); +} + +static void s390_cpu_model_finalize(Object *obj) +{ + S390CPU *cpu = S390_CPU(obj); + + g_free(cpu->model); + cpu->model = NULL; +} + +static bool get_is_migration_safe(Object *obj, Error **errp) +{ + return S390_CPU_GET_CLASS(obj)->is_migration_safe; +} + +static bool get_is_static(Object *obj, Error **errp) +{ + return S390_CPU_GET_CLASS(obj)->is_static; +} + +static char *get_description(Object *obj, Error **errp) +{ + return g_strdup(S390_CPU_GET_CLASS(obj)->desc); +} + +void s390_cpu_model_class_register_props(ObjectClass *oc) +{ + object_class_property_add_bool(oc, "migration-safe", get_is_migration_safe, + NULL, NULL); + 
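/* "static" (base) models never change their feature set between QEMU versions */ +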
object_class_property_add_bool(oc, "static", get_is_static, + NULL, NULL); + object_class_property_add_str(oc, "description", get_description, NULL, + NULL); +} + +#ifdef CONFIG_KVM +static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +{ + S390CPUClass *xcc = S390_CPU_CLASS(oc); + + xcc->kvm_required = true; + xcc->desc = "KVM only: All recognized features"; +} +#endif + +static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) +{ + S390CPUClass *xcc = S390_CPU_CLASS(oc); + + /* all base models are migration safe */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->is_static = true; + xcc->desc = xcc->cpu_def->desc; +} + +static void s390_cpu_model_class_init(ObjectClass *oc, void *data) +{ + S390CPUClass *xcc = S390_CPU_CLASS(oc); + + /* model that can change between QEMU versions */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->desc = xcc->cpu_def->desc; +} + +static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +{ + S390CPUClass *xcc = S390_CPU_CLASS(oc); + + xcc->is_migration_safe = true; + xcc->desc = g_strdup_printf("QEMU Virtual CPU version %s", + qemu_hw_version()); +} + +#define S390_CPU_TYPE_SUFFIX "-" TYPE_S390_CPU +#define S390_CPU_TYPE_NAME(name) (name S390_CPU_TYPE_SUFFIX) + +/* Generate type name for a cpu model. Caller has to free the string. */ +static char *s390_cpu_type_name(const char *model_name) +{ + return g_strdup_printf(S390_CPU_TYPE_NAME("%s"), model_name); +} + +/* Generate type name for a base cpu model. Caller has to free the string. */ +static char *s390_base_cpu_type_name(const char *model_name) +{ + return g_strdup_printf(S390_CPU_TYPE_NAME("%s-base"), model_name); +} + +ObjectClass *s390_cpu_class_by_name(const char *name) +{ + char *typename = s390_cpu_type_name(name); + ObjectClass *oc; + + oc = object_class_by_name(typename); + g_free(typename); + return oc; +} + +static const TypeInfo qemu_s390_cpu_type_info = { + .name = S390_CPU_TYPE_NAME("qemu"), + .parent = TYPE_S390_CPU, + .instance_init = s390_qemu_cpu_model_initfn, + .instance_finalize = s390_cpu_model_finalize, + .class_init = s390_qemu_cpu_model_class_init, +}; + +#ifdef CONFIG_KVM +static const TypeInfo host_s390_cpu_type_info = { + .name = S390_CPU_TYPE_NAME("host"), + .parent = TYPE_S390_CPU, + .instance_init = s390_host_cpu_model_initfn, + .instance_finalize = s390_cpu_model_finalize, + .class_init = s390_host_cpu_model_class_init, +}; +#endif + +static void register_types(void) +{ + int i; + + /* init all bitmaps from gnerated data initially */ + for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) { + s390_init_feat_bitmap(s390_cpu_defs[i].base_init, + s390_cpu_defs[i].base_feat); + s390_init_feat_bitmap(s390_cpu_defs[i].default_init, + s390_cpu_defs[i].default_feat); + s390_init_feat_bitmap(s390_cpu_defs[i].full_init, + s390_cpu_defs[i].full_feat); + } + + for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) { + char *base_name = s390_base_cpu_type_name(s390_cpu_defs[i].name); + TypeInfo ti_base = { + .name = base_name, + .parent = TYPE_S390_CPU, + .instance_init = s390_cpu_model_initfn, + .instance_finalize = s390_cpu_model_finalize, + .class_init = s390_base_cpu_model_class_init, + .class_data = (void *) &s390_cpu_defs[i], + }; + char *name = s390_cpu_type_name(s390_cpu_defs[i].name); + TypeInfo ti = { + .name = name, + .parent = TYPE_S390_CPU, + .instance_init = s390_cpu_model_initfn, + .instance_finalize = s390_cpu_model_finalize, + .class_init = 
s390_cpu_model_class_init, + .class_data = (void *) &s390_cpu_defs[i], + }; + + type_register_static(&ti_base); + type_register_static(&ti); + g_free(base_name); + g_free(name); + } + + type_register_static(&qemu_s390_cpu_type_info); +#ifdef CONFIG_KVM + type_register_static(&host_s390_cpu_type_info); +#endif +} + +type_init(register_types) diff --git a/target-s390x/cpu_models.h b/target-s390x/cpu_models.h new file mode 100644 index 0000000000..136a602313 --- /dev/null +++ b/target-s390x/cpu_models.h @@ -0,0 +1,119 @@ +/* + * CPU models for s390x + * + * Copyright 2016 IBM Corp. + * + * Author(s): David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. + */ + +#ifndef TARGET_S390X_CPU_MODELS_H +#define TARGET_S390X_CPU_MODELS_H + +#include "cpu_features.h" +#include "qom/cpu.h" + +/* static CPU definition */ +typedef struct S390CPUDef { + const char *name; /* name exposed to the user */ + const char *desc; /* description exposed to the user */ + uint8_t gen; /* hw generation identification */ + uint16_t type; /* cpu type identification */ + uint8_t ec_ga; /* EC GA version (on which also the BC is based) */ + uint8_t mha_pow; /* Maximum Host Adress Power, mha = 2^pow-1 */ + uint32_t hmfai; /* hypervisor-managed facilities */ + /* base/min features, must never be changed between QEMU versions */ + S390FeatBitmap base_feat; + /* used to init base_feat from generated data */ + S390FeatInit base_init; + /* deafault features, QEMU version specific */ + S390FeatBitmap default_feat; + /* used to init default_feat from generated data */ + S390FeatInit default_init; + /* max allowed features, QEMU version specific */ + S390FeatBitmap full_feat; + /* used to init full_feat from generated data */ + S390FeatInit full_init; +} S390CPUDef; + +/* CPU model based on a CPU definition */ +typedef struct S390CPUModel { + const S390CPUDef *def; + S390FeatBitmap features; + /* values copied from the "host" model, can change during migration */ + uint16_t lowest_ibc; /* lowest IBC that the hardware supports */ + uint32_t cpu_id; /* CPU id */ + uint8_t cpu_ver; /* CPU version, usually "ff" for kvm */ +} S390CPUModel; + +/* + * CPU ID + * + * bits 0-7: Zeroes (ff for kvm) + * bits 8-31: CPU ID (serial number) + * bits 32-48: Machine type + * bits 48-63: Zeroes + */ +#define cpuid_type(x) (((x) >> 16) & 0xffff) +#define cpuid_id(x) (((x) >> 32) & 0xffffff) +#define cpuid_ver(x) (((x) >> 56) & 0xff) + +#define lowest_ibc(x) (((uint32_t)(x) >> 16) & 0xfff) +#define unblocked_ibc(x) ((uint32_t)(x) & 0xfff) +#define has_ibc(x) (lowest_ibc(x) != 0) + +#define S390_GEN_Z10 0xa +#define ibc_gen(x) (x == 0 ? 
0 : ((x >> 4) + S390_GEN_Z10)) +#define ibc_ec_ga(x) (x & 0xf) + +uint32_t s390_get_hmfai(void); +uint8_t s390_get_mha_pow(void); +uint32_t s390_get_ibc_val(void); +static inline uint16_t s390_ibc_from_cpu_model(const S390CPUModel *model) +{ + uint16_t ibc = 0; + + if (model->def->gen >= S390_GEN_Z10) { + ibc = ((model->def->gen - S390_GEN_Z10) << 4) + model->def->ec_ga; + } + return ibc; +} +void s390_get_feat_block(S390FeatType type, uint8_t *data); +bool s390_has_feat(S390Feat feat); +uint8_t s390_get_gen_for_cpu_type(uint16_t type); +static inline bool s390_known_cpu_type(uint16_t type) +{ + return s390_get_gen_for_cpu_type(type) != 0; +} +static inline uint64_t s390_cpuid_from_cpu_model(const S390CPUModel *model) +{ + return ((uint64_t)model->cpu_ver << 56) | + ((uint64_t)model->cpu_id << 32) | + ((uint64_t)model->def->type << 16); +} +S390CPUDef const *s390_find_cpu_def(uint16_t type, uint8_t gen, uint8_t ec_ga, + S390FeatBitmap features); + +#ifdef CONFIG_KVM +bool kvm_s390_cpu_models_supported(void); +void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp); +void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp); +#else +static inline void kvm_s390_get_host_cpu_model(S390CPUModel *model, + Error **errp) +{ +} +static inline void kvm_s390_apply_cpu_model(const S390CPUModel *model, + Error **errp) +{ +} +static inline bool kvm_s390_cpu_models_supported(void) +{ + return false; +} +#endif + +#endif /* TARGET_S390X_CPU_MODELS_H */ diff --git a/target-s390x/gen-features.c b/target-s390x/gen-features.c new file mode 100644 index 0000000000..e674738ae3 --- /dev/null +++ b/target-s390x/gen-features.c @@ -0,0 +1,592 @@ +/* + * S390 feature list generator + * + * Copyright 2016 IBM Corp. + * + * Author(s): Michael Mueller + * David Hildenbrand + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at + * your option) any later version. See the COPYING file in the top-level + * directory. 
+ * + */ + + +#include "inttypes.h" +#include "stdio.h" +#include "cpu_features_def.h" + +#define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0])) + +/***** BEGIN FEATURE DEFS *****/ + +#define S390_FEAT_GROUP_PLO \ + S390_FEAT_PLO_CL, \ + S390_FEAT_PLO_CLG, \ + S390_FEAT_PLO_CLGR, \ + S390_FEAT_PLO_CLX, \ + S390_FEAT_PLO_CS, \ + S390_FEAT_PLO_CSG, \ + S390_FEAT_PLO_CSGR, \ + S390_FEAT_PLO_CSX, \ + S390_FEAT_PLO_DCS, \ + S390_FEAT_PLO_DCSG, \ + S390_FEAT_PLO_DCSGR, \ + S390_FEAT_PLO_DCSX, \ + S390_FEAT_PLO_CSST, \ + S390_FEAT_PLO_CSSTG, \ + S390_FEAT_PLO_CSSTGR, \ + S390_FEAT_PLO_CSSTX, \ + S390_FEAT_PLO_CSDST, \ + S390_FEAT_PLO_CSDSTG, \ + S390_FEAT_PLO_CSDSTGR, \ + S390_FEAT_PLO_CSDSTX, \ + S390_FEAT_PLO_CSTST, \ + S390_FEAT_PLO_CSTSTG, \ + S390_FEAT_PLO_CSTSTGR, \ + S390_FEAT_PLO_CSTSTX + +#define S390_FEAT_GROUP_TOD_CLOCK_STEERING \ + S390_FEAT_TOD_CLOCK_STEERING, \ + S390_FEAT_PTFF_QTO, \ + S390_FEAT_PTFF_QSI, \ + S390_FEAT_PTFF_QPT, \ + S390_FEAT_PTFF_STO + +#define S390_FEAT_GROUP_GEN13_PTFF \ + S390_FEAT_PTFF_QUI, \ + S390_FEAT_PTFF_QTOU, \ + S390_FEAT_PTFF_STOU + +#define S390_FEAT_GROUP_MSA \ + S390_FEAT_MSA, \ + S390_FEAT_KMAC_DEA, \ + S390_FEAT_KMAC_TDEA_128, \ + S390_FEAT_KMAC_TDEA_192, \ + S390_FEAT_KMC_DEA, \ + S390_FEAT_KMC_TDEA_128, \ + S390_FEAT_KMC_TDEA_192, \ + S390_FEAT_KM_DEA, \ + S390_FEAT_KM_TDEA_128, \ + S390_FEAT_KM_TDEA_192, \ + S390_FEAT_KIMD_SHA_1, \ + S390_FEAT_KLMD_SHA_1 + +#define S390_FEAT_GROUP_MSA_EXT_1 \ + S390_FEAT_KMC_AES_128, \ + S390_FEAT_KM_AES_128, \ + S390_FEAT_KIMD_SHA_256, \ + S390_FEAT_KLMD_SHA_256 + +#define S390_FEAT_GROUP_MSA_EXT_2 \ + S390_FEAT_KMC_AES_192, \ + S390_FEAT_KMC_AES_256, \ + S390_FEAT_KMC_PRNG, \ + S390_FEAT_KM_AES_192, \ + S390_FEAT_KM_AES_256, \ + S390_FEAT_KIMD_SHA_512, \ + S390_FEAT_KLMD_SHA_512 + +#define S390_FEAT_GROUP_MSA_EXT_3 \ + S390_FEAT_MSA_EXT_3, \ + S390_FEAT_KMAC_EDEA, \ + S390_FEAT_KMAC_ETDEA_128, \ + S390_FEAT_KMAC_ETDEA_192, \ + S390_FEAT_KMC_EAES_128, \ + S390_FEAT_KMC_EAES_192, \ + S390_FEAT_KMC_EAES_256, \ + S390_FEAT_KMC_EDEA, \ + S390_FEAT_KMC_ETDEA_128, \ + S390_FEAT_KMC_ETDEA_192, \ + S390_FEAT_KM_EDEA, \ + S390_FEAT_KM_ETDEA_128, \ + S390_FEAT_KM_ETDEA_192, \ + S390_FEAT_KM_EAES_128, \ + S390_FEAT_KM_EAES_192, \ + S390_FEAT_KM_EAES_256, \ + S390_FEAT_PCKMO_EDEA, \ + S390_FEAT_PCKMO_ETDEA_128, \ + S390_FEAT_PCKMO_ETDEA_256, \ + S390_FEAT_PCKMO_AES_128, \ + S390_FEAT_PCKMO_AES_192, \ + S390_FEAT_PCKMO_AES_256 + +#define S390_FEAT_GROUP_MSA_EXT_4 \ + S390_FEAT_MSA_EXT_4, \ + S390_FEAT_KMAC_AES_128, \ + S390_FEAT_KMAC_AES_192, \ + S390_FEAT_KMAC_AES_256, \ + S390_FEAT_KMAC_EAES_128, \ + S390_FEAT_KMAC_EAES_192, \ + S390_FEAT_KMAC_EAES_256, \ + S390_FEAT_KM_XTS_AES_128, \ + S390_FEAT_KM_XTS_AES_256, \ + S390_FEAT_KM_XTS_EAES_128, \ + S390_FEAT_KM_XTS_EAES_256, \ + S390_FEAT_KIMD_GHASH, \ + S390_FEAT_KMCTR_DEA, \ + S390_FEAT_KMCTR_TDEA_128, \ + S390_FEAT_KMCTR_TDEA_192, \ + S390_FEAT_KMCTR_EDEA, \ + S390_FEAT_KMCTR_ETDEA_128, \ + S390_FEAT_KMCTR_ETDEA_192, \ + S390_FEAT_KMCTR_AES_128, \ + S390_FEAT_KMCTR_AES_192, \ + S390_FEAT_KMCTR_AES_256, \ + S390_FEAT_KMCTR_EAES_128, \ + S390_FEAT_KMCTR_EAES_192, \ + S390_FEAT_KMCTR_EAES_256, \ + S390_FEAT_KMF_DEA, \ + S390_FEAT_KMF_TDEA_128, \ + S390_FEAT_KMF_TDEA_192, \ + S390_FEAT_KMF_EDEA, \ + S390_FEAT_KMF_ETDEA_128, \ + S390_FEAT_KMF_ETDEA_192, \ + S390_FEAT_KMF_AES_128, \ + S390_FEAT_KMF_AES_192, \ + S390_FEAT_KMF_AES_256, \ + S390_FEAT_KMF_EAES_128, \ + S390_FEAT_KMF_EAES_192, \ + S390_FEAT_KMF_EAES_256, \ + S390_FEAT_KMO_DEA, \ + S390_FEAT_KMO_TDEA_128, \ + 
S390_FEAT_KMO_TDEA_192, \ + S390_FEAT_KMO_EDEA, \ + S390_FEAT_KMO_ETDEA_128, \ + S390_FEAT_KMO_ETDEA_192, \ + S390_FEAT_KMO_AES_128, \ + S390_FEAT_KMO_AES_192, \ + S390_FEAT_KMO_AES_256, \ + S390_FEAT_KMO_EAES_128, \ + S390_FEAT_KMO_EAES_192, \ + S390_FEAT_KMO_EAES_256, \ + S390_FEAT_PCC_CMAC_DEA, \ + S390_FEAT_PCC_CMAC_TDEA_128, \ + S390_FEAT_PCC_CMAC_TDEA_192, \ + S390_FEAT_PCC_CMAC_ETDEA_128, \ + S390_FEAT_PCC_CMAC_ETDEA_192, \ + S390_FEAT_PCC_CMAC_TDEA, \ + S390_FEAT_PCC_CMAC_AES_128, \ + S390_FEAT_PCC_CMAC_AES_192, \ + S390_FEAT_PCC_CMAC_AES_256, \ + S390_FEAT_PCC_CMAC_EAES_128, \ + S390_FEAT_PCC_CMAC_EAES_192, \ + S390_FEAT_PCC_CMAC_EAES_256, \ + S390_FEAT_PCC_XTS_AES_128, \ + S390_FEAT_PCC_XTS_AES_256, \ + S390_FEAT_PCC_XTS_EAES_128, \ + S390_FEAT_PCC_XTS_EAES_256 + +#define S390_FEAT_GROUP_MSA_EXT_5 \ + S390_FEAT_MSA_EXT_5, \ + S390_FEAT_PPNO_SHA_512_DRNG + +/* cpu feature groups */ +static uint16_t group_PLO[] = { + S390_FEAT_GROUP_PLO, +}; +static uint16_t group_TOD_CLOCK_STEERING[] = { + S390_FEAT_GROUP_TOD_CLOCK_STEERING, +}; +static uint16_t group_GEN13_PTFF[] = { + S390_FEAT_GROUP_GEN13_PTFF, +}; +static uint16_t group_MSA[] = { + S390_FEAT_GROUP_MSA, +}; +static uint16_t group_MSA_EXT_1[] = { + S390_FEAT_GROUP_MSA_EXT_1, +}; +static uint16_t group_MSA_EXT_2[] = { + S390_FEAT_GROUP_MSA_EXT_2, +}; +static uint16_t group_MSA_EXT_3[] = { + S390_FEAT_GROUP_MSA_EXT_3, +}; +static uint16_t group_MSA_EXT_4[] = { + S390_FEAT_GROUP_MSA_EXT_4, +}; +static uint16_t group_MSA_EXT_5[] = { + S390_FEAT_GROUP_MSA_EXT_5, +}; + +/* base features in order of release */ +static uint16_t base_GEN7_GA1[] = { + S390_FEAT_GROUP_PLO, + S390_FEAT_ESAN3, + S390_FEAT_ZARCH, +}; +#define base_GEN7_GA2 EmptyFeat +#define base_GEN7_GA3 EmptyFeat +static uint16_t base_GEN8_GA1[] = { + S390_FEAT_DAT_ENH, + S390_FEAT_EXTENDED_TRANSLATION_2, + S390_FEAT_GROUP_MSA, + S390_FEAT_LONG_DISPLACEMENT, + S390_FEAT_LONG_DISPLACEMENT_FAST, + S390_FEAT_HFP_MADDSUB, +}; +#define base_GEN8_GA2 EmptyFeat +#define base_GEN8_GA3 EmptyFeat +#define base_GEN8_GA4 EmptyFeat +#define base_GEN8_GA5 EmptyFeat +static uint16_t base_GEN9_GA1[] = { + S390_FEAT_IDTE_SEGMENT, + S390_FEAT_ASN_LX_REUSE, + S390_FEAT_STFLE, + S390_FEAT_SENSE_RUNNING_STATUS, + S390_FEAT_EXTENDED_IMMEDIATE, + S390_FEAT_EXTENDED_TRANSLATION_3, + S390_FEAT_HFP_UNNORMALIZED_EXT, + S390_FEAT_ETF2_ENH, + S390_FEAT_STORE_CLOCK_FAST, + S390_FEAT_GROUP_TOD_CLOCK_STEERING, + S390_FEAT_ETF3_ENH, + S390_FEAT_DAT_ENH_2, +}; +#define base_GEN9_GA2 EmptyFeat +#define base_GEN9_GA3 EmptyFeat +static uint16_t base_GEN10_GA1[] = { + S390_FEAT_CONDITIONAL_SSKE, + S390_FEAT_PARSING_ENH, + S390_FEAT_MOVE_WITH_OPTIONAL_SPEC, + S390_FEAT_EXTRACT_CPU_TIME, + S390_FEAT_COMPARE_AND_SWAP_AND_STORE, + S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2, + S390_FEAT_GENERAL_INSTRUCTIONS_EXT, + S390_FEAT_EXECUTE_EXT, + S390_FEAT_FLOATING_POINT_SUPPPORT_ENH, + S390_FEAT_DFP, + S390_FEAT_DFP_FAST, + S390_FEAT_PFPO, +}; +#define base_GEN10_GA2 EmptyFeat +#define base_GEN10_GA3 EmptyFeat +static uint16_t base_GEN11_GA1[] = { + S390_FEAT_NONQ_KEY_SETTING, + S390_FEAT_ENHANCED_MONITOR, + S390_FEAT_FLOATING_POINT_EXT, + S390_FEAT_SET_PROGRAM_PARAMETERS, + S390_FEAT_STFLE_45, + S390_FEAT_CMPSC_ENH, + S390_FEAT_INTERLOCKED_ACCESS_2, +}; +#define base_GEN11_GA2 EmptyFeat +static uint16_t base_GEN12_GA1[] = { + S390_FEAT_DFP_ZONED_CONVERSION, + S390_FEAT_STFLE_49, + S390_FEAT_LOCAL_TLB_CLEARING, +}; +#define base_GEN12_GA2 EmptyFeat +static uint16_t base_GEN13_GA1[] = { + S390_FEAT_STFLE_53, + 
S390_FEAT_DFP_PACKED_CONVERSION, + S390_FEAT_GROUP_GEN13_PTFF, +}; +#define base_GEN13_GA2 EmptyFeat + +/* full features differing to the base in order of release */ +static uint16_t full_GEN7_GA1[] = { + S390_FEAT_SIE_F2, + S390_FEAT_SIE_SKEY, + S390_FEAT_SIE_GPERE, + S390_FEAT_SIE_IB, + S390_FEAT_SIE_CEI, +}; +static uint16_t full_GEN7_GA2[] = { + S390_FEAT_EXTENDED_TRANSLATION_2, +}; +static uint16_t full_GEN7_GA3[] = { + S390_FEAT_LONG_DISPLACEMENT, + S390_FEAT_SIE_SIIF, +}; +static uint16_t full_GEN8_GA1[] = { + S390_FEAT_SIE_GSLS, + S390_FEAT_SIE_64BSCAO, +}; +#define full_GEN8_GA2 EmptyFeat +static uint16_t full_GEN8_GA3[] = { + S390_FEAT_ASN_LX_REUSE, + S390_FEAT_EXTENDED_TRANSLATION_3, +}; +#define full_GEN8_GA4 EmptyFeat +#define full_GEN8_GA5 EmptyFeat +static uint16_t full_GEN9_GA1[] = { + S390_FEAT_STORE_HYPERVISOR_INFO, + S390_FEAT_GROUP_MSA_EXT_1, + S390_FEAT_CMM, + S390_FEAT_SIE_CMMA, +}; +static uint16_t full_GEN9_GA2[] = { + S390_FEAT_MOVE_WITH_OPTIONAL_SPEC, + S390_FEAT_EXTRACT_CPU_TIME, + S390_FEAT_COMPARE_AND_SWAP_AND_STORE, + S390_FEAT_FLOATING_POINT_SUPPPORT_ENH, + S390_FEAT_DFP, +}; +static uint16_t full_GEN9_GA3[] = { + S390_FEAT_CONDITIONAL_SSKE, + S390_FEAT_PFPO, +}; +static uint16_t full_GEN10_GA1[] = { + S390_FEAT_EDAT, + S390_FEAT_CONFIGURATION_TOPOLOGY, + S390_FEAT_GROUP_MSA_EXT_2, + S390_FEAT_ESOP, + S390_FEAT_SIE_PFMFI, + S390_FEAT_SIE_SIGPIF, +}; +static uint16_t full_GEN10_GA2[] = { + S390_FEAT_SET_PROGRAM_PARAMETERS, + S390_FEAT_SIE_IBS, +}; +static uint16_t full_GEN10_GA3[] = { + S390_FEAT_GROUP_MSA_EXT_3, +}; +static uint16_t full_GEN11_GA1[] = { + S390_FEAT_IPTE_RANGE, + S390_FEAT_ACCESS_EXCEPTION_FS_INDICATION, + S390_FEAT_GROUP_MSA_EXT_4, +}; +#define full_GEN11_GA2 EmptyFeat +static uint16_t full_GEN12_GA1[] = { + S390_FEAT_CONSTRAINT_TRANSACTIONAL_EXE, + S390_FEAT_TRANSACTIONAL_EXE, + S390_FEAT_RUNTIME_INSTRUMENTATION, + S390_FEAT_EDAT_2, +}; +static uint16_t full_GEN12_GA2[] = { + S390_FEAT_GROUP_MSA_EXT_5, +}; +static uint16_t full_GEN13_GA1[] = { + S390_FEAT_VECTOR, +}; +#define full_GEN13_GA2 EmptyFeat + +/* default features differing to the base in order of release */ +#define default_GEN7_GA1 EmptyFeat +#define default_GEN7_GA2 EmptyFeat +#define default_GEN7_GA3 EmptyFeat +#define default_GEN8_GA1 EmptyFeat +#define default_GEN8_GA2 EmptyFeat +#define default_GEN8_GA3 EmptyFeat +#define default_GEN8_GA4 EmptyFeat +#define default_GEN8_GA5 EmptyFeat +static uint16_t default_GEN9_GA1[] = { + S390_FEAT_STORE_HYPERVISOR_INFO, + S390_FEAT_GROUP_MSA_EXT_1, + S390_FEAT_CMM, +}; +#define default_GEN9_GA2 EmptyFeat +#define default_GEN9_GA3 EmptyFeat +static uint16_t default_GEN10_GA1[] = { + S390_FEAT_EDAT, + S390_FEAT_GROUP_MSA_EXT_2, +}; +#define default_GEN10_GA2 EmptyFeat +#define default_GEN10_GA3 EmptyFeat +static uint16_t default_GEN11_GA1[] = { + S390_FEAT_GROUP_MSA_EXT_3, + S390_FEAT_IPTE_RANGE, + S390_FEAT_ACCESS_EXCEPTION_FS_INDICATION, + S390_FEAT_GROUP_MSA_EXT_4, +}; +#define default_GEN11_GA2 EmptyFeat +static uint16_t default_GEN12_GA1[] = { + S390_FEAT_CONSTRAINT_TRANSACTIONAL_EXE, + S390_FEAT_TRANSACTIONAL_EXE, + S390_FEAT_RUNTIME_INSTRUMENTATION, + S390_FEAT_EDAT_2, +}; +#define default_GEN12_GA2 EmptyFeat +static uint16_t default_GEN13_GA1[] = { + S390_FEAT_GROUP_MSA_EXT_5, + S390_FEAT_VECTOR, +}; +#define default_GEN13_GA2 EmptyFeat + +/****** END FEATURE DEFS ******/ + +#define _YEARS "2016" +#define _NAME_H "TARGET_S390X_GEN_FEATURES_H" + +#define CPU_FEAT_INITIALIZER(_name) \ + { \ + .name = "S390_FEAT_LIST_" #_name, \ + 
.base_bits = \ + { .data = base_##_name, \ + .len = ARRAY_SIZE(base_##_name) }, \ + .default_bits = \ + { .data = default_##_name, \ + .len = ARRAY_SIZE(default_##_name) }, \ + .full_bits = \ + { .data = full_##_name, \ + .len = ARRAY_SIZE(full_##_name) }, \ + } + +typedef struct BitSpec { + uint16_t *data; + uint32_t len; +} BitSpec; + +typedef struct { + const char *name; + BitSpec base_bits; + BitSpec default_bits; + BitSpec full_bits; +} CpuFeatDefSpec; + +static uint16_t EmptyFeat[] = {}; + +/******************************* + * processor GA series + *******************************/ +static CpuFeatDefSpec CpuFeatDef[] = { + CPU_FEAT_INITIALIZER(GEN7_GA1), + CPU_FEAT_INITIALIZER(GEN7_GA2), + CPU_FEAT_INITIALIZER(GEN7_GA3), + CPU_FEAT_INITIALIZER(GEN8_GA1), + CPU_FEAT_INITIALIZER(GEN8_GA2), + CPU_FEAT_INITIALIZER(GEN8_GA3), + CPU_FEAT_INITIALIZER(GEN8_GA4), + CPU_FEAT_INITIALIZER(GEN8_GA5), + CPU_FEAT_INITIALIZER(GEN9_GA1), + CPU_FEAT_INITIALIZER(GEN9_GA2), + CPU_FEAT_INITIALIZER(GEN9_GA3), + CPU_FEAT_INITIALIZER(GEN10_GA1), + CPU_FEAT_INITIALIZER(GEN10_GA2), + CPU_FEAT_INITIALIZER(GEN10_GA3), + CPU_FEAT_INITIALIZER(GEN11_GA1), + CPU_FEAT_INITIALIZER(GEN11_GA2), + CPU_FEAT_INITIALIZER(GEN12_GA1), + CPU_FEAT_INITIALIZER(GEN12_GA2), + CPU_FEAT_INITIALIZER(GEN13_GA1), + CPU_FEAT_INITIALIZER(GEN13_GA2), +}; + +#define FEAT_GROUP_INITIALIZER(_name) \ + { \ + .name = "S390_FEAT_GROUP_LIST_" #_name, \ + .bits = \ + { .data = group_##_name, \ + .len = ARRAY_SIZE(group_##_name) }, \ + } + +typedef struct { + const char *name; + BitSpec bits; +} FeatGroupDefSpec; + +/******************************* + * feature groups + *******************************/ +static FeatGroupDefSpec FeatGroupDef[] = { + FEAT_GROUP_INITIALIZER(PLO), + FEAT_GROUP_INITIALIZER(TOD_CLOCK_STEERING), + FEAT_GROUP_INITIALIZER(GEN13_PTFF), + FEAT_GROUP_INITIALIZER(MSA), + FEAT_GROUP_INITIALIZER(MSA_EXT_1), + FEAT_GROUP_INITIALIZER(MSA_EXT_2), + FEAT_GROUP_INITIALIZER(MSA_EXT_3), + FEAT_GROUP_INITIALIZER(MSA_EXT_4), + FEAT_GROUP_INITIALIZER(MSA_EXT_5), +}; + +static void set_bits(uint64_t list[], BitSpec bits) +{ + uint32_t i; + + for (i = 0; i < bits.len; i++) { + list[bits.data[i] / 64] |= 1ULL << (bits.data[i] % 64); + } +} + +static void print_feature_defs(void) +{ + uint64_t base_feat[S390_FEAT_MAX / 64 + 1] = {}; + uint64_t default_feat[S390_FEAT_MAX / 64 + 1] = {}; + uint64_t full_feat[S390_FEAT_MAX / 64 + 1] = {}; + int i, j; + + printf("\n/* CPU model feature list data */\n"); + + for (i = 0; i < ARRAY_SIZE(CpuFeatDef); i++) { + set_bits(base_feat, CpuFeatDef[i].base_bits); + /* add the base to the default features */ + set_bits(default_feat, CpuFeatDef[i].base_bits); + set_bits(default_feat, CpuFeatDef[i].default_bits); + /* add the base to the full features */ + set_bits(full_feat, CpuFeatDef[i].base_bits); + set_bits(full_feat, CpuFeatDef[i].full_bits); + + printf("#define %s_BASE\t", CpuFeatDef[i].name); + for (j = 0; j < ARRAY_SIZE(base_feat); j++) { + printf("0x%016"PRIx64"ULL", base_feat[j]); + if (j < ARRAY_SIZE(base_feat) - 1) { + printf(","); + } else { + printf("\n"); + } + } + printf("#define %s_DEFAULT\t", CpuFeatDef[i].name); + for (j = 0; j < ARRAY_SIZE(default_feat); j++) { + printf("0x%016"PRIx64"ULL", default_feat[j]); + if (j < ARRAY_SIZE(default_feat) - 1) { + printf(","); + } else { + printf("\n"); + } + } + printf("#define %s_FULL\t\t", CpuFeatDef[i].name); + for (j = 0; j < ARRAY_SIZE(full_feat); j++) { + printf("0x%016"PRIx64"ULL", full_feat[j]); + if (j < ARRAY_SIZE(full_feat) - 1) { + 
printf(","); + } else { + printf("\n"); + } + } + } +} + +static void print_feature_group_defs(void) +{ + int i, j; + + printf("\n/* CPU feature group list data */\n"); + + for (i = 0; i < ARRAY_SIZE(FeatGroupDef); i++) { + uint64_t feat[S390_FEAT_MAX / 64 + 1] = {}; + + set_bits(feat, FeatGroupDef[i].bits); + printf("#define %s\t", FeatGroupDef[i].name); + for (j = 0; j < ARRAY_SIZE(feat); j++) { + printf("0x%016"PRIx64"ULL", feat[j]); + if (j < ARRAY_SIZE(feat) - 1) { + printf(","); + } else { + printf("\n"); + } + } + } +} + +int main(int argc, char *argv[]) +{ + printf("/*\n" + " * AUTOMATICALLY GENERATED, DO NOT MODIFY HERE, EDIT\n" + " * SOURCE FILE \"%s\" INSTEAD.\n" + " *\n" + " * Copyright %s IBM Corp.\n" + " *\n" + " * This work is licensed under the terms of the GNU GPL, " + "version 2 or (at\n * your option) any later version. See " + "the COPYING file in the top-level\n * directory.\n" + " */\n\n" + "#ifndef %s\n#define %s\n", __FILE__, _YEARS, _NAME_H, _NAME_H); + print_feature_defs(); + print_feature_group_defs(); + printf("\n#endif\n"); + return 0; +} diff --git a/target-s390x/helper.c b/target-s390x/helper.c index 54a5177345..68bd2f9784 100644 --- a/target-s390x/helper.c +++ b/target-s390x/helper.c @@ -70,7 +70,38 @@ void s390x_cpu_timer(void *opaque) S390CPU *cpu_s390x_create(const char *cpu_model, Error **errp) { - return S390_CPU(object_new(TYPE_S390_CPU)); + static bool features_parsed; + char *name, *features; + const char *typename; + ObjectClass *oc; + CPUClass *cc; + + name = g_strdup(cpu_model); + features = strchr(name, ','); + if (features) { + features[0] = 0; + features++; + } + + oc = cpu_class_by_name(TYPE_S390_CPU, name); + if (!oc) { + error_setg(errp, "Unknown CPU definition \'%s\'", name); + g_free(name); + return NULL; + } + typename = object_class_get_name(oc); + + if (!features_parsed) { + features_parsed = true; + cc = CPU_CLASS(oc); + cc->parse_features(typename, features, errp); + } + g_free(name); + + if (*errp) { + return NULL; + } + return S390_CPU(CPU(object_new(typename))); } S390CPU *s390x_new_cpu(const char *cpu_model, int64_t id, Error **errp) diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c index a5a288bec5..590bfa4f12 100644 --- a/target-s390x/ioinst.c +++ b/target-s390x/ioinst.c @@ -508,7 +508,7 @@ static void ioinst_handle_chsc_scsc(ChscReq *req, ChscResp *res) memset(chsc_chars, 0, sizeof(chsc_chars)); general_chars[0] = cpu_to_be32(0x03000000); - general_chars[1] = cpu_to_be32(0x00059000); + general_chars[1] = cpu_to_be32(0x00079000); general_chars[3] = cpu_to_be32(0x00080000); chsc_chars[0] = cpu_to_be32(0x40000000); diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 80ac6215fb..97afe02599 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -109,6 +109,7 @@ #define ICPT_WAITPSW 0x1c #define ICPT_SOFT_INTERCEPT 0x24 #define ICPT_CPU_STOP 0x28 +#define ICPT_OPEREXC 0x2c #define ICPT_IO 0x40 #define NR_LOCAL_IRQS 32 @@ -131,6 +132,8 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { KVM_CAP_LAST_INFO }; +static QemuMutex qemu_sigp_mutex; + static int cap_sync_regs; static int cap_async_pf; static int cap_mem_op; @@ -174,6 +177,18 @@ int kvm_s390_set_mem_limit(KVMState *s, uint64_t new_limit, uint64_t *hw_limit) return kvm_vm_ioctl(s, KVM_SET_DEVICE_ATTR, &attr); } +static bool kvm_s390_cmma_available(void) +{ + static bool initialized, value; + + if (!initialized) { + initialized = true; + value = kvm_vm_check_mem_attr(kvm_state, KVM_S390_VM_MEM_ENABLE_CMMA) && + kvm_vm_check_mem_attr(kvm_state, 
KVM_S390_VM_MEM_CLR_CMMA); + } + return value; +} + void kvm_s390_cmma_reset(void) { int rc; @@ -182,11 +197,15 @@ void kvm_s390_cmma_reset(void) .attr = KVM_S390_VM_MEM_CLR_CMMA, }; + if (!mem_path || !kvm_s390_cmma_available()) { + return; + } + rc = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); trace_kvm_clear_cmma(rc); } -static void kvm_s390_enable_cmma(KVMState *s) +static void kvm_s390_enable_cmma(void) { int rc; struct kvm_device_attr attr = { @@ -194,12 +213,7 @@ static void kvm_s390_enable_cmma(KVMState *s) .attr = KVM_S390_VM_MEM_ENABLE_CMMA, }; - if (!kvm_vm_check_mem_attr(s, KVM_S390_VM_MEM_ENABLE_CMMA) || - !kvm_vm_check_mem_attr(s, KVM_S390_VM_MEM_CLR_CMMA)) { - return; - } - - rc = kvm_vm_ioctl(s, KVM_SET_DEVICE_ATTR, &attr); + rc = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); trace_kvm_enable_cmma(rc); } @@ -248,8 +262,10 @@ static void kvm_s390_init_dea_kw(void) void kvm_s390_crypto_reset(void) { - kvm_s390_init_aes_kw(); - kvm_s390_init_dea_kw(); + if (s390_has_feat(S390_FEAT_MSA_EXT_3)) { + kvm_s390_init_aes_kw(); + kvm_s390_init_dea_kw(); + } } int kvm_arch_init(MachineState *ms, KVMState *s) @@ -259,10 +275,6 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); - if (!mem_path) { - kvm_s390_enable_cmma(s); - } - if (!kvm_check_extension(s, KVM_CAP_S390_GMAP) || !kvm_check_extension(s, KVM_CAP_S390_COW)) { phys_mem_set_alloc(legacy_s390_alloc); @@ -277,6 +289,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) } } + qemu_mutex_init(&qemu_sigp_mutex); + return 0; } @@ -665,16 +679,37 @@ static void *legacy_s390_alloc(size_t size, uint64_t *align) return mem == MAP_FAILED ? NULL : mem; } -/* DIAG 501 is used for sw breakpoints */ -static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01}; +static uint8_t const *sw_bp_inst; +static uint8_t sw_bp_ilen; + +static void determine_sw_breakpoint_instr(void) +{ + /* DIAG 501 is used for sw breakpoints with old kernels */ + static const uint8_t diag_501[] = {0x83, 0x24, 0x05, 0x01}; + /* Instruction 0x0000 is used for sw breakpoints with recent kernels */ + static const uint8_t instr_0x0000[] = {0x00, 0x00}; + + if (sw_bp_inst) { + return; + } + if (kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_USER_INSTR0, 0)) { + sw_bp_inst = diag_501; + sw_bp_ilen = sizeof(diag_501); + DPRINTF("KVM: will use 4-byte sw breakpoints.\n"); + } else { + sw_bp_inst = instr_0x0000; + sw_bp_ilen = sizeof(instr_0x0000); + DPRINTF("KVM: will use 2-byte sw breakpoints.\n"); + } +} int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) { + determine_sw_breakpoint_instr(); if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, - sizeof(diag_501), 0) || - cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)diag_501, - sizeof(diag_501), 1)) { + sw_bp_ilen, 0) || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)sw_bp_inst, sw_bp_ilen, 1)) { return -EINVAL; } return 0; @@ -682,14 +717,14 @@ int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) { - uint8_t t[sizeof(diag_501)]; + uint8_t t[MAX_ILEN]; - if (cpu_memory_rw_debug(cs, bp->pc, t, sizeof(diag_501), 0)) { + if (cpu_memory_rw_debug(cs, bp->pc, t, sw_bp_ilen, 0)) { return -EINVAL; - } else if (memcmp(t, diag_501, sizeof(diag_501))) { + } else if (memcmp(t, sw_bp_inst, sw_bp_ilen)) { return -EINVAL; } else if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t 
*)&bp->saved_insn, - sizeof(diag_501), 1)) { + sw_bp_ilen, 1)) { return -EINVAL; } @@ -1310,7 +1345,7 @@ static int handle_sw_breakpoint(S390CPU *cpu, struct kvm_run *run) cpu_synchronize_state(CPU(cpu)); - pc = env->psw.addr - 4; + pc = env->psw.addr - sw_bp_ilen; if (kvm_find_sw_breakpoint(CPU(cpu), pc)) { env->psw.addr = pc; return EXCP_DEBUG; @@ -1354,7 +1389,6 @@ static int handle_diag(S390CPU *cpu, struct kvm_run *run, uint32_t ipb) } typedef struct SigpInfo { - S390CPU *cpu; uint64_t param; int cc; uint64_t *status_reg; @@ -1367,38 +1401,40 @@ static void set_sigp_status(SigpInfo *si, uint64_t status) si->cc = SIGP_CC_STATUS_STORED; } -static void sigp_start(void *arg) +static void sigp_start(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; - if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) { + if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) { si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; return; } - s390_cpu_set_state(CPU_STATE_OPERATING, si->cpu); + s390_cpu_set_state(CPU_STATE_OPERATING, cpu); si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } -static void sigp_stop(void *arg) +static void sigp_stop(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; struct kvm_s390_irq irq = { .type = KVM_S390_SIGP_STOP, }; - if (s390_cpu_get_state(si->cpu) != CPU_STATE_OPERATING) { + if (s390_cpu_get_state(cpu) != CPU_STATE_OPERATING) { si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; return; } /* disabled wait - sleeping in user space */ - if (CPU(si->cpu)->halted) { - s390_cpu_set_state(CPU_STATE_STOPPED, si->cpu); + if (cs->halted) { + s390_cpu_set_state(CPU_STATE_STOPPED, cpu); } else { /* execute the stop function */ - si->cpu->env.sigp_order = SIGP_STOP; - kvm_s390_vcpu_interrupt(si->cpu, &irq); + cpu->env.sigp_order = SIGP_STOP; + kvm_s390_vcpu_interrupt(cpu, &irq); } si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } @@ -1465,65 +1501,67 @@ static int kvm_s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) return 0; } -static void sigp_stop_and_store_status(void *arg) +static void sigp_stop_and_store_status(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; struct kvm_s390_irq irq = { .type = KVM_S390_SIGP_STOP, }; /* disabled wait - sleeping in user space */ - if (s390_cpu_get_state(si->cpu) == CPU_STATE_OPERATING && - CPU(si->cpu)->halted) { - s390_cpu_set_state(CPU_STATE_STOPPED, si->cpu); + if (s390_cpu_get_state(cpu) == CPU_STATE_OPERATING && cs->halted) { + s390_cpu_set_state(CPU_STATE_STOPPED, cpu); } - switch (s390_cpu_get_state(si->cpu)) { + switch (s390_cpu_get_state(cpu)) { case CPU_STATE_OPERATING: - si->cpu->env.sigp_order = SIGP_STOP_STORE_STATUS; - kvm_s390_vcpu_interrupt(si->cpu, &irq); + cpu->env.sigp_order = SIGP_STOP_STORE_STATUS; + kvm_s390_vcpu_interrupt(cpu, &irq); /* store will be performed when handling the stop intercept */ break; case CPU_STATE_STOPPED: /* already stopped, just store the status */ - cpu_synchronize_state(CPU(si->cpu)); - kvm_s390_store_status(si->cpu, KVM_S390_STORE_STATUS_DEF_ADDR, true); + cpu_synchronize_state(cs); + kvm_s390_store_status(cpu, KVM_S390_STORE_STATUS_DEF_ADDR, true); break; } si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } -static void sigp_store_status_at_address(void *arg) +static void sigp_store_status_at_address(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; uint32_t address 
= si->param & 0x7ffffe00u; /* cpu has to be stopped */ - if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) { + if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) { set_sigp_status(si, SIGP_STAT_INCORRECT_STATE); return; } - cpu_synchronize_state(CPU(si->cpu)); + cpu_synchronize_state(cs); - if (kvm_s390_store_status(si->cpu, address, false)) { + if (kvm_s390_store_status(cpu, address, false)) { set_sigp_status(si, SIGP_STAT_INVALID_PARAMETER); return; } si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } -static void sigp_store_adtl_status(void *arg) +static void sigp_store_adtl_status(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; - if (!kvm_check_extension(kvm_state, KVM_CAP_S390_VECTOR_REGISTERS)) { + if (!s390_has_feat(S390_FEAT_VECTOR)) { set_sigp_status(si, SIGP_STAT_INVALID_ORDER); return; } /* cpu has to be stopped */ - if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) { + if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) { set_sigp_status(si, SIGP_STAT_INCORRECT_STATE); return; } @@ -1534,31 +1572,32 @@ static void sigp_store_adtl_status(void *arg) return; } - cpu_synchronize_state(CPU(si->cpu)); + cpu_synchronize_state(cs); - if (kvm_s390_store_adtl_status(si->cpu, si->param)) { + if (kvm_s390_store_adtl_status(cpu, si->param)) { set_sigp_status(si, SIGP_STAT_INVALID_PARAMETER); return; } si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } -static void sigp_restart(void *arg) +static void sigp_restart(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; struct kvm_s390_irq irq = { .type = KVM_S390_RESTART, }; - switch (s390_cpu_get_state(si->cpu)) { + switch (s390_cpu_get_state(cpu)) { case CPU_STATE_STOPPED: /* the restart irq has to be delivered prior to any other pending irq */ - cpu_synchronize_state(CPU(si->cpu)); - do_restart_interrupt(&si->cpu->env); - s390_cpu_set_state(CPU_STATE_OPERATING, si->cpu); + cpu_synchronize_state(cs); + do_restart_interrupt(&cpu->env); + s390_cpu_set_state(CPU_STATE_OPERATING, cpu); break; case CPU_STATE_OPERATING: - kvm_s390_vcpu_interrupt(si->cpu, &irq); + kvm_s390_vcpu_interrupt(cpu, &irq); break; } si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; @@ -1566,20 +1605,18 @@ static void sigp_restart(void *arg) int kvm_s390_cpu_restart(S390CPU *cpu) { - SigpInfo si = { - .cpu = cpu, - }; + SigpInfo si = {}; - run_on_cpu(CPU(cpu), sigp_restart, &si); + run_on_cpu(CPU(cpu), sigp_restart, RUN_ON_CPU_HOST_PTR(&si)); DPRINTF("DONE: KVM cpu restart: %p\n", &cpu->env); return 0; } -static void sigp_initial_cpu_reset(void *arg) +static void sigp_initial_cpu_reset(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; - CPUState *cs = CPU(si->cpu); - S390CPUClass *scc = S390_CPU_GET_CLASS(si->cpu); + S390CPU *cpu = S390_CPU(cs); + S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); + SigpInfo *si = arg.host_ptr; cpu_synchronize_state(cs); scc->initial_cpu_reset(cs); @@ -1587,11 +1624,11 @@ static void sigp_initial_cpu_reset(void *arg) si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } -static void sigp_cpu_reset(void *arg) +static void sigp_cpu_reset(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; - CPUState *cs = CPU(si->cpu); - S390CPUClass *scc = S390_CPU_GET_CLASS(si->cpu); + S390CPU *cpu = S390_CPU(cs); + S390CPUClass *scc = S390_CPU_GET_CLASS(cpu); + SigpInfo *si = arg.host_ptr; cpu_synchronize_state(cs); scc->cpu_reset(cs); @@ -1599,12 +1636,13 @@ static void sigp_cpu_reset(void *arg) si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } -static void 
sigp_set_prefix(void *arg) +static void sigp_set_prefix(CPUState *cs, run_on_cpu_data arg) { - SigpInfo *si = arg; + S390CPU *cpu = S390_CPU(cs); + SigpInfo *si = arg.host_ptr; uint32_t addr = si->param & 0x7fffe000u; - cpu_synchronize_state(CPU(si->cpu)); + cpu_synchronize_state(cs); if (!address_space_access_valid(&address_space_memory, addr, sizeof(struct LowCore), false)) { @@ -1613,13 +1651,13 @@ static void sigp_set_prefix(void *arg) } /* cpu has to be stopped */ - if (s390_cpu_get_state(si->cpu) != CPU_STATE_STOPPED) { + if (s390_cpu_get_state(cpu) != CPU_STATE_STOPPED) { set_sigp_status(si, SIGP_STAT_INCORRECT_STATE); return; } - si->cpu->env.psa = addr; - cpu_synchronize_post_init(CPU(si->cpu)); + cpu->env.psa = addr; + cpu_synchronize_post_init(cs); si->cc = SIGP_CC_ORDER_CODE_ACCEPTED; } @@ -1627,7 +1665,6 @@ static int handle_sigp_single_dst(S390CPU *dst_cpu, uint8_t order, uint64_t param, uint64_t *status_reg) { SigpInfo si = { - .cpu = dst_cpu, .param = param, .status_reg = status_reg, }; @@ -1646,31 +1683,31 @@ static int handle_sigp_single_dst(S390CPU *dst_cpu, uint8_t order, switch (order) { case SIGP_START: - run_on_cpu(CPU(dst_cpu), sigp_start, &si); + run_on_cpu(CPU(dst_cpu), sigp_start, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_STOP: - run_on_cpu(CPU(dst_cpu), sigp_stop, &si); + run_on_cpu(CPU(dst_cpu), sigp_stop, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_RESTART: - run_on_cpu(CPU(dst_cpu), sigp_restart, &si); + run_on_cpu(CPU(dst_cpu), sigp_restart, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_STOP_STORE_STATUS: - run_on_cpu(CPU(dst_cpu), sigp_stop_and_store_status, &si); + run_on_cpu(CPU(dst_cpu), sigp_stop_and_store_status, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_STORE_STATUS_ADDR: - run_on_cpu(CPU(dst_cpu), sigp_store_status_at_address, &si); + run_on_cpu(CPU(dst_cpu), sigp_store_status_at_address, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_STORE_ADTL_STATUS: - run_on_cpu(CPU(dst_cpu), sigp_store_adtl_status, &si); + run_on_cpu(CPU(dst_cpu), sigp_store_adtl_status, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_SET_PREFIX: - run_on_cpu(CPU(dst_cpu), sigp_set_prefix, &si); + run_on_cpu(CPU(dst_cpu), sigp_set_prefix, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_INITIAL_CPU_RESET: - run_on_cpu(CPU(dst_cpu), sigp_initial_cpu_reset, &si); + run_on_cpu(CPU(dst_cpu), sigp_initial_cpu_reset, RUN_ON_CPU_HOST_PTR(&si)); break; case SIGP_CPU_RESET: - run_on_cpu(CPU(dst_cpu), sigp_cpu_reset, &si); + run_on_cpu(CPU(dst_cpu), sigp_cpu_reset, RUN_ON_CPU_HOST_PTR(&si)); break; default: DPRINTF("KVM: unknown SIGP: 0x%x\n", order); @@ -1743,6 +1780,11 @@ static int handle_sigp(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) status_reg = &env->regs[r1]; param = (r1 % 2) ? env->regs[r1] : env->regs[r1 + 1]; + if (qemu_mutex_trylock(&qemu_sigp_mutex)) { + ret = SIGP_CC_BUSY; + goto out; + } + switch (order) { case SIGP_SET_ARCH: ret = sigp_set_architecture(cpu, param, status_reg); @@ -1752,7 +1794,9 @@ static int handle_sigp(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) dst_cpu = s390_cpu_addr2state(env->regs[r3]); ret = handle_sigp_single_dst(dst_cpu, order, param, status_reg); } + qemu_mutex_unlock(&qemu_sigp_mutex); +out: trace_kvm_sigp_finished(order, CPU(cpu)->cpu_index, dst_cpu ? 
CPU(dst_cpu)->cpu_index : -1, ret); @@ -1864,6 +1908,14 @@ static int handle_intercept(S390CPU *cpu) cpu->env.sigp_order = 0; r = EXCP_HALTED; break; + case ICPT_OPEREXC: + /* currently only instr 0x0000 after enabled via capability */ + r = handle_sw_breakpoint(cpu, run); + if (r == -ENOENT) { + enter_pgmcheck(cpu, PGM_OPERATION); + r = 0; + } + break; case ICPT_SOFT_INTERCEPT: fprintf(stderr, "KVM unimplemented icpt SOFT\n"); exit(1); @@ -1949,7 +2001,7 @@ static void insert_stsi_3_2_2(S390CPU *cpu, __u64 addr, uint8_t ar) strcpy((char *)sysib.ext_names[0], "KVMguest"); } /* Insert UUID */ - memcpy(sysib.vm[0].uuid, qemu_uuid, sizeof(sysib.vm[0].uuid)); + memcpy(sysib.vm[0].uuid, &qemu_uuid, sizeof(sysib.vm[0].uuid)); s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, sizeof(sysib)); } @@ -2089,7 +2141,7 @@ static uint64_t build_channel_report_mcic(void) MCIC_VB_WP | MCIC_VB_MS | MCIC_VB_PM | MCIC_VB_IA | MCIC_VB_FP | MCIC_VB_GR | MCIC_VB_CR | MCIC_VB_ST | MCIC_VB_AR | MCIC_VB_PR | MCIC_VB_FC | MCIC_VB_CT | MCIC_VB_CC; - if (kvm_check_extension(kvm_state, KVM_CAP_S390_VECTOR_REGISTERS)) { + if (s390_has_feat(S390_FEAT_VECTOR)) { mcic |= MCIC_VB_VR; } return mcic; @@ -2282,3 +2334,320 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) { abort(); } + +static inline int test_bit_inv(long nr, const unsigned long *addr) +{ + return test_bit(BE_BIT_NR(nr), addr); +} + +static inline void set_bit_inv(long nr, unsigned long *addr) +{ + set_bit(BE_BIT_NR(nr), addr); +} + +static int query_cpu_subfunc(S390FeatBitmap features) +{ + struct kvm_s390_vm_cpu_subfunc prop; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_CPU_MODEL, + .attr = KVM_S390_VM_CPU_MACHINE_SUBFUNC, + .addr = (uint64_t) &prop, + }; + int rc; + + rc = kvm_vm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr); + if (rc) { + return rc; + } + + /* + * We're going to add all subfunctions now, if the corresponding feature + * is available that unlocks the query functions. 
+ */ + s390_add_from_feat_block(features, S390_FEAT_TYPE_PLO, prop.plo); + if (test_bit(S390_FEAT_TOD_CLOCK_STEERING, features)) { + s390_add_from_feat_block(features, S390_FEAT_TYPE_PTFF, prop.ptff); + } + if (test_bit(S390_FEAT_MSA, features)) { + s390_add_from_feat_block(features, S390_FEAT_TYPE_KMAC, prop.kmac); + s390_add_from_feat_block(features, S390_FEAT_TYPE_KMC, prop.kmc); + s390_add_from_feat_block(features, S390_FEAT_TYPE_KM, prop.km); + s390_add_from_feat_block(features, S390_FEAT_TYPE_KIMD, prop.kimd); + s390_add_from_feat_block(features, S390_FEAT_TYPE_KLMD, prop.klmd); + } + if (test_bit(S390_FEAT_MSA_EXT_3, features)) { + s390_add_from_feat_block(features, S390_FEAT_TYPE_PCKMO, prop.pckmo); + } + if (test_bit(S390_FEAT_MSA_EXT_4, features)) { + s390_add_from_feat_block(features, S390_FEAT_TYPE_KMCTR, prop.kmctr); + s390_add_from_feat_block(features, S390_FEAT_TYPE_KMF, prop.kmf); + s390_add_from_feat_block(features, S390_FEAT_TYPE_KMO, prop.kmo); + s390_add_from_feat_block(features, S390_FEAT_TYPE_PCC, prop.pcc); + } + if (test_bit(S390_FEAT_MSA_EXT_5, features)) { + s390_add_from_feat_block(features, S390_FEAT_TYPE_PPNO, prop.ppno); + } + return 0; +} + +static int configure_cpu_subfunc(const S390FeatBitmap features) +{ + struct kvm_s390_vm_cpu_subfunc prop = {}; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_CPU_MODEL, + .attr = KVM_S390_VM_CPU_PROCESSOR_SUBFUNC, + .addr = (uint64_t) &prop, + }; + + if (!kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR_SUBFUNC)) { + /* hardware support might be missing, IBC will handle most of this */ + return 0; + } + + s390_fill_feat_block(features, S390_FEAT_TYPE_PLO, prop.plo); + if (test_bit(S390_FEAT_TOD_CLOCK_STEERING, features)) { + s390_fill_feat_block(features, S390_FEAT_TYPE_PTFF, prop.ptff); + prop.ptff[0] |= 0x80; /* query is always available */ + } + if (test_bit(S390_FEAT_MSA, features)) { + s390_fill_feat_block(features, S390_FEAT_TYPE_KMAC, prop.kmac); + prop.kmac[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_KMC, prop.kmc); + prop.kmc[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_KM, prop.km); + prop.km[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_KIMD, prop.kimd); + prop.kimd[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_KLMD, prop.klmd); + prop.klmd[0] |= 0x80; /* query is always available */ + } + if (test_bit(S390_FEAT_MSA_EXT_3, features)) { + s390_fill_feat_block(features, S390_FEAT_TYPE_PCKMO, prop.pckmo); + prop.pckmo[0] |= 0x80; /* query is always available */ + } + if (test_bit(S390_FEAT_MSA_EXT_4, features)) { + s390_fill_feat_block(features, S390_FEAT_TYPE_KMCTR, prop.kmctr); + prop.kmctr[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_KMF, prop.kmf); + prop.kmf[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_KMO, prop.kmo); + prop.kmo[0] |= 0x80; /* query is always available */ + s390_fill_feat_block(features, S390_FEAT_TYPE_PCC, prop.pcc); + prop.pcc[0] |= 0x80; /* query is always available */ + } + if (test_bit(S390_FEAT_MSA_EXT_5, features)) { + s390_fill_feat_block(features, S390_FEAT_TYPE_PPNO, prop.ppno); + prop.ppno[0] |= 0x80; /* query is always available */ + } + return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); +} + +static int kvm_to_feat[][2] = { + { KVM_S390_VM_CPU_FEAT_ESOP, 
S390_FEAT_ESOP }, + { KVM_S390_VM_CPU_FEAT_SIEF2, S390_FEAT_SIE_F2 }, + { KVM_S390_VM_CPU_FEAT_64BSCAO , S390_FEAT_SIE_64BSCAO }, + { KVM_S390_VM_CPU_FEAT_SIIF, S390_FEAT_SIE_SIIF }, + { KVM_S390_VM_CPU_FEAT_GPERE, S390_FEAT_SIE_GPERE }, + { KVM_S390_VM_CPU_FEAT_GSLS, S390_FEAT_SIE_GSLS }, + { KVM_S390_VM_CPU_FEAT_IB, S390_FEAT_SIE_IB }, + { KVM_S390_VM_CPU_FEAT_CEI, S390_FEAT_SIE_CEI }, + { KVM_S390_VM_CPU_FEAT_IBS, S390_FEAT_SIE_IBS }, + { KVM_S390_VM_CPU_FEAT_SKEY, S390_FEAT_SIE_SKEY }, + { KVM_S390_VM_CPU_FEAT_CMMA, S390_FEAT_SIE_CMMA }, + { KVM_S390_VM_CPU_FEAT_PFMFI, S390_FEAT_SIE_PFMFI}, + { KVM_S390_VM_CPU_FEAT_SIGPIF, S390_FEAT_SIE_SIGPIF}, +}; + +static int query_cpu_feat(S390FeatBitmap features) +{ + struct kvm_s390_vm_cpu_feat prop; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_CPU_MODEL, + .attr = KVM_S390_VM_CPU_MACHINE_FEAT, + .addr = (uint64_t) &prop, + }; + int rc; + int i; + + rc = kvm_vm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr); + if (rc) { + return rc; + } + + for (i = 0; i < ARRAY_SIZE(kvm_to_feat); i++) { + if (test_bit_inv(kvm_to_feat[i][0], (unsigned long *)prop.feat)) { + set_bit(kvm_to_feat[i][1], features); + } + } + return 0; +} + +static int configure_cpu_feat(const S390FeatBitmap features) +{ + struct kvm_s390_vm_cpu_feat prop = {}; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_CPU_MODEL, + .attr = KVM_S390_VM_CPU_PROCESSOR_FEAT, + .addr = (uint64_t) &prop, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(kvm_to_feat); i++) { + if (test_bit(kvm_to_feat[i][1], features)) { + set_bit_inv(kvm_to_feat[i][0], (unsigned long *)prop.feat); + } + } + return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); +} + +bool kvm_s390_cpu_models_supported(void) +{ + if (!cpu_model_allowed()) { + /* compatibility machines interfere with the cpu model */ + return false; + } + return kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE) && + kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR) && + kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_FEAT) && + kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR_FEAT) && + kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC); +} + +void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) +{ + struct kvm_s390_vm_cpu_machine prop = {}; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_CPU_MODEL, + .attr = KVM_S390_VM_CPU_MACHINE, + .addr = (uint64_t) &prop, + }; + uint16_t unblocked_ibc = 0, cpu_type = 0; + int rc; + + memset(model, 0, sizeof(*model)); + + if (!kvm_s390_cpu_models_supported()) { + error_setg(errp, "KVM doesn't support CPU models"); + return; + } + + /* query the basic cpu model properties */ + rc = kvm_vm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr); + if (rc) { + error_setg(errp, "KVM: Error querying host CPU model: %d", rc); + return; + } + + cpu_type = cpuid_type(prop.cpuid); + if (has_ibc(prop.ibc)) { + model->lowest_ibc = lowest_ibc(prop.ibc); + unblocked_ibc = unblocked_ibc(prop.ibc); + } + model->cpu_id = cpuid_id(prop.cpuid); + model->cpu_ver = 0xff; + + /* get supported cpu features indicated via STFL(E) */ + s390_add_from_feat_block(model->features, S390_FEAT_TYPE_STFL, + (uint8_t *) prop.fac_mask); + /* dat-enhancement facility 2 has no bit but was introduced with stfle */ + if (test_bit(S390_FEAT_STFLE, model->features)) { + set_bit(S390_FEAT_DAT_ENH_2, model->features); + } + /* get supported cpu features indicated e.g. 
via SCLP */ + rc = query_cpu_feat(model->features); + if (rc) { + error_setg(errp, "KVM: Error querying CPU features: %d", rc); + return; + } + /* get supported cpu subfunctions indicated via query / test bit */ + rc = query_cpu_subfunc(model->features); + if (rc) { + error_setg(errp, "KVM: Error querying CPU subfunctions: %d", rc); + return; + } + + /* with cpu model support, CMM is only indicated if really available */ + if (kvm_s390_cmma_available()) { + set_bit(S390_FEAT_CMM, model->features); + } + + if (s390_known_cpu_type(cpu_type)) { + /* we want the exact model, even if some features are missing */ + model->def = s390_find_cpu_def(cpu_type, ibc_gen(unblocked_ibc), + ibc_ec_ga(unblocked_ibc), NULL); + } else { + /* model unknown, e.g. too new - search using features */ + model->def = s390_find_cpu_def(0, ibc_gen(unblocked_ibc), + ibc_ec_ga(unblocked_ibc), + model->features); + } + if (!model->def) { + error_setg(errp, "KVM: host CPU model could not be identified"); + return; + } + /* strip of features that are not part of the maximum model */ + bitmap_and(model->features, model->features, model->def->full_feat, + S390_FEAT_MAX); +} + +void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +{ + struct kvm_s390_vm_cpu_processor prop = { + .fac_list = { 0 }, + }; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_CPU_MODEL, + .attr = KVM_S390_VM_CPU_PROCESSOR, + .addr = (uint64_t) &prop, + }; + int rc; + + if (!model) { + /* compatibility handling if cpu models are disabled */ + if (kvm_s390_cmma_available() && !mem_path) { + kvm_s390_enable_cmma(); + } + return; + } + if (!kvm_s390_cpu_models_supported()) { + error_setg(errp, "KVM doesn't support CPU models"); + return; + } + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ + s390_fill_feat_block(model->features, S390_FEAT_TYPE_STFL, + (uint8_t *) prop.fac_list); + rc = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); + if (rc) { + error_setg(errp, "KVM: Error configuring the CPU model: %d", rc); + return; + } + /* configure cpu features indicated e.g. 
via SCLP */ + rc = configure_cpu_feat(model->features); + if (rc) { + error_setg(errp, "KVM: Error configuring CPU features: %d", rc); + return; + } + /* configure cpu subfunctions indicated via query / test bit */ + rc = configure_cpu_subfunc(model->features); + if (rc) { + error_setg(errp, "KVM: Error configuring CPU subfunctions: %d", rc); + return; + } + /* enable CMM via CMMA - disable on hugetlbfs */ + if (test_bit(S390_FEAT_CMM, model->features)) { + if (mem_path) { + error_report("Warning: CMM will not be enabled because it is not " + "compatible to hugetlbfs."); + } else { + kvm_s390_enable_cmma(); + } + } +} diff --git a/target-s390x/machine.c b/target-s390x/machine.c index aa39e5daa4..edc3a4717b 100644 --- a/target-s390x/machine.c +++ b/target-s390x/machine.c @@ -78,12 +78,7 @@ static const VMStateDescription vmstate_fpu = { static bool vregs_needed(void *opaque) { -#ifdef CONFIG_KVM - if (kvm_enabled()) { - return kvm_check_extension(kvm_state, KVM_CAP_S390_VECTOR_REGISTERS); - } -#endif - return 0; + return s390_has_feat(S390_FEAT_VECTOR); } static const VMStateDescription vmstate_vregs = { @@ -147,12 +142,7 @@ static const VMStateDescription vmstate_vregs = { static bool riccb_needed(void *opaque) { -#ifdef CONFIG_KVM - if (kvm_enabled()) { - return kvm_s390_get_ri(); - } -#endif - return 0; + return s390_has_feat(S390_FEAT_RUNTIME_INSTRUMENTATION); } const VMStateDescription vmstate_riccb = { diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c index 86da1947b9..c9604ea9c7 100644 --- a/target-s390x/misc_helper.c +++ b/target-s390x/misc_helper.c @@ -126,7 +126,7 @@ static int modified_clear_reset(S390CPU *cpu) pause_all_vcpus(); cpu_synchronize_all_states(); CPU_FOREACH(t) { - run_on_cpu(t, s390_do_cpu_full_reset, t); + run_on_cpu(t, s390_do_cpu_full_reset, RUN_ON_CPU_NULL); } s390_cmma_reset(); subsystem_reset(); @@ -145,7 +145,7 @@ static int load_normal_reset(S390CPU *cpu) pause_all_vcpus(); cpu_synchronize_all_states(); CPU_FOREACH(t) { - run_on_cpu(t, s390_do_cpu_reset, t); + run_on_cpu(t, s390_do_cpu_reset, RUN_ON_CPU_NULL); } s390_cmma_reset(); subsystem_reset(); diff --git a/target-s390x/translate.c b/target-s390x/translate.c index 1a07d70b21..02bc7058fd 100644 --- a/target-s390x/translate.c +++ b/target-s390x/translate.c @@ -5432,9 +5432,11 @@ void gen_intermediate_code(CPUS390XState *env, struct TranslationBlock *tb) #if defined(S390X_DEBUG_DISAS) if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc.pc - pc_start, 1); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-sh4/README.sh4 b/target-sh4/README.sh4 index e578830f79..ece046442a 100644 --- a/target-sh4/README.sh4 +++ b/target-sh4/README.sh4 @@ -25,7 +25,7 @@ Goals The primary model being worked on is the soft MMU target to be able to emulate the Shix 2.0 board by Alexis Polti, described at -http://perso.enst.fr/~polti/realisations/shix20/ +https://web.archive.org/web/20070917001736/http://perso.enst.fr/~polti/realisations/shix20/ Ultimately, qemu will be coupled with a system C or a verilog simulator to simulate the whole board functionalities. 
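The s390x SIGP hunks above convert the run_on_cpu() callbacks from the old void-pointer style to the newer (CPUState *, run_on_cpu_data) signature, which is why the per-order SigpInfo can drop its cpu back-pointer and why callers now wrap their payload with RUN_ON_CPU_HOST_PTR() or pass RUN_ON_CPU_NULL. The following minimal C sketch illustrates the mechanical shape of that conversion; the CPUState, run_on_cpu_data and run_on_cpu definitions here are simplified stand-ins, not QEMU's real ones (in QEMU the work item is queued and executed on the target vCPU's thread rather than called inline).

    /* Simplified stand-ins -- not QEMU's real definitions. */
    #include <stdio.h>

    typedef struct CPUState { int cpu_index; int halted; } CPUState;

    typedef union {
        void *host_ptr;
        unsigned long host_int;
    } run_on_cpu_data;

    #define RUN_ON_CPU_HOST_PTR(p) ((run_on_cpu_data){ .host_ptr = (p) })
    #define RUN_ON_CPU_NULL        RUN_ON_CPU_HOST_PTR(NULL)

    typedef void (*run_on_cpu_func)(CPUState *cs, run_on_cpu_data arg);

    /* Inline dispatch keeps the sketch self-contained; the real helper
     * queues the function so it runs on the target vCPU's thread. */
    static void run_on_cpu(CPUState *cs, run_on_cpu_func func,
                           run_on_cpu_data arg)
    {
        func(cs, arg);
    }

    /* The scratch structure no longer needs a cpu pointer: the callback
     * now receives the target CPUState as its first argument. */
    typedef struct SigpInfo {
        unsigned long param;
        int cc;
    } SigpInfo;

    static void sigp_start(CPUState *cs, run_on_cpu_data arg)
    {
        SigpInfo *si = arg.host_ptr;

        printf("starting cpu %d (param 0x%lx)\n", cs->cpu_index, si->param);
        si->cc = 0; /* order code accepted */
    }

    int main(void)
    {
        CPUState cpu = { .cpu_index = 0 };
        SigpInfo si = { .param = 0x1234 };

        run_on_cpu(&cpu, sigp_start, RUN_ON_CPU_HOST_PTR(&si));
        return si.cc;
    }

Callbacks that need no payload, such as the s390_do_cpu_reset and s390_do_cpu_full_reset calls in the misc_helper.c hunk above, simply pass RUN_ON_CPU_NULL instead of wrapping a pointer.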
diff --git a/target-sh4/cpu.c b/target-sh4/cpu.c index f589532e18..a38f6a6ded 100644 --- a/target-sh4/cpu.c +++ b/target-sh4/cpu.c @@ -244,6 +244,13 @@ static void superh_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); SuperHCPUClass *scc = SUPERH_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } cpu_reset(cs); qemu_init_vcpu(cs); @@ -258,7 +265,6 @@ static void superh_cpu_initfn(Object *obj) CPUSH4State *env = &cpu->env; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); env->movcal_backup_tail = &(env->movcal_backup); @@ -303,13 +309,6 @@ static void superh_cpu_class_init(ObjectClass *oc, void *data) cc->gdb_num_core_regs = 59; dc->vmsd = &vmstate_sh_cpu; - - /* - * Reason: superh_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo superh_cpu_type_info = { diff --git a/target-sh4/translate.c b/target-sh4/translate.c index ca80cf70ca..c89a14733f 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -1927,9 +1927,11 @@ void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("IN:\n"); /* , lookup_symbol(pc_start)); */ log_target_disas(cs, pc_start, ctx.pc - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c index e4089f2074..4e07b92fbd 100644 --- a/target-sparc/cpu.c +++ b/target-sparc/cpu.c @@ -117,8 +117,7 @@ static int cpu_sparc_register(SPARCCPU *cpu, const char *cpu_model) return -1; } - env->def = g_new0(sparc_def_t, 1); - memcpy(env->def, def, sizeof(*def)); + env->def = g_memdup(def, sizeof(*def)); featurestr = strtok(NULL, ","); sparc_cpu_parse_features(CPU(cpu), featurestr, &err); @@ -793,7 +792,9 @@ static bool sparc_cpu_has_work(CPUState *cs) static void sparc_cpu_realizefn(DeviceState *dev, Error **errp) { + CPUState *cs = CPU(dev); SPARCCPUClass *scc = SPARC_CPU_GET_CLASS(dev); + Error *local_err = NULL; #if defined(CONFIG_USER_ONLY) SPARCCPU *cpu = SPARC_CPU(dev); CPUSPARCState *env = &cpu->env; @@ -803,7 +804,13 @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp) } #endif - qemu_init_vcpu(CPU(dev)); + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + + qemu_init_vcpu(cs); scc->parent_realize(dev, errp); } @@ -815,7 +822,6 @@ static void sparc_cpu_initfn(Object *obj) CPUSPARCState *env = &cpu->env; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); if (tcg_enabled()) { gen_intermediate_code_init(env); @@ -868,13 +874,6 @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data) #else cc->gdb_num_core_regs = 72; #endif - - /* - * Reason: sparc_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). 
- */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo sparc_cpu_type_info = { diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index a3d64a4e52..5fb0ed1aad 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -102,6 +102,11 @@ #define CC_DST (env->cc_dst) #define CC_OP (env->cc_op) +/* Even though lazy evaluation of CPU condition codes tends to be less + * important on RISC systems where condition codes are only updated + * when explicitly requested, SPARC uses it to update 32-bit and 64-bit + * condition codes. + */ enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_FLAGS, /* all cc are back in status register */ @@ -220,9 +225,9 @@ enum { #define MAX_NWINDOWS 32 #if !defined(TARGET_SPARC64) -#define NB_MMU_MODES 2 +#define NB_MMU_MODES 3 #else -#define NB_MMU_MODES 6 +#define NB_MMU_MODES 7 typedef struct trap_state { uint64_t tpc; uint64_t tnpc; @@ -544,6 +549,7 @@ void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cpu, vaddr addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); +void cpu_raise_exception_ra(CPUSPARCState *, int, uintptr_t) QEMU_NORETURN; #ifndef NO_CPU_IO_DEFS /* cpu_init.c */ @@ -632,22 +638,16 @@ int cpu_sparc_signal_handler(int host_signum, void *pinfo, void *puc); /* MMU modes definitions */ #if defined (TARGET_SPARC64) #define MMU_USER_IDX 0 -#define MMU_MODE0_SUFFIX _user #define MMU_USER_SECONDARY_IDX 1 -#define MMU_MODE1_SUFFIX _user_secondary #define MMU_KERNEL_IDX 2 -#define MMU_MODE2_SUFFIX _kernel #define MMU_KERNEL_SECONDARY_IDX 3 -#define MMU_MODE3_SUFFIX _kernel_secondary #define MMU_NUCLEUS_IDX 4 -#define MMU_MODE4_SUFFIX _nucleus #define MMU_HYPV_IDX 5 -#define MMU_MODE5_SUFFIX _hypv +#define MMU_PHYS_IDX 6 #else #define MMU_USER_IDX 0 -#define MMU_MODE0_SUFFIX _user #define MMU_KERNEL_IDX 1 -#define MMU_MODE1_SUFFIX _kernel +#define MMU_PHYS_IDX 2 #endif #if defined (TARGET_SPARC64) @@ -667,18 +667,27 @@ static inline int cpu_supervisor_mode(CPUSPARCState *env1) } #endif -static inline int cpu_mmu_index(CPUSPARCState *env1, bool ifetch) +static inline int cpu_mmu_index(CPUSPARCState *env, bool ifetch) { #if defined(CONFIG_USER_ONLY) return MMU_USER_IDX; #elif !defined(TARGET_SPARC64) - return env1->psrs; + if ((env->mmuregs[0] & MMU_E) == 0) { /* MMU disabled */ + return MMU_PHYS_IDX; + } else { + return env->psrs; + } #else - if (env1->tl > 0) { + /* IMMU or DMMU disabled. */ + if (ifetch + ? 
(env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0 + : (env->lsu & DMMU_E) == 0) { + return MMU_PHYS_IDX; + } else if (env->tl > 0) { return MMU_NUCLEUS_IDX; - } else if (cpu_hypervisor_mode(env1)) { + } else if (cpu_hypervisor_mode(env)) { return MMU_HYPV_IDX; - } else if (cpu_supervisor_mode(env1)) { + } else if (cpu_supervisor_mode(env)) { return MMU_KERNEL_IDX; } else { return MMU_USER_IDX; diff --git a/target-sparc/helper.c b/target-sparc/helper.c index bedc6722a1..359b0b15ed 100644 --- a/target-sparc/helper.c +++ b/target-sparc/helper.c @@ -24,6 +24,14 @@ #include "exec/helper-proto.h" #include "sysemu/sysemu.h" +void cpu_raise_exception_ra(CPUSPARCState *env, int tt, uintptr_t ra) +{ + CPUState *cs = CPU(sparc_env_get_cpu(env)); + + cs->exception_index = tt; + cpu_loop_exit_restore(cs, ra); +} + void helper_raise_exception(CPUSPARCState *env, int tt) { CPUState *cs = CPU(sparc_env_get_cpu(env)); @@ -59,7 +67,7 @@ uint64_t helper_tick_get_count(CPUSPARCState *env, void *opaque, int mem_idx) CPUTimer *timer = opaque; if (timer->npt && mem_idx < MMU_KERNEL_IDX) { - helper_raise_exception(env, TT_PRIV_INSN); + cpu_raise_exception_ra(env, TT_PRIV_INSN, GETPC()); } return cpu_tick_get_count(timer); @@ -76,10 +84,9 @@ void helper_tick_set_limit(void *opaque, uint64_t limit) } #endif -static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a, - target_ulong b, int cc) +static target_ulong do_udiv(CPUSPARCState *env, target_ulong a, + target_ulong b, int cc, uintptr_t ra) { - SPARCCPU *cpu = sparc_env_get_cpu(env); int overflow = 0; uint64_t x0; uint32_t x1; @@ -88,8 +95,7 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a, x1 = (b & 0xffffffff); if (x1 == 0) { - cpu_restore_state(CPU(cpu), GETPC()); - helper_raise_exception(env, TT_DIV_ZERO); + cpu_raise_exception_ra(env, TT_DIV_ZERO, ra); } x0 = x0 / x1; @@ -108,18 +114,17 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a, target_ulong helper_udiv(CPUSPARCState *env, target_ulong a, target_ulong b) { - return helper_udiv_common(env, a, b, 0); + return do_udiv(env, a, b, 0, GETPC()); } target_ulong helper_udiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b) { - return helper_udiv_common(env, a, b, 1); + return do_udiv(env, a, b, 1, GETPC()); } -static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a, - target_ulong b, int cc) +static target_ulong do_sdiv(CPUSPARCState *env, target_ulong a, + target_ulong b, int cc, uintptr_t ra) { - SPARCCPU *cpu = sparc_env_get_cpu(env); int overflow = 0; int64_t x0; int32_t x1; @@ -128,8 +133,7 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a, x1 = (b & 0xffffffff); if (x1 == 0) { - cpu_restore_state(CPU(cpu), GETPC()); - helper_raise_exception(env, TT_DIV_ZERO); + cpu_raise_exception_ra(env, TT_DIV_ZERO, ra); } else if (x1 == -1 && x0 == INT64_MIN) { x0 = INT32_MAX; overflow = 1; @@ -151,12 +155,12 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a, target_ulong helper_sdiv(CPUSPARCState *env, target_ulong a, target_ulong b) { - return helper_sdiv_common(env, a, b, 0); + return do_sdiv(env, a, b, 0, GETPC()); } target_ulong helper_sdiv_cc(CPUSPARCState *env, target_ulong a, target_ulong b) { - return helper_sdiv_common(env, a, b, 1); + return do_sdiv(env, a, b, 1, GETPC()); } #ifdef TARGET_SPARC64 @@ -164,10 +168,7 @@ int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b) { if (b == 0) { /* Raise divide by zero trap. 
*/ - SPARCCPU *cpu = sparc_env_get_cpu(env); - - cpu_restore_state(CPU(cpu), GETPC()); - helper_raise_exception(env, TT_DIV_ZERO); + cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); } else if (b == -1) { /* Avoid overflow trap with i386 divide insn. */ return -a; @@ -180,10 +181,7 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b) { if (b == 0) { /* Raise divide by zero trap. */ - SPARCCPU *cpu = sparc_env_get_cpu(env); - - cpu_restore_state(CPU(cpu), GETPC()); - helper_raise_exception(env, TT_DIV_ZERO); + cpu_raise_exception_ra(env, TT_DIV_ZERO, GETPC()); } return a / b; } @@ -192,7 +190,6 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b) target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, target_ulong src2) { - SPARCCPU *cpu = sparc_env_get_cpu(env); target_ulong dst; /* Tag overflow occurs if either input has bits 0 or 1 set. */ @@ -215,14 +212,12 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, return dst; tag_overflow: - cpu_restore_state(CPU(cpu), GETPC()); - helper_raise_exception(env, TT_TOVF); + cpu_raise_exception_ra(env, TT_TOVF, GETPC()); } target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, target_ulong src2) { - SPARCCPU *cpu = sparc_env_get_cpu(env); target_ulong dst; /* Tag overflow occurs if either input has bits 0 or 1 set. */ @@ -245,8 +240,7 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, return dst; tag_overflow: - cpu_restore_state(CPU(cpu), GETPC()); - helper_raise_exception(env, TT_TOVF); + cpu_raise_exception_ra(env, TT_TOVF, GETPC()); } #ifndef TARGET_SPARC64 diff --git a/target-sparc/helper.h b/target-sparc/helper.h index caa2a895d0..0cf1bfb73a 100644 --- a/target-sparc/helper.h +++ b/target-sparc/helper.h @@ -17,8 +17,6 @@ DEF_HELPER_1(rdcwp, tl, env) DEF_HELPER_2(wrcwp, void, env, tl) DEF_HELPER_FLAGS_2(array8, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_1(popc, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_3(ldda_asi, TCG_CALL_NO_WG, void, env, tl, int) -DEF_HELPER_FLAGS_5(casx_asi, TCG_CALL_NO_WG, tl, env, tl, tl, tl, i32) DEF_HELPER_FLAGS_2(set_softint, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_2(clear_softint, TCG_CALL_NO_RWG, void, env, i64) DEF_HELPER_FLAGS_2(write_softint, TCG_CALL_NO_RWG, void, env, i64) @@ -26,9 +24,6 @@ DEF_HELPER_FLAGS_2(tick_set_count, TCG_CALL_NO_RWG, void, ptr, i64) DEF_HELPER_FLAGS_3(tick_get_count, TCG_CALL_NO_WG, i64, env, ptr, int) DEF_HELPER_FLAGS_2(tick_set_limit, TCG_CALL_NO_RWG, void, ptr, i64) #endif -#if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) -DEF_HELPER_FLAGS_5(cas_asi, TCG_CALL_NO_WG, tl, env, tl, tl, tl, i32) -#endif DEF_HELPER_FLAGS_3(check_align, TCG_CALL_NO_WG, void, env, tl, i32) DEF_HELPER_1(debug, void, env) DEF_HELPER_1(save, void, env) @@ -43,8 +38,6 @@ DEF_HELPER_3(tsubcctv, tl, env, tl, tl) DEF_HELPER_FLAGS_3(sdivx, TCG_CALL_NO_WG, s64, env, s64, s64) DEF_HELPER_FLAGS_3(udivx, TCG_CALL_NO_WG, i64, env, i64, i64) #endif -DEF_HELPER_FLAGS_3(ldqf, TCG_CALL_NO_WG, void, env, tl, int) -DEF_HELPER_FLAGS_3(stqf, TCG_CALL_NO_WG, void, env, tl, int) #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) DEF_HELPER_FLAGS_4(ld_asi, TCG_CALL_NO_WG, i64, env, tl, int, i32) DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32) diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index 6ce5ccc37f..de7d53ae20 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -254,18 +254,6 @@ static void 
replace_tlb_1bit_lru(SparcTLBEntry *tlb, #endif -#if defined(TARGET_SPARC64) || defined(CONFIG_USER_ONLY) -static inline target_ulong address_mask(CPUSPARCState *env1, target_ulong addr) -{ -#ifdef TARGET_SPARC64 - if (AM_CHECK(env1)) { - addr &= 0xffffffffULL; - } -#endif - return addr; -} -#endif - #ifdef TARGET_SPARC64 /* returns true if access using this ASI is to have address translated by MMU otherwise access is to raw physical address */ @@ -290,28 +278,41 @@ static inline int is_translating_asi(int asi) } } +static inline target_ulong address_mask(CPUSPARCState *env1, target_ulong addr) +{ + if (AM_CHECK(env1)) { + addr &= 0xffffffffULL; + } + return addr; +} + static inline target_ulong asi_address_mask(CPUSPARCState *env, int asi, target_ulong addr) { if (is_translating_asi(asi)) { - return address_mask(env, addr); - } else { - return addr; + addr = address_mask(env, addr); } + return addr; } #endif -void helper_check_align(CPUSPARCState *env, target_ulong addr, uint32_t align) +static void do_check_align(CPUSPARCState *env, target_ulong addr, + uint32_t align, uintptr_t ra) { if (addr & align) { #ifdef DEBUG_UNALIGNED printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx "\n", addr, env->pc); #endif - helper_raise_exception(env, TT_UNALIGNED); + cpu_raise_exception_ra(env, TT_UNALIGNED, ra); } } +void helper_check_align(CPUSPARCState *env, target_ulong addr, uint32_t align) +{ + do_check_align(env, addr, align, GETPC()); +} + #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) && \ defined(DEBUG_MXCC) static void dump_mxcc(CPUSPARCState *env) @@ -440,7 +441,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, uint32_t last_addr = addr; #endif - helper_check_align(env, addr, size - 1); + do_check_align(env, addr, size - 1, GETPC()); switch (asi) { case ASI_M_MXCC: /* SuperSparc MXCC registers, or... */ /* case ASI_LEON_CACHEREGS: Leon3 cache control */ @@ -554,64 +555,11 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, break; } break; - case ASI_USERDATA: /* User data access */ - switch (size) { - case 1: - ret = cpu_ldub_user(env, addr); - break; - case 2: - ret = cpu_lduw_user(env, addr); - break; - default: - case 4: - ret = cpu_ldl_user(env, addr); - break; - case 8: - ret = cpu_ldq_user(env, addr); - break; - } - break; - case ASI_KERNELDATA: /* Supervisor data access */ - case ASI_P: /* Implicit primary context data access (v9 only?) */ - switch (size) { - case 1: - ret = cpu_ldub_kernel(env, addr); - break; - case 2: - ret = cpu_lduw_kernel(env, addr); - break; - default: - case 4: - ret = cpu_ldl_kernel(env, addr); - break; - case 8: - ret = cpu_ldq_kernel(env, addr); - break; - } - break; case ASI_M_TXTC_TAG: /* SparcStation 5 I-cache tag */ case ASI_M_TXTC_DATA: /* SparcStation 5 I-cache data */ case ASI_M_DATAC_TAG: /* SparcStation 5 D-cache tag */ case ASI_M_DATAC_DATA: /* SparcStation 5 D-cache data */ break; - case ASI_M_BYPASS: /* MMU passthrough */ - case ASI_LEON_BYPASS: /* LEON MMU passthrough */ - switch (size) { - case 1: - ret = ldub_phys(cs->as, addr); - break; - case 2: - ret = lduw_phys(cs->as, addr); - break; - default: - case 4: - ret = ldl_phys(cs->as, addr); - break; - case 8: - ret = ldq_phys(cs->as, addr); - break; - } - break; case 0x21 ... 
0x2f: /* MMU passthrough, 0x100000000 to 0xfffffffff */ switch (size) { case 1: @@ -679,6 +627,14 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, cpu_unassigned_access(cs, addr, false, false, asi, size); ret = 0; break; + + case ASI_USERDATA: /* User data access */ + case ASI_KERNELDATA: /* Supervisor data access */ + case ASI_P: /* Implicit primary context data access (v9 only?) */ + case ASI_M_BYPASS: /* MMU passthrough */ + case ASI_LEON_BYPASS: /* LEON MMU passthrough */ + /* These are always handled inline. */ + g_assert_not_reached(); } if (sign) { switch (size) { @@ -708,7 +664,7 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, SPARCCPU *cpu = sparc_env_get_cpu(env); CPUState *cs = CPU(cpu); - helper_check_align(env, addr, size - 1); + do_check_align(env, addr, size - 1, GETPC()); switch (asi) { case ASI_M_MXCC: /* SuperSparc MXCC registers, or... */ /* case ASI_LEON_CACHEREGS: Leon3 cache control */ @@ -881,10 +837,10 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, case 0: /* Control Register */ env->mmuregs[reg] = (env->mmuregs[reg] & 0xff000000) | (val & 0x00ffffff); - /* Mappings generated during no-fault mode or MMU - disabled mode are invalid in normal mode */ - if ((oldreg & (MMU_E | MMU_NF | env->def->mmu_bm)) != - (env->mmuregs[reg] & (MMU_E | MMU_NF | env->def->mmu_bm))) { + /* Mappings generated during no-fault mode + are invalid in normal mode. */ + if ((oldreg ^ env->mmuregs[reg]) + & (MMU_NF | env->def->mmu_bm)) { tlb_flush(CPU(cpu), 1); } break; @@ -929,41 +885,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, case ASI_M_DIAGS: /* Turbosparc DTLB Diagnostic */ case ASI_M_IODIAG: /* Turbosparc IOTLB Diagnostic */ break; - case ASI_USERDATA: /* User data access */ - switch (size) { - case 1: - cpu_stb_user(env, addr, val); - break; - case 2: - cpu_stw_user(env, addr, val); - break; - default: - case 4: - cpu_stl_user(env, addr, val); - break; - case 8: - cpu_stq_user(env, addr, val); - break; - } - break; - case ASI_KERNELDATA: /* Supervisor data access */ - case ASI_P: - switch (size) { - case 1: - cpu_stb_kernel(env, addr, val); - break; - case 2: - cpu_stw_kernel(env, addr, val); - break; - default: - case 4: - cpu_stl_kernel(env, addr, val); - break; - case 8: - cpu_stq_kernel(env, addr, val); - break; - } - break; case ASI_M_TXTC_TAG: /* I-cache tag */ case ASI_M_TXTC_DATA: /* I-cache data */ case ASI_M_DATAC_TAG: /* D-cache tag */ @@ -974,52 +895,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, case ASI_M_FLUSH_CTX: /* I/D-cache flush context */ case ASI_M_FLUSH_USER: /* I/D-cache flush user */ break; - case ASI_M_BCOPY: /* Block copy, sta access */ - { - /* val = src - addr = dst - copy 32 bytes */ - unsigned int i; - uint32_t src = val & ~3, dst = addr & ~3, temp; - - for (i = 0; i < 32; i += 4, src += 4, dst += 4) { - temp = cpu_ldl_kernel(env, src); - cpu_stl_kernel(env, dst, temp); - } - } - break; - case ASI_M_BFILL: /* Block fill, stda access */ - { - /* addr = dst - fill 32 bytes with val */ - unsigned int i; - uint32_t dst = addr & ~7; - - for (i = 0; i < 32; i += 8, dst += 8) { - cpu_stq_kernel(env, dst, val); - } - } - break; - case ASI_M_BYPASS: /* MMU passthrough */ - case ASI_LEON_BYPASS: /* LEON MMU passthrough */ - { - switch (size) { - case 1: - stb_phys(cs->as, addr, val); - break; - case 2: - stw_phys(cs->as, addr, val); - break; - case 4: - default: - stl_phys(cs->as, addr, val); - break; - case 8: - stq_phys(cs->as, 
addr, val); - break; - } - } - break; case 0x21 ... 0x2f: /* MMU passthrough, 0x100000000 to 0xfffffffff */ { switch (size) { @@ -1091,6 +966,16 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, uint64_t val, cpu_unassigned_access(CPU(sparc_env_get_cpu(env)), addr, true, false, asi, size); break; + + case ASI_USERDATA: /* User data access */ + case ASI_KERNELDATA: /* Supervisor data access */ + case ASI_P: + case ASI_M_BYPASS: /* MMU passthrough */ + case ASI_LEON_BYPASS: /* LEON MMU passthrough */ + case ASI_M_BCOPY: /* Block copy, sta access */ + case ASI_M_BFILL: /* Block fill, stda access */ + /* These are always handled inline. */ + g_assert_not_reached(); } #ifdef DEBUG_ASI dump_asi("write", addr, asi, size, val); @@ -1107,68 +992,54 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, int size = 1 << (memop & MO_SIZE); int sign = memop & MO_SIGN; uint64_t ret = 0; -#if defined(DEBUG_ASI) - target_ulong last_addr = addr; -#endif if (asi < 0x80) { - helper_raise_exception(env, TT_PRIV_ACT); + cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC()); } - - helper_check_align(env, addr, size - 1); + do_check_align(env, addr, size - 1, GETPC()); addr = asi_address_mask(env, asi, addr); switch (asi) { case ASI_PNF: /* Primary no-fault */ case ASI_PNFL: /* Primary no-fault LE */ - if (page_check_range(addr, size, PAGE_READ) == -1) { -#ifdef DEBUG_ASI - dump_asi("read ", last_addr, asi, size, ret); -#endif - return 0; - } - /* Fall through */ - case ASI_P: /* Primary */ - case ASI_PL: /* Primary LE */ - { - switch (size) { - case 1: - ret = cpu_ldub_data(env, addr); - break; - case 2: - ret = cpu_lduw_data(env, addr); - break; - case 4: - ret = cpu_ldl_data(env, addr); - break; - default: - case 8: - ret = cpu_ldq_data(env, addr); - break; - } - } - break; case ASI_SNF: /* Secondary no-fault */ case ASI_SNFL: /* Secondary no-fault LE */ if (page_check_range(addr, size, PAGE_READ) == -1) { -#ifdef DEBUG_ASI - dump_asi("read ", last_addr, asi, size, ret); -#endif - return 0; + ret = 0; + break; + } + switch (size) { + case 1: + ret = cpu_ldub_data(env, addr); + break; + case 2: + ret = cpu_lduw_data(env, addr); + break; + case 4: + ret = cpu_ldl_data(env, addr); + break; + case 8: + ret = cpu_ldq_data(env, addr); + break; + default: + g_assert_not_reached(); } - /* Fall through */ + break; + break; + + case ASI_P: /* Primary */ + case ASI_PL: /* Primary LE */ case ASI_S: /* Secondary */ case ASI_SL: /* Secondary LE */ - /* XXX */ - break; + /* These are always handled inline. 
*/ + g_assert_not_reached(); + default: - break; + cpu_raise_exception_ra(env, TT_DATA_ACCESS, GETPC()); } /* Convert from little endian */ switch (asi) { - case ASI_PL: /* Primary LE */ - case ASI_SL: /* Secondary LE */ case ASI_PNFL: /* Primary no-fault LE */ case ASI_SNFL: /* Secondary no-fault LE */ switch (size) { @@ -1181,11 +1052,7 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, case 8: ret = bswap64(ret); break; - default: - break; } - default: - break; } /* Convert to signed number */ @@ -1200,12 +1067,10 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, case 4: ret = (int32_t) ret; break; - default: - break; } } #ifdef DEBUG_ASI - dump_asi("read ", last_addr, asi, size, ret); + dump_asi("read", addr, asi, size, ret); #endif return ret; } @@ -1218,66 +1083,24 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, dump_asi("write", addr, asi, size, val); #endif if (asi < 0x80) { - helper_raise_exception(env, TT_PRIV_ACT); - } - - helper_check_align(env, addr, size - 1); - addr = asi_address_mask(env, asi, addr); - - /* Convert to little endian */ - switch (asi) { - case ASI_PL: /* Primary LE */ - case ASI_SL: /* Secondary LE */ - switch (size) { - case 2: - val = bswap16(val); - break; - case 4: - val = bswap32(val); - break; - case 8: - val = bswap64(val); - break; - default: - break; - } - default: - break; + cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC()); } + do_check_align(env, addr, size - 1, GETPC()); switch (asi) { case ASI_P: /* Primary */ case ASI_PL: /* Primary LE */ - { - switch (size) { - case 1: - cpu_stb_data(env, addr, val); - break; - case 2: - cpu_stw_data(env, addr, val); - break; - case 4: - cpu_stl_data(env, addr, val); - break; - case 8: - default: - cpu_stq_data(env, addr, val); - break; - } - } - break; case ASI_S: /* Secondary */ case ASI_SL: /* Secondary LE */ - /* XXX */ - return; + /* These are always handled inline. */ + g_assert_not_reached(); case ASI_PNF: /* Primary no-fault, RO */ case ASI_SNF: /* Secondary no-fault, RO */ case ASI_PNFL: /* Primary no-fault LE, RO */ case ASI_SNFL: /* Secondary no-fault LE, RO */ default: - helper_raise_exception(env, TT_DATA_ACCESS); - return; + cpu_raise_exception_ra(env, TT_DATA_ACCESS, GETPC()); } } @@ -1300,36 +1123,62 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, || (cpu_has_hypervisor(env) && asi >= 0x30 && asi < 0x80 && !(env->hpstate & HS_PRIV))) { - helper_raise_exception(env, TT_PRIV_ACT); + cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC()); } - helper_check_align(env, addr, size - 1); + do_check_align(env, addr, size - 1, GETPC()); addr = asi_address_mask(env, asi, addr); - /* process nonfaulting loads first */ - if ((asi & 0xf6) == 0x82) { - int mmu_idx; - - /* secondary space access has lowest asi bit equal to 1 */ - if (env->pstate & PS_PRIV) { - mmu_idx = (asi & 1) ? MMU_KERNEL_SECONDARY_IDX : MMU_KERNEL_IDX; - } else { - mmu_idx = (asi & 1) ? MMU_USER_SECONDARY_IDX : MMU_USER_IDX; - } + switch (asi) { + case ASI_PNF: + case ASI_PNFL: + case ASI_SNF: + case ASI_SNFL: + { + TCGMemOpIdx oi; + int idx = (env->pstate & PS_PRIV + ? (asi & 1 ? MMU_KERNEL_SECONDARY_IDX : MMU_KERNEL_IDX) + : (asi & 1 ? 
MMU_USER_SECONDARY_IDX : MMU_USER_IDX)); - if (cpu_get_phys_page_nofault(env, addr, mmu_idx) == -1ULL) { + if (cpu_get_phys_page_nofault(env, addr, idx) == -1ULL) { #ifdef DEBUG_ASI - dump_asi("read ", last_addr, asi, size, ret); + dump_asi("read ", last_addr, asi, size, ret); #endif - /* env->exception_index is set in get_physical_address_data(). */ - helper_raise_exception(env, cs->exception_index); + /* exception_index is set in get_physical_address_data. */ + cpu_raise_exception_ra(env, cs->exception_index, GETPC()); + } + oi = make_memop_idx(memop, idx); + switch (size) { + case 1: + ret = helper_ret_ldub_mmu(env, addr, oi, GETPC()); + break; + case 2: + if (asi & 8) { + ret = helper_le_lduw_mmu(env, addr, oi, GETPC()); + } else { + ret = helper_be_lduw_mmu(env, addr, oi, GETPC()); + } + break; + case 4: + if (asi & 8) { + ret = helper_le_ldul_mmu(env, addr, oi, GETPC()); + } else { + ret = helper_be_ldul_mmu(env, addr, oi, GETPC()); + } + break; + case 8: + if (asi & 8) { + ret = helper_le_ldq_mmu(env, addr, oi, GETPC()); + } else { + ret = helper_be_ldq_mmu(env, addr, oi, GETPC()); + } + break; + default: + g_assert_not_reached(); + } } + break; - /* convert nonfaulting load ASIs to normal load ASIs */ - asi &= ~0x02; - } - - switch (asi) { case ASI_AIUP: /* As if user primary */ case ASI_AIUS: /* As if user secondary */ case ASI_AIUPL: /* As if user primary LE */ @@ -1338,138 +1187,30 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, case ASI_S: /* Secondary */ case ASI_PL: /* Primary LE */ case ASI_SL: /* Secondary LE */ - if ((asi & 0x80) && (env->pstate & PS_PRIV)) { - if (cpu_hypervisor_mode(env)) { - switch (size) { - case 1: - ret = cpu_ldub_hypv(env, addr); - break; - case 2: - ret = cpu_lduw_hypv(env, addr); - break; - case 4: - ret = cpu_ldl_hypv(env, addr); - break; - default: - case 8: - ret = cpu_ldq_hypv(env, addr); - break; - } - } else { - /* secondary space access has lowest asi bit equal to 1 */ - if (asi & 1) { - switch (size) { - case 1: - ret = cpu_ldub_kernel_secondary(env, addr); - break; - case 2: - ret = cpu_lduw_kernel_secondary(env, addr); - break; - case 4: - ret = cpu_ldl_kernel_secondary(env, addr); - break; - default: - case 8: - ret = cpu_ldq_kernel_secondary(env, addr); - break; - } - } else { - switch (size) { - case 1: - ret = cpu_ldub_kernel(env, addr); - break; - case 2: - ret = cpu_lduw_kernel(env, addr); - break; - case 4: - ret = cpu_ldl_kernel(env, addr); - break; - default: - case 8: - ret = cpu_ldq_kernel(env, addr); - break; - } - } - } - } else { - /* secondary space access has lowest asi bit equal to 1 */ - if (asi & 1) { - switch (size) { - case 1: - ret = cpu_ldub_user_secondary(env, addr); - break; - case 2: - ret = cpu_lduw_user_secondary(env, addr); - break; - case 4: - ret = cpu_ldl_user_secondary(env, addr); - break; - default: - case 8: - ret = cpu_ldq_user_secondary(env, addr); - break; - } - } else { - switch (size) { - case 1: - ret = cpu_ldub_user(env, addr); - break; - case 2: - ret = cpu_lduw_user(env, addr); - break; - case 4: - ret = cpu_ldl_user(env, addr); - break; - default: - case 8: - ret = cpu_ldq_user(env, addr); - break; - } - } - } - break; - case ASI_REAL: /* Bypass */ + case ASI_REAL: /* Bypass */ case ASI_REAL_IO: /* Bypass, non-cacheable */ - case ASI_REAL_L: /* Bypass LE */ + case ASI_REAL_L: /* Bypass LE */ case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */ - { - switch (size) { - case 1: - ret = ldub_phys(cs->as, addr); - break; - case 2: - ret = lduw_phys(cs->as, addr); - break; - case 4: - 
ret = ldl_phys(cs->as, addr); - break; - default: - case 8: - ret = ldq_phys(cs->as, addr); - break; - } - break; - } case ASI_N: /* Nucleus */ case ASI_NL: /* Nucleus Little Endian (LE) */ - { - switch (size) { - case 1: - ret = cpu_ldub_nucleus(env, addr); - break; - case 2: - ret = cpu_lduw_nucleus(env, addr); - break; - case 4: - ret = cpu_ldl_nucleus(env, addr); - break; - default: - case 8: - ret = cpu_ldq_nucleus(env, addr); - break; - } - break; - } + case ASI_NUCLEUS_QUAD_LDD: /* Nucleus quad LDD 128 bit atomic */ + case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */ + case ASI_TWINX_AIUP: /* As if user primary, twinx */ + case ASI_TWINX_AIUS: /* As if user secondary, twinx */ + case ASI_TWINX_REAL: /* Real address, twinx */ + case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */ + case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */ + case ASI_TWINX_REAL_L: /* Real address, twinx, LE */ + case ASI_TWINX_N: /* Nucleus, twinx */ + case ASI_TWINX_NL: /* Nucleus, twinx, LE */ + /* ??? From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */ + case ASI_TWINX_P: /* Primary, twinx */ + case ASI_TWINX_PL: /* Primary, twinx, LE */ + case ASI_TWINX_S: /* Secondary, twinx */ + case ASI_TWINX_SL: /* Secondary, twinx, LE */ + /* These are always handled inline. */ + g_assert_not_reached(); + case ASI_UPA_CONFIG: /* UPA config */ /* XXX */ break; @@ -1597,51 +1338,6 @@ uint64_t helper_ld_asi(CPUSPARCState *env, target_ulong addr, cpu_unassigned_access(cs, addr, false, false, 1, size); ret = 0; break; - - case ASI_NUCLEUS_QUAD_LDD: /* Nucleus quad LDD 128 bit atomic */ - case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */ - case ASI_TWINX_AIUP: /* As if user primary, twinx */ - case ASI_TWINX_AIUS: /* As if user secondary, twinx */ - case ASI_TWINX_REAL: /* Real address, twinx */ - case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */ - case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */ - case ASI_TWINX_REAL_L: /* Real address, twinx, LE */ - case ASI_TWINX_N: /* Nucleus, twinx */ - case ASI_TWINX_NL: /* Nucleus, twinx, LE */ - /* ??? 
From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */ - case ASI_TWINX_P: /* Primary, twinx */ - case ASI_TWINX_PL: /* Primary, twinx, LE */ - case ASI_TWINX_S: /* Secondary, twinx */ - case ASI_TWINX_SL: /* Secondary, twinx, LE */ - /* These are all 128-bit atomic; only ldda (now ldtxa) allowed */ - helper_raise_exception(env, TT_ILL_INSN); - return 0; - } - - /* Convert from little endian */ - switch (asi) { - case ASI_NL: /* Nucleus Little Endian (LE) */ - case ASI_AIUPL: /* As if user primary LE */ - case ASI_AIUSL: /* As if user secondary LE */ - case ASI_REAL_L: /* Bypass LE */ - case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */ - case ASI_PL: /* Primary LE */ - case ASI_SL: /* Secondary LE */ - switch(size) { - case 2: - ret = bswap16(ret); - break; - case 4: - ret = bswap32(ret); - break; - case 8: - ret = bswap64(ret); - break; - default: - break; - } - default: - break; } /* Convert to signed number */ @@ -1683,38 +1379,12 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, || (cpu_has_hypervisor(env) && asi >= 0x30 && asi < 0x80 && !(env->hpstate & HS_PRIV))) { - helper_raise_exception(env, TT_PRIV_ACT); + cpu_raise_exception_ra(env, TT_PRIV_ACT, GETPC()); } - helper_check_align(env, addr, size - 1); + do_check_align(env, addr, size - 1, GETPC()); addr = asi_address_mask(env, asi, addr); - /* Convert to little endian */ - switch (asi) { - case ASI_NL: /* Nucleus Little Endian (LE) */ - case ASI_AIUPL: /* As if user primary LE */ - case ASI_AIUSL: /* As if user secondary LE */ - case ASI_REAL_L: /* Bypass LE */ - case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */ - case ASI_PL: /* Primary LE */ - case ASI_SL: /* Secondary LE */ - switch (size) { - case 2: - val = bswap16(val); - break; - case 4: - val = bswap32(val); - break; - case 8: - val = bswap64(val); - break; - default: - break; - } - default: - break; - } - switch (asi) { case ASI_AIUP: /* As if user primary */ case ASI_AIUS: /* As if user secondary */ @@ -1724,160 +1394,36 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, case ASI_S: /* Secondary */ case ASI_PL: /* Primary LE */ case ASI_SL: /* Secondary LE */ - if ((asi & 0x80) && (env->pstate & PS_PRIV)) { - if (cpu_hypervisor_mode(env)) { - switch (size) { - case 1: - cpu_stb_hypv(env, addr, val); - break; - case 2: - cpu_stw_hypv(env, addr, val); - break; - case 4: - cpu_stl_hypv(env, addr, val); - break; - case 8: - default: - cpu_stq_hypv(env, addr, val); - break; - } - } else { - /* secondary space access has lowest asi bit equal to 1 */ - if (asi & 1) { - switch (size) { - case 1: - cpu_stb_kernel_secondary(env, addr, val); - break; - case 2: - cpu_stw_kernel_secondary(env, addr, val); - break; - case 4: - cpu_stl_kernel_secondary(env, addr, val); - break; - case 8: - default: - cpu_stq_kernel_secondary(env, addr, val); - break; - } - } else { - switch (size) { - case 1: - cpu_stb_kernel(env, addr, val); - break; - case 2: - cpu_stw_kernel(env, addr, val); - break; - case 4: - cpu_stl_kernel(env, addr, val); - break; - case 8: - default: - cpu_stq_kernel(env, addr, val); - break; - } - } - } - } else { - /* secondary space access has lowest asi bit equal to 1 */ - if (asi & 1) { - switch (size) { - case 1: - cpu_stb_user_secondary(env, addr, val); - break; - case 2: - cpu_stw_user_secondary(env, addr, val); - break; - case 4: - cpu_stl_user_secondary(env, addr, val); - break; - case 8: - default: - cpu_stq_user_secondary(env, addr, val); - break; - } - } else { - switch (size) { - case 1: - 
cpu_stb_user(env, addr, val); - break; - case 2: - cpu_stw_user(env, addr, val); - break; - case 4: - cpu_stl_user(env, addr, val); - break; - case 8: - default: - cpu_stq_user(env, addr, val); - break; - } - } - } - break; case ASI_REAL: /* Bypass */ case ASI_REAL_IO: /* Bypass, non-cacheable */ case ASI_REAL_L: /* Bypass LE */ case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */ - { - switch (size) { - case 1: - stb_phys(cs->as, addr, val); - break; - case 2: - stw_phys(cs->as, addr, val); - break; - case 4: - stl_phys(cs->as, addr, val); - break; - case 8: - default: - stq_phys(cs->as, addr, val); - break; - } - } - return; case ASI_N: /* Nucleus */ case ASI_NL: /* Nucleus Little Endian (LE) */ - { - switch (size) { - case 1: - cpu_stb_nucleus(env, addr, val); - break; - case 2: - cpu_stw_nucleus(env, addr, val); - break; - case 4: - cpu_stl_nucleus(env, addr, val); - break; - default: - case 8: - cpu_stq_nucleus(env, addr, val); - break; - } - break; - } + case ASI_NUCLEUS_QUAD_LDD: /* Nucleus quad LDD 128 bit atomic */ + case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */ + case ASI_TWINX_AIUP: /* As if user primary, twinx */ + case ASI_TWINX_AIUS: /* As if user secondary, twinx */ + case ASI_TWINX_REAL: /* Real address, twinx */ + case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */ + case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */ + case ASI_TWINX_REAL_L: /* Real address, twinx, LE */ + case ASI_TWINX_N: /* Nucleus, twinx */ + case ASI_TWINX_NL: /* Nucleus, twinx, LE */ + /* ??? From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */ + case ASI_TWINX_P: /* Primary, twinx */ + case ASI_TWINX_PL: /* Primary, twinx, LE */ + case ASI_TWINX_S: /* Secondary, twinx */ + case ASI_TWINX_SL: /* Secondary, twinx, LE */ + /* These are always handled inline. */ + g_assert_not_reached(); case ASI_UPA_CONFIG: /* UPA config */ /* XXX */ return; case ASI_LSU_CONTROL: /* LSU */ - { - uint64_t oldreg; - - oldreg = env->lsu; - env->lsu = val & (DMMU_E | IMMU_E); - /* Mappings generated during D/I MMU disabled mode are - invalid in normal mode */ - if (oldreg != env->lsu) { - DPRINTF_MMU("LSU change: 0x%" PRIx64 " -> 0x%" PRIx64 "\n", - oldreg, env->lsu); -#ifdef DEBUG_MMU - dump_mmu(stdout, fprintf, env); -#endif - tlb_flush(CPU(cpu), 1); - } - return; - } + env->lsu = val & (DMMU_E | IMMU_E); + return; case ASI_IMMU: /* I-MMU regs */ { int reg = (addr >> 3) & 0xf; @@ -2016,24 +1562,6 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, case ASI_INTR_RECEIVE: /* Interrupt data receive */ env->ivec_status = val & 0x20; return; - case ASI_NUCLEUS_QUAD_LDD: /* Nucleus quad LDD 128 bit atomic */ - case ASI_NUCLEUS_QUAD_LDD_L: /* Nucleus quad LDD 128 bit atomic LE */ - case ASI_TWINX_AIUP: /* As if user primary, twinx */ - case ASI_TWINX_AIUS: /* As if user secondary, twinx */ - case ASI_TWINX_REAL: /* Real address, twinx */ - case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */ - case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */ - case ASI_TWINX_REAL_L: /* Real address, twinx, LE */ - case ASI_TWINX_N: /* Nucleus, twinx */ - case ASI_TWINX_NL: /* Nucleus, twinx, LE */ - /* ??? 
From the UA2011 document; overlaps BLK_INIT_QUAD_LDD_* */ - case ASI_TWINX_P: /* Primary, twinx */ - case ASI_TWINX_PL: /* Primary, twinx, LE */ - case ASI_TWINX_S: /* Secondary, twinx */ - case ASI_TWINX_SL: /* Secondary, twinx, LE */ - /* Only stda allowed */ - helper_raise_exception(env, TT_ILL_INSN); - return; case ASI_DCACHE_DATA: /* D-cache data */ case ASI_DCACHE_TAG: /* D-cache tag access */ case ASI_ESTATE_ERROR_EN: /* E-cache error enable */ @@ -2066,203 +1594,8 @@ void helper_st_asi(CPUSPARCState *env, target_ulong addr, target_ulong val, } } #endif /* CONFIG_USER_ONLY */ - -/* 128-bit LDDA; result returned in QT0. */ -void helper_ldda_asi(CPUSPARCState *env, target_ulong addr, int asi) -{ - uint64_t h, l; - - if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0) - || (cpu_has_hypervisor(env) - && asi >= 0x30 && asi < 0x80 - && !(env->hpstate & HS_PRIV))) { - helper_raise_exception(env, TT_PRIV_ACT); - } - - addr = asi_address_mask(env, asi, addr); - - switch (asi) { -#if !defined(CONFIG_USER_ONLY) - case ASI_TWINX_AIUP: /* As if user primary, twinx */ - case ASI_TWINX_AIUP_L: /* As if user primary, twinx, LE */ - helper_check_align(env, addr, 0xf); - h = cpu_ldq_user(env, addr); - l = cpu_ldq_user(env, addr + 8); - break; - case ASI_TWINX_AIUS: /* As if user secondary, twinx */ - case ASI_TWINX_AIUS_L: /* As if user secondary, twinx, LE */ - helper_check_align(env, addr, 0xf); - h = cpu_ldq_user_secondary(env, addr); - l = cpu_ldq_user_secondary(env, addr + 8); - break; - case ASI_TWINX_REAL: /* Real address, twinx */ - case ASI_TWINX_REAL_L: /* Real address, twinx, LE */ - helper_check_align(env, addr, 0xf); - { - CPUState *cs = CPU(sparc_env_get_cpu(env)); - h = ldq_phys(cs->as, addr); - l = ldq_phys(cs->as, addr + 8); - } - break; - case ASI_NUCLEUS_QUAD_LDD: - case ASI_NUCLEUS_QUAD_LDD_L: - case ASI_TWINX_N: /* Nucleus, twinx */ - case ASI_TWINX_NL: /* Nucleus, twinx, LE */ - helper_check_align(env, addr, 0xf); - h = cpu_ldq_nucleus(env, addr); - l = cpu_ldq_nucleus(env, addr + 8); - break; - case ASI_TWINX_S: /* Secondary, twinx */ - case ASI_TWINX_SL: /* Secondary, twinx, LE */ - if (!cpu_hypervisor_mode(env)) { - helper_check_align(env, addr, 0xf); - if (env->pstate & PS_PRIV) { - h = cpu_ldq_kernel_secondary(env, addr); - l = cpu_ldq_kernel_secondary(env, addr + 8); - } else { - h = cpu_ldq_user_secondary(env, addr); - l = cpu_ldq_user_secondary(env, addr + 8); - } - break; - } - /* fallthru */ - case ASI_TWINX_P: /* Primary, twinx */ - case ASI_TWINX_PL: /* Primary, twinx, LE */ - helper_check_align(env, addr, 0xf); - h = cpu_ldq_data(env, addr); - l = cpu_ldq_data(env, addr + 8); - break; -#else - case ASI_TWINX_P: /* Primary, twinx */ - case ASI_TWINX_PL: /* Primary, twinx, LE */ - case ASI_TWINX_S: /* Primary, twinx */ - case ASI_TWINX_SL: /* Primary, twinx, LE */ - /* ??? Should be available, but we need to implement - an atomic 128-bit load. */ - helper_raise_exception(env, TT_PRIV_ACT); -#endif - default: - /* Non-twinx asi, so this is the legacy ldda insn, which - performs two word sized operations. */ - /* ??? The UA2011 manual recommends emulating this with - a single 64-bit load. However, LE asis *are* treated - as two 32-bit loads individually byte swapped. 
*/ - helper_check_align(env, addr, 0x7); - QT0.high = (uint32_t)helper_ld_asi(env, addr, asi, MO_UL); - QT0.low = (uint32_t)helper_ld_asi(env, addr + 4, asi, MO_UL); - return; - } - - if (asi & 8) { - h = bswap64(h); - l = bswap64(l); - } - QT0.high = h; - QT0.low = l; -} - -target_ulong helper_casx_asi(CPUSPARCState *env, target_ulong addr, - target_ulong val1, target_ulong val2, - uint32_t asi) -{ - target_ulong ret; - - ret = helper_ld_asi(env, addr, asi, MO_Q); - if (val2 == ret) { - helper_st_asi(env, addr, val1, asi, MO_Q); - } - return ret; -} #endif /* TARGET_SPARC64 */ -#if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) -target_ulong helper_cas_asi(CPUSPARCState *env, target_ulong addr, - target_ulong val1, target_ulong val2, uint32_t asi) -{ - target_ulong ret; - - val2 &= 0xffffffffUL; - ret = helper_ld_asi(env, addr, asi, MO_UL); - ret &= 0xffffffffUL; - if (val2 == ret) { - helper_st_asi(env, addr, val1 & 0xffffffffUL, asi, MO_UL); - } - return ret; -} -#endif /* !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) */ - -void helper_ldqf(CPUSPARCState *env, target_ulong addr, int mem_idx) -{ - /* XXX add 128 bit load */ - CPU_QuadU u; - - helper_check_align(env, addr, 7); -#if !defined(CONFIG_USER_ONLY) - switch (mem_idx) { - case MMU_USER_IDX: - u.ll.upper = cpu_ldq_user(env, addr); - u.ll.lower = cpu_ldq_user(env, addr + 8); - QT0 = u.q; - break; - case MMU_KERNEL_IDX: - u.ll.upper = cpu_ldq_kernel(env, addr); - u.ll.lower = cpu_ldq_kernel(env, addr + 8); - QT0 = u.q; - break; -#ifdef TARGET_SPARC64 - case MMU_HYPV_IDX: - u.ll.upper = cpu_ldq_hypv(env, addr); - u.ll.lower = cpu_ldq_hypv(env, addr + 8); - QT0 = u.q; - break; -#endif - default: - DPRINTF_MMU("helper_ldqf: need to check MMU idx %d\n", mem_idx); - break; - } -#else - u.ll.upper = cpu_ldq_data(env, address_mask(env, addr)); - u.ll.lower = cpu_ldq_data(env, address_mask(env, addr + 8)); - QT0 = u.q; -#endif -} - -void helper_stqf(CPUSPARCState *env, target_ulong addr, int mem_idx) -{ - /* XXX add 128 bit store */ - CPU_QuadU u; - - helper_check_align(env, addr, 7); -#if !defined(CONFIG_USER_ONLY) - switch (mem_idx) { - case MMU_USER_IDX: - u.q = QT0; - cpu_stq_user(env, addr, u.ll.upper); - cpu_stq_user(env, addr + 8, u.ll.lower); - break; - case MMU_KERNEL_IDX: - u.q = QT0; - cpu_stq_kernel(env, addr, u.ll.upper); - cpu_stq_kernel(env, addr + 8, u.ll.lower); - break; -#ifdef TARGET_SPARC64 - case MMU_HYPV_IDX: - u.q = QT0; - cpu_stq_hypv(env, addr, u.ll.upper); - cpu_stq_hypv(env, addr + 8, u.ll.lower); - break; -#endif - default: - DPRINTF_MMU("helper_stqf: need to check MMU idx %d\n", mem_idx); - break; - } -#else - u.q = QT0; - cpu_stq_data(env, address_mask(env, addr), u.ll.upper); - cpu_stq_data(env, address_mask(env, addr + 8), u.ll.lower); -#endif -} - #if !defined(CONFIG_USER_ONLY) #ifndef TARGET_SPARC64 void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr, @@ -2314,11 +1647,8 @@ void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr, } if ((env->mmuregs[0] & MMU_E) && !(env->mmuregs[0] & MMU_NF)) { - if (is_exec) { - helper_raise_exception(env, TT_CODE_ACCESS); - } else { - helper_raise_exception(env, TT_DATA_ACCESS); - } + int tt = is_exec ? TT_CODE_ACCESS : TT_DATA_ACCESS; + cpu_raise_exception_ra(env, tt, GETPC()); } /* flush neverland mappings created during no-fault mode, @@ -2334,17 +1664,14 @@ void sparc_cpu_unassigned_access(CPUState *cs, hwaddr addr, { SPARCCPU *cpu = SPARC_CPU(cs); CPUSPARCState *env = &cpu->env; + int tt = is_exec ? 
TT_CODE_ACCESS : TT_DATA_ACCESS; #ifdef DEBUG_UNASSIGNED printf("Unassigned mem access to " TARGET_FMT_plx " from " TARGET_FMT_lx "\n", addr, env->pc); #endif - if (is_exec) { - helper_raise_exception(env, TT_CODE_ACCESS); - } else { - helper_raise_exception(env, TT_DATA_ACCESS); - } + cpu_raise_exception_ra(env, tt, GETPC()); } #endif #endif @@ -2362,10 +1689,7 @@ void QEMU_NORETURN sparc_cpu_do_unaligned_access(CPUState *cs, vaddr addr, printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx "\n", addr, env->pc); #endif - if (retaddr) { - cpu_restore_state(CPU(cpu), retaddr); - } - helper_raise_exception(env, TT_UNALIGNED); + cpu_raise_exception_ra(env, TT_UNALIGNED, retaddr); } /* try to fill the TLB and return an exception if error. If retaddr is @@ -2379,10 +1703,7 @@ void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type, ret = sparc_cpu_handle_mmu_fault(cs, addr, access_type, mmu_idx); if (ret) { - if (retaddr) { - cpu_restore_state(cs, retaddr); - } - cpu_loop_exit(cs); + cpu_loop_exit_restore(cs, retaddr); } } #endif diff --git a/target-sparc/machine.c b/target-sparc/machine.c index 59c92f7582..aea6397861 100644 --- a/target-sparc/machine.c +++ b/target-sparc/machine.c @@ -6,10 +6,7 @@ #include "hw/boards.h" #include "qemu/timer.h" -#include "cpu.h" -#include "exec/exec-all.h" #include "migration/cpu.h" -#include "exec/exec-all.h" #ifdef TARGET_SPARC64 static const VMStateDescription vmstate_cpu_timer = { diff --git a/target-sparc/mmu_helper.c b/target-sparc/mmu_helper.c index 32b629fb0d..044e88c4c5 100644 --- a/target-sparc/mmu_helper.c +++ b/target-sparc/mmu_helper.c @@ -92,7 +92,7 @@ static int get_physical_address(CPUSPARCState *env, hwaddr *physical, is_user = mmu_idx == MMU_USER_IDX; - if ((env->mmuregs[0] & MMU_E) == 0) { /* MMU disabled */ + if (mmu_idx == MMU_PHYS_IDX) { *page_size = TARGET_PAGE_SIZE; /* Boot mode: instruction fetches are taken from PROM */ if (rw == 2 && (env->mmuregs[0] & env->def->mmu_bm)) { @@ -494,23 +494,21 @@ static int get_physical_address_data(CPUSPARCState *env, unsigned int i; uint64_t context; uint64_t sfsr = 0; - - int is_user = (mmu_idx == MMU_USER_IDX || - mmu_idx == MMU_USER_SECONDARY_IDX); - - if ((env->lsu & DMMU_E) == 0) { /* DMMU disabled */ - *physical = ultrasparc_truncate_physical(address); - *prot = PAGE_READ | PAGE_WRITE; - return 0; - } + bool is_user = false; switch (mmu_idx) { + case MMU_PHYS_IDX: + g_assert_not_reached(); case MMU_USER_IDX: + is_user = true; + /* fallthru */ case MMU_KERNEL_IDX: context = env->dmmu.mmu_primary_context & 0x1fff; sfsr |= SFSR_CT_PRIMARY; break; case MMU_USER_SECONDARY_IDX: + is_user = true; + /* fallthru */ case MMU_KERNEL_SECONDARY_IDX: context = env->dmmu.mmu_secondary_context & 0x1fff; sfsr |= SFSR_CT_SECONDARY; @@ -613,15 +611,22 @@ static int get_physical_address_code(CPUSPARCState *env, CPUState *cs = CPU(sparc_env_get_cpu(env)); unsigned int i; uint64_t context; + bool is_user = false; - int is_user = (mmu_idx == MMU_USER_IDX || - mmu_idx == MMU_USER_SECONDARY_IDX); - - if ((env->lsu & IMMU_E) == 0 || (env->pstate & PS_RED) != 0) { - /* IMMU disabled */ - *physical = ultrasparc_truncate_physical(address); - *prot = PAGE_EXEC; - return 0; + switch (mmu_idx) { + case MMU_PHYS_IDX: + case MMU_USER_SECONDARY_IDX: + case MMU_KERNEL_SECONDARY_IDX: + g_assert_not_reached(); + case MMU_USER_IDX: + is_user = true; + /* fallthru */ + case MMU_KERNEL_IDX: + context = env->dmmu.mmu_primary_context & 0x1fff; + break; + default: + context = 0; + break; } if (env->tl == 
0) { @@ -700,6 +705,12 @@ static int get_physical_address(CPUSPARCState *env, hwaddr *physical, } } + if (mmu_idx == MMU_PHYS_IDX) { + *physical = ultrasparc_truncate_physical(address); + *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + return 0; + } + if (rw == 2) { return get_physical_address_code(env, physical, prot, address, mmu_idx); diff --git a/target-sparc/translate.c b/target-sparc/translate.c index e7691e4458..2205f89837 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -242,7 +242,29 @@ static void gen_op_store_QT0_fpr(unsigned int dst) offsetof(CPU_QuadU, ll.lower)); } +static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, + TCGv_i64 v1, TCGv_i64 v2) +{ + dst = QFPREG(dst); + + tcg_gen_mov_i64(cpu_fpr[dst / 2], v1); + tcg_gen_mov_i64(cpu_fpr[dst / 2 + 1], v2); + gen_update_fprs_dirty(dc, dst); +} + #ifdef TARGET_SPARC64 +static TCGv_i64 gen_load_fpr_Q0(DisasContext *dc, unsigned int src) +{ + src = QFPREG(src); + return cpu_fpr[src / 2]; +} + +static TCGv_i64 gen_load_fpr_Q1(DisasContext *dc, unsigned int src) +{ + src = QFPREG(src); + return cpu_fpr[src / 2 + 1]; +} + static void gen_move_Q(DisasContext *dc, unsigned int rd, unsigned int rs) { rd = QFPREG(rd); @@ -2001,6 +2023,21 @@ static inline void gen_ne_fop_QD(DisasContext *dc, int rd, int rs, gen_update_fprs_dirty(dc, QFPREG(rd)); } +static void gen_swap(DisasContext *dc, TCGv dst, TCGv src, + TCGv addr, int mmu_idx, TCGMemOp memop) +{ + gen_address_mask(dc, addr); + tcg_gen_atomic_xchg_tl(dst, addr, src, mmu_idx, memop); +} + +static void gen_ldstub(DisasContext *dc, TCGv dst, TCGv addr, int mmu_idx) +{ + TCGv m1 = tcg_const_tl(0xff); + gen_address_mask(dc, addr); + tcg_gen_atomic_xchg_tl(dst, addr, m1, mmu_idx, MO_UB); + tcg_temp_free(m1); +} + /* asi moves */ #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) typedef enum { @@ -2010,6 +2047,8 @@ typedef enum { GET_ASI_DTWINX, GET_ASI_BLOCK, GET_ASI_SHORT, + GET_ASI_BCOPY, + GET_ASI_BFILL, } ASIType; typedef struct { @@ -2046,6 +2085,19 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop) mem_idx = MMU_KERNEL_IDX; type = GET_ASI_DIRECT; break; + case ASI_M_BYPASS: /* MMU passthrough */ + case ASI_LEON_BYPASS: /* LEON MMU passthrough */ + mem_idx = MMU_PHYS_IDX; + type = GET_ASI_DIRECT; + break; + case ASI_M_BCOPY: /* Block copy, sta access */ + mem_idx = MMU_KERNEL_IDX; + type = GET_ASI_BCOPY; + break; + case ASI_M_BFILL: /* Block fill, stda access */ + mem_idx = MMU_KERNEL_IDX; + type = GET_ASI_BFILL; + break; } } else { gen_exception(dc, TT_PRIV_INSN); @@ -2066,10 +2118,22 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop) type = GET_ASI_EXCP; } else { switch (asi) { + case ASI_REAL: /* Bypass */ + case ASI_REAL_IO: /* Bypass, non-cacheable */ + case ASI_REAL_L: /* Bypass LE */ + case ASI_REAL_IO_L: /* Bypass, non-cacheable LE */ + case ASI_TWINX_REAL: /* Real address, twinx */ + case ASI_TWINX_REAL_L: /* Real address, twinx, LE */ + case ASI_QUAD_LDD_PHYS: + case ASI_QUAD_LDD_PHYS_L: + mem_idx = MMU_PHYS_IDX; + break; case ASI_N: /* Nucleus */ case ASI_NL: /* Nucleus LE */ case ASI_TWINX_N: case ASI_TWINX_NL: + case ASI_NUCLEUS_QUAD_LDD: + case ASI_NUCLEUS_QUAD_LDD_L: mem_idx = MMU_NUCLEUS_IDX; break; case ASI_AIUP: /* As if user primary */ @@ -2123,6 +2187,10 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop) break; } switch (asi) { + case ASI_REAL: + case ASI_REAL_IO: + case ASI_REAL_L: + case ASI_REAL_IO_L: case ASI_N: case ASI_NL: case ASI_AIUP: @@ -2135,6 +2203,8 @@ 
static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop) case ASI_PL: type = GET_ASI_DIRECT; break; + case ASI_TWINX_REAL: + case ASI_TWINX_REAL_L: case ASI_TWINX_N: case ASI_TWINX_NL: case ASI_TWINX_AIUP: @@ -2145,6 +2215,10 @@ static DisasASI get_asi(DisasContext *dc, int insn, TCGMemOp memop) case ASI_TWINX_PL: case ASI_TWINX_S: case ASI_TWINX_SL: + case ASI_QUAD_LDD_PHYS: + case ASI_QUAD_LDD_PHYS_L: + case ASI_NUCLEUS_QUAD_LDD: + case ASI_NUCLEUS_QUAD_LDD_L: type = GET_ASI_DTWINX; break; case ASI_BLK_COMMIT_P: @@ -2241,6 +2315,38 @@ static void gen_st_asi(DisasContext *dc, TCGv src, TCGv addr, gen_address_mask(dc, addr); tcg_gen_qemu_st_tl(src, addr, da.mem_idx, da.memop); break; +#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) + case GET_ASI_BCOPY: + /* Copy 32 bytes from the address in SRC to ADDR. */ + /* ??? The original qemu code suggests 4-byte alignment, dropping + the low bits, but the only place I can see this used is in the + Linux kernel with 32 byte alignment, which would make more sense + as a cacheline-style operation. */ + { + TCGv saddr = tcg_temp_new(); + TCGv daddr = tcg_temp_new(); + TCGv four = tcg_const_tl(4); + TCGv_i32 tmp = tcg_temp_new_i32(); + int i; + + tcg_gen_andi_tl(saddr, src, -4); + tcg_gen_andi_tl(daddr, addr, -4); + for (i = 0; i < 32; i += 4) { + /* Since the loads and stores are paired, allow the + copy to happen in the host endianness. */ + tcg_gen_qemu_ld_i32(tmp, saddr, da.mem_idx, MO_UL); + tcg_gen_qemu_st_i32(tmp, daddr, da.mem_idx, MO_UL); + tcg_gen_add_tl(saddr, saddr, four); + tcg_gen_add_tl(daddr, daddr, four); + } + + tcg_temp_free(saddr); + tcg_temp_free(daddr); + tcg_temp_free(four); + tcg_temp_free_i32(tmp); + } + break; +#endif default: { TCGv_i32 r_asi = tcg_const_i32(da.asi); @@ -2275,48 +2381,37 @@ static void gen_swap_asi(DisasContext *dc, TCGv dst, TCGv src, switch (da.type) { case GET_ASI_EXCP: break; + case GET_ASI_DIRECT: + gen_swap(dc, dst, src, addr, da.mem_idx, da.memop); + break; default: - { - TCGv_i32 r_asi = tcg_const_i32(da.asi); - TCGv_i32 r_mop = tcg_const_i32(MO_UL); - TCGv_i64 s64, t64; - - save_state(dc); - t64 = tcg_temp_new_i64(); - gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop); - - s64 = tcg_temp_new_i64(); - tcg_gen_extu_tl_i64(s64, src); - gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop); - tcg_temp_free_i64(s64); - tcg_temp_free_i32(r_mop); - tcg_temp_free_i32(r_asi); - - tcg_gen_trunc_i64_tl(dst, t64); - tcg_temp_free_i64(t64); - } + /* ??? Should be DAE_invalid_asi. */ + gen_exception(dc, TT_DATA_ACCESS); break; } } -static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv val2, +static void gen_cas_asi(DisasContext *dc, TCGv addr, TCGv cmpv, int insn, int rd) { DisasASI da = get_asi(dc, insn, MO_TEUL); - TCGv val1, dst; - TCGv_i32 r_asi; + TCGv oldv; - if (da.type == GET_ASI_EXCP) { + switch (da.type) { + case GET_ASI_EXCP: return; + case GET_ASI_DIRECT: + oldv = tcg_temp_new(); + tcg_gen_atomic_cmpxchg_tl(oldv, addr, cmpv, gen_load_gpr(dc, rd), + da.mem_idx, da.memop); + gen_store_gpr(dc, rd, oldv); + tcg_temp_free(oldv); + break; + default: + /* ??? Should be DAE_invalid_asi. 
*/ + gen_exception(dc, TT_DATA_ACCESS); + break; } - - save_state(dc); - val1 = gen_load_gpr(dc, rd); - dst = gen_dest_gpr(dc, rd); - r_asi = tcg_const_i32(da.asi); - gen_helper_cas_asi(dst, cpu_env, addr, val1, val2, r_asi); - tcg_temp_free_i32(r_asi); - gen_store_gpr(dc, rd, dst); } static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn) @@ -2326,25 +2421,12 @@ static void gen_ldstub_asi(DisasContext *dc, TCGv dst, TCGv addr, int insn) switch (da.type) { case GET_ASI_EXCP: break; + case GET_ASI_DIRECT: + gen_ldstub(dc, dst, addr, da.mem_idx); + break; default: - { - TCGv_i32 r_asi = tcg_const_i32(da.asi); - TCGv_i32 r_mop = tcg_const_i32(MO_UB); - TCGv_i64 s64, t64; - - save_state(dc); - t64 = tcg_temp_new_i64(); - gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_mop); - - s64 = tcg_const_i64(0xff); - gen_helper_st_asi(cpu_env, addr, s64, r_asi, r_mop); - tcg_temp_free_i64(s64); - tcg_temp_free_i32(r_mop); - tcg_temp_free_i32(r_asi); - - tcg_gen_trunc_i64_tl(dst, t64); - tcg_temp_free_i64(t64); - } + /* ??? Should be DAE_invalid_asi. */ + gen_exception(dc, TT_DATA_ACCESS); break; } } @@ -2356,6 +2438,7 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr, { DisasASI da = get_asi(dc, insn, (size == 4 ? MO_TEUL : MO_TEQ)); TCGv_i32 d32; + TCGv_i64 d64; switch (da.type) { case GET_ASI_EXCP: @@ -2370,12 +2453,17 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr, gen_store_fpr_F(dc, rd, d32); break; case 8: - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop); + tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, + da.memop | MO_ALIGN_4); break; case 16: - tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop); + d64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(d64, addr, da.mem_idx, da.memop | MO_ALIGN_4); tcg_gen_addi_tl(addr, addr, 8); - tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop); + tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, + da.memop | MO_ALIGN_4); + tcg_gen_mov_i64(cpu_fpr[rd / 2], d64); + tcg_temp_free_i64(d64); break; default: g_assert_not_reached(); @@ -2385,20 +2473,23 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr, case GET_ASI_BLOCK: /* Valid for lddfa on aligned registers only. */ if (size == 8 && (rd & 7) == 0) { + TCGMemOp memop; TCGv eight; int i; - gen_check_align(addr, 0x3f); gen_address_mask(dc, addr); + /* The first operation checks required alignment. */ + memop = da.memop | MO_ALIGN_64; eight = tcg_const_tl(8); for (i = 0; ; ++i) { tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr, - da.mem_idx, da.memop); + da.mem_idx, memop); if (i == 7) { break; } tcg_gen_add_tl(addr, addr, eight); + memop = da.memop; } tcg_temp_free(eight); } else { @@ -2428,22 +2519,23 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr, but we can just use the integer asi helper for them. 
*/ switch (size) { case 4: - { - TCGv d64 = tcg_temp_new_i64(); - gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop); - d32 = gen_dest_fpr_F(dc); - tcg_gen_extrl_i64_i32(d32, d64); - tcg_temp_free_i64(d64); - gen_store_fpr_F(dc, rd, d32); - } + d64 = tcg_temp_new_i64(); + gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop); + d32 = gen_dest_fpr_F(dc); + tcg_gen_extrl_i64_i32(d32, d64); + tcg_temp_free_i64(d64); + gen_store_fpr_F(dc, rd, d32); break; case 8: gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop); break; case 16: - gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop); + d64 = tcg_temp_new_i64(); + gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop); tcg_gen_addi_tl(addr, addr, 8); gen_helper_ld_asi(cpu_fpr[rd/2+1], cpu_env, addr, r_asi, r_mop); + tcg_gen_mov_i64(cpu_fpr[rd / 2], d64); + tcg_temp_free_i64(d64); break; default: g_assert_not_reached(); @@ -2473,10 +2565,17 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr, tcg_gen_qemu_st_i32(d32, addr, da.mem_idx, da.memop); break; case 8: - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop); + tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, + da.memop | MO_ALIGN_4); break; case 16: - tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop); + /* Only 4-byte alignment required. However, it is legal for the + cpu to signal the alignment fault, and the OS trap handler is + required to fix it up. Requiring 16-byte alignment here avoids + having to probe the second page before performing the first + write. */ + tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, + da.memop | MO_ALIGN_16); tcg_gen_addi_tl(addr, addr, 8); tcg_gen_qemu_st_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop); break; @@ -2488,20 +2587,23 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr, case GET_ASI_BLOCK: /* Valid for stdfa on aligned registers only. */ if (size == 8 && (rd & 7) == 0) { + TCGMemOp memop; TCGv eight; int i; - gen_check_align(addr, 0x3f); gen_address_mask(dc, addr); + /* The first operation checks required alignment. */ + memop = da.memop | MO_ALIGN_64; eight = tcg_const_tl(8); for (i = 0; ; ++i) { tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr, - da.mem_idx, da.memop); + da.mem_idx, memop); if (i == 7) { break; } tcg_gen_add_tl(addr, addr, eight); + memop = da.memop; } tcg_temp_free(eight); } else { @@ -2539,9 +2641,8 @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd) return; case GET_ASI_DTWINX: - gen_check_align(addr, 15); gen_address_mask(dc, addr); - tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop); + tcg_gen_qemu_ld_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16); tcg_gen_addi_tl(addr, addr, 8); tcg_gen_qemu_ld_i64(lo, addr, da.mem_idx, da.memop); break; @@ -2566,15 +2667,27 @@ static void gen_ldda_asi(DisasContext *dc, TCGv addr, int insn, int rd) break; default: + /* ??? In theory we've handled all of the ASIs that are valid + for ldda, and this should raise DAE_invalid_asi. However, + real hardware allows others. This can be seen with e.g. + FreeBSD 10.3 wrt ASI_IC_TAG. */ { TCGv_i32 r_asi = tcg_const_i32(da.asi); + TCGv_i32 r_mop = tcg_const_i32(da.memop); + TCGv_i64 tmp = tcg_temp_new_i64(); save_state(dc); - gen_helper_ldda_asi(cpu_env, addr, r_asi); + gen_helper_ld_asi(tmp, cpu_env, addr, r_asi, r_mop); tcg_temp_free_i32(r_asi); + tcg_temp_free_i32(r_mop); - tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUSPARCState, qt0.high)); - tcg_gen_ld_i64(lo, cpu_env, offsetof(CPUSPARCState, qt0.low)); + /* See above. 
*/ + if ((da.memop & MO_BSWAP) == MO_TE) { + tcg_gen_extr32_i64(lo, hi, tmp); + } else { + tcg_gen_extr32_i64(hi, lo, tmp); + } + tcg_temp_free_i64(tmp); } break; } @@ -2594,9 +2707,8 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr, break; case GET_ASI_DTWINX: - gen_check_align(addr, 15); gen_address_mask(dc, addr); - tcg_gen_qemu_st_i64(hi, addr, da.mem_idx, da.memop); + tcg_gen_qemu_st_i64(hi, addr, da.mem_idx, da.memop | MO_ALIGN_16); tcg_gen_addi_tl(addr, addr, 8); tcg_gen_qemu_st_i64(lo, addr, da.mem_idx, da.memop); break; @@ -2620,15 +2732,21 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr, break; default: + /* ??? In theory we've handled all of the ASIs that are valid + for stda, and this should raise DAE_invalid_asi. */ { TCGv_i32 r_asi = tcg_const_i32(da.asi); - TCGv_i32 r_mop = tcg_const_i32(MO_Q); - TCGv_i64 t64; + TCGv_i32 r_mop = tcg_const_i32(da.memop); + TCGv_i64 t64 = tcg_temp_new_i64(); - save_state(dc); + /* See above. */ + if ((da.memop & MO_BSWAP) == MO_TE) { + tcg_gen_concat32_i64(t64, lo, hi); + } else { + tcg_gen_concat32_i64(t64, hi, lo); + } - t64 = tcg_temp_new_i64(); - tcg_gen_concat_tl_i64(t64, lo, hi); + save_state(dc); gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_mop); tcg_temp_free_i32(r_mop); tcg_temp_free_i32(r_asi); @@ -2638,23 +2756,27 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr, } } -static void gen_casx_asi(DisasContext *dc, TCGv addr, TCGv val2, +static void gen_casx_asi(DisasContext *dc, TCGv addr, TCGv cmpv, int insn, int rd) { DisasASI da = get_asi(dc, insn, MO_TEQ); - TCGv val1 = gen_load_gpr(dc, rd); - TCGv dst = gen_dest_gpr(dc, rd); - TCGv_i32 r_asi; + TCGv oldv; - if (da.type == GET_ASI_EXCP) { + switch (da.type) { + case GET_ASI_EXCP: return; + case GET_ASI_DIRECT: + oldv = tcg_temp_new(); + tcg_gen_atomic_cmpxchg_tl(oldv, addr, cmpv, gen_load_gpr(dc, rd), + da.mem_idx, da.memop); + gen_store_gpr(dc, rd, oldv); + tcg_temp_free(oldv); + break; + default: + /* ??? Should be DAE_invalid_asi. */ + gen_exception(dc, TT_DATA_ACCESS); + break; } - - save_state(dc); - r_asi = tcg_const_i32(da.asi); - gen_helper_casx_asi(dst, cpu_env, addr, val1, val2, r_asi); - tcg_temp_free_i32(r_asi); - gen_store_gpr(dc, rd, dst); } #elif !defined(CONFIG_USER_ONLY) @@ -2712,6 +2834,27 @@ static void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr, gen_address_mask(dc, addr); tcg_gen_qemu_st_i64(t64, addr, da.mem_idx, da.memop); break; + case GET_ASI_BFILL: + /* Store 32 bytes of T64 to ADDR. */ + /* ??? The original qemu code suggests 8-byte alignment, dropping + the low bits, but the only place I can see this used is in the + Linux kernel with 32 byte alignment, which would make more sense + as a cacheline-style operation. 
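A note on the register-pair packing used by the ldda/stda slow paths above: the generic helper traffics in a single 64-bit value, and tcg_gen_concat32_i64()/tcg_gen_extr32_i64() take the low half as their first value argument. In plain C the two operations amount to:

/* What tcg_gen_concat32_i64(ret, lo, hi) computes. */
static inline uint64_t concat32(uint32_t lo, uint32_t hi)
{
    return ((uint64_t)hi << 32) | lo;
}

/* What tcg_gen_extr32_i64(lo, hi, val) produces. */
static inline void extr32(uint64_t val, uint32_t *lo, uint32_t *hi)
{
    *lo = (uint32_t)val;
    *hi = (uint32_t)(val >> 32);
}

For a target-endian (big-endian on SPARC) access, the even register of the pair has to become the most significant word of the stored value, hence concat32_i64(t64, lo, hi); the byte-swapped little-endian ASIs flip the two roles, and the load side mirrors this with extr32_i64.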
*/ + { + TCGv d_addr = tcg_temp_new(); + TCGv eight = tcg_const_tl(8); + int i; + + tcg_gen_andi_tl(d_addr, addr, -8); + for (i = 0; i < 32; i += 8) { + tcg_gen_qemu_st_i64(t64, d_addr, da.mem_idx, da.memop); + tcg_gen_add_tl(d_addr, d_addr, eight); + } + + tcg_temp_free(d_addr); + tcg_temp_free(eight); + } + break; default: { TCGv_i32 r_asi = tcg_const_i32(da.asi); @@ -3454,7 +3597,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) break; } else if (xop == 0x2b) { /* rdtbr / V9 flushw */ #ifdef TARGET_SPARC64 - save_state(dc); gen_helper_flushw(cpu_env); #else if (!supervisor(dc)) @@ -5058,12 +5200,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) /* nop */ break; case 0x3c: /* save */ - save_state(dc); gen_helper_save(cpu_env); gen_store_gpr(dc, rd, cpu_tmp0); break; case 0x3d: /* restore */ - save_state(dc); gen_helper_restore(cpu_env); gen_store_gpr(dc, rd, cpu_tmp0); break; @@ -5163,31 +5303,15 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) gen_address_mask(dc, cpu_addr); tcg_gen_qemu_ld16s(cpu_val, cpu_addr, dc->mem_idx); break; - case 0xd: /* ldstub -- XXX: should be atomically */ - { - TCGv r_const; - TCGv tmp = tcg_temp_new(); - - gen_address_mask(dc, cpu_addr); - tcg_gen_qemu_ld8u(tmp, cpu_addr, dc->mem_idx); - r_const = tcg_const_tl(0xff); - tcg_gen_qemu_st8(r_const, cpu_addr, dc->mem_idx); - tcg_gen_mov_tl(cpu_val, tmp); - tcg_temp_free(r_const); - tcg_temp_free(tmp); - } + case 0xd: /* ldstub */ + gen_ldstub(dc, cpu_val, cpu_addr, dc->mem_idx); break; case 0x0f: /* swap, swap register with memory. Also atomically */ - { - TCGv t0 = get_temp_tl(dc); - CHECK_IU_FEATURE(dc, SWAP); - cpu_src1 = gen_load_gpr(dc, rd); - gen_address_mask(dc, cpu_addr); - tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx); - tcg_gen_qemu_st32(cpu_src1, cpu_addr, dc->mem_idx); - tcg_gen_mov_tl(cpu_val, t0); - } + CHECK_IU_FEATURE(dc, SWAP); + cpu_src1 = gen_load_gpr(dc, rd); + gen_swap(dc, cpu_val, cpu_src1, cpu_addr, + dc->mem_idx, MO_TEUL); break; #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64) case 0x10: /* lda, V9 lduwa, load word alternate */ @@ -5278,18 +5402,15 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) skip_move: ; #endif } else if (xop >= 0x20 && xop < 0x24) { - TCGv t0; - if (gen_trap_ifnofpu(dc)) { goto jmp_insn; } switch (xop) { case 0x20: /* ldf, load fpreg */ gen_address_mask(dc, cpu_addr); - t0 = get_temp_tl(dc); - tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx); cpu_dst_32 = gen_dest_fpr_F(dc); - tcg_gen_trunc_tl_i32(cpu_dst_32, t0); + tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr, + dc->mem_idx, MO_TEUL); gen_store_fpr_F(dc, rd, cpu_dst_32); break; case 0x21: /* ldfsr, V9 ldxfsr */ @@ -5297,35 +5418,37 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) gen_address_mask(dc, cpu_addr); if (rd == 1) { TCGv_i64 t64 = tcg_temp_new_i64(); - tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx); + tcg_gen_qemu_ld_i64(t64, cpu_addr, + dc->mem_idx, MO_TEQ); gen_helper_ldxfsr(cpu_fsr, cpu_env, cpu_fsr, t64); tcg_temp_free_i64(t64); break; } #endif cpu_dst_32 = get_temp_i32(dc); - t0 = get_temp_tl(dc); - tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx); - tcg_gen_trunc_tl_i32(cpu_dst_32, t0); + tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr, + dc->mem_idx, MO_TEUL); gen_helper_ldfsr(cpu_fsr, cpu_env, cpu_fsr, cpu_dst_32); break; case 0x22: /* ldqf, load quad fpreg */ - { - TCGv_i32 r_const; - - CHECK_FPU_FEATURE(dc, FLOAT128); - r_const = tcg_const_i32(dc->mem_idx); - gen_address_mask(dc, cpu_addr); - 
gen_helper_ldqf(cpu_env, cpu_addr, r_const); - tcg_temp_free_i32(r_const); - gen_op_store_QT0_fpr(QFPREG(rd)); - gen_update_fprs_dirty(dc, QFPREG(rd)); - } + CHECK_FPU_FEATURE(dc, FLOAT128); + gen_address_mask(dc, cpu_addr); + cpu_src1_64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(cpu_src1_64, cpu_addr, dc->mem_idx, + MO_TEQ | MO_ALIGN_4); + tcg_gen_addi_tl(cpu_addr, cpu_addr, 8); + cpu_src2_64 = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(cpu_src2_64, cpu_addr, dc->mem_idx, + MO_TEQ | MO_ALIGN_4); + gen_store_fpr_Q(dc, rd, cpu_src1_64, cpu_src2_64); + tcg_temp_free_i64(cpu_src1_64); + tcg_temp_free_i64(cpu_src2_64); break; case 0x23: /* lddf, load double fpreg */ gen_address_mask(dc, cpu_addr); cpu_dst_64 = gen_dest_fpr_D(dc, rd); - tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx); + tcg_gen_qemu_ld_i64(cpu_dst_64, cpu_addr, dc->mem_idx, + MO_TEQ | MO_ALIGN_4); gen_store_fpr_D(dc, rd, cpu_dst_64); break; default: @@ -5398,13 +5521,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) } switch (xop) { case 0x24: /* stf, store fpreg */ - { - TCGv t = get_temp_tl(dc); - gen_address_mask(dc, cpu_addr); - cpu_src1_32 = gen_load_fpr_F(dc, rd); - tcg_gen_ext_i32_tl(t, cpu_src1_32); - tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx); - } + gen_address_mask(dc, cpu_addr); + cpu_src1_32 = gen_load_fpr_F(dc, rd); + tcg_gen_qemu_st_i32(cpu_src1_32, cpu_addr, + dc->mem_idx, MO_TEUL); break; case 0x25: /* stfsr, V9 stxfsr */ { @@ -5421,16 +5541,20 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) case 0x26: #ifdef TARGET_SPARC64 /* V9 stqf, store quad fpreg */ - { - TCGv_i32 r_const; - - CHECK_FPU_FEATURE(dc, FLOAT128); - gen_op_load_fpr_QT0(QFPREG(rd)); - r_const = tcg_const_i32(dc->mem_idx); - gen_address_mask(dc, cpu_addr); - gen_helper_stqf(cpu_env, cpu_addr, r_const); - tcg_temp_free_i32(r_const); - } + CHECK_FPU_FEATURE(dc, FLOAT128); + gen_address_mask(dc, cpu_addr); + /* ??? While stqf only requires 4-byte alignment, it is + legal for the cpu to signal the unaligned exception. + The OS trap handler is then required to fix it up. + For qemu, this avoids having to probe the second page + before performing the first write. 
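The ldqf conversion above (and the stqf path that follows) moves the quad FP register through new gen_store_fpr_Q()/gen_load_fpr_Q0()/gen_load_fpr_Q1() accessors that are defined outside this excerpt. Presumably they just shuttle the two 64-bit halves in and out of cpu_fpr[] and mark the register dirty, along these lines (a sketch, not the in-tree code):

static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst,
                            TCGv_i64 v1, TCGv_i64 v2)
{
    dst = QFPREG(dst);
    /* v1 carries the most significant half of the quad register. */
    tcg_gen_mov_i64(cpu_fpr[dst / 2], v1);
    tcg_gen_mov_i64(cpu_fpr[dst / 2 + 1], v2);
    gen_update_fprs_dirty(dc, dst);
}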
*/ + cpu_src1_64 = gen_load_fpr_Q0(dc, rd); + tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr, + dc->mem_idx, MO_TEQ | MO_ALIGN_16); + tcg_gen_addi_tl(cpu_addr, cpu_addr, 8); + cpu_src2_64 = gen_load_fpr_Q1(dc, rd); + tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr, + dc->mem_idx, MO_TEQ); break; #else /* !TARGET_SPARC64 */ /* stdfq, store floating point queue */ @@ -5448,7 +5572,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) case 0x27: /* stdf, store double fpreg */ gen_address_mask(dc, cpu_addr); cpu_src1_64 = gen_load_fpr_D(dc, rd); - tcg_gen_qemu_st64(cpu_src1_64, cpu_addr, dc->mem_idx); + tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr, dc->mem_idx, + MO_TEQ | MO_ALIGN_4); break; default: goto illegal_insn; @@ -5468,7 +5593,6 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn) if (gen_trap_ifnofpu(dc)) { goto jmp_insn; } - gen_check_align(cpu_addr, 7); gen_stf_asi(dc, cpu_addr, insn, 16, QFPREG(rd)); } break; @@ -5672,10 +5796,12 @@ void gen_intermediate_code(CPUSPARCState * env, TranslationBlock * tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("--------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, last_pc + 4 - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-sparc/win_helper.c b/target-sparc/win_helper.c index a8a6c0cfc4..2d5b5469a9 100644 --- a/target-sparc/win_helper.c +++ b/target-sparc/win_helper.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" +#include "exec/exec-all.h" #include "exec/helper-proto.h" #include "trace.h" @@ -111,13 +112,13 @@ void helper_rett(CPUSPARCState *env) unsigned int cwp; if (env->psret == 1) { - helper_raise_exception(env, TT_ILL_INSN); + cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC()); } env->psret = 1; cwp = cpu_cwp_inc(env, env->cwp + 1) ; if (env->wim & (1 << cwp)) { - helper_raise_exception(env, TT_WIN_UNF); + cpu_raise_exception_ra(env, TT_WIN_UNF, GETPC()); } cpu_set_cwp(env, cwp); env->psrs = env->psrps; @@ -131,7 +132,7 @@ void helper_save(CPUSPARCState *env) cwp = cpu_cwp_dec(env, env->cwp - 1); if (env->wim & (1 << cwp)) { - helper_raise_exception(env, TT_WIN_OVF); + cpu_raise_exception_ra(env, TT_WIN_OVF, GETPC()); } cpu_set_cwp(env, cwp); } @@ -142,7 +143,7 @@ void helper_restore(CPUSPARCState *env) cwp = cpu_cwp_inc(env, env->cwp + 1); if (env->wim & (1 << cwp)) { - helper_raise_exception(env, TT_WIN_UNF); + cpu_raise_exception_ra(env, TT_WIN_UNF, GETPC()); } cpu_set_cwp(env, cwp); } @@ -150,7 +151,7 @@ void helper_restore(CPUSPARCState *env) void helper_wrpsr(CPUSPARCState *env, target_ulong new_psr) { if ((new_psr & PSR_CWP) >= env->nwindows) { - helper_raise_exception(env, TT_ILL_INSN); + cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC()); } else { cpu_put_psr(env, new_psr); } @@ -170,14 +171,14 @@ void helper_save(CPUSPARCState *env) cwp = cpu_cwp_dec(env, env->cwp - 1); if (env->cansave == 0) { - helper_raise_exception(env, TT_SPILL | (env->otherwin != 0 ? - (TT_WOTHER | - ((env->wstate & 0x38) >> 1)) : - ((env->wstate & 0x7) << 2))); + int tt = TT_SPILL | (env->otherwin != 0 + ? 
(TT_WOTHER | ((env->wstate & 0x38) >> 1)) + : ((env->wstate & 0x7) << 2)); + cpu_raise_exception_ra(env, tt, GETPC()); } else { if (env->cleanwin - env->canrestore == 0) { /* XXX Clean windows without trap */ - helper_raise_exception(env, TT_CLRWIN); + cpu_raise_exception_ra(env, TT_CLRWIN, GETPC()); } else { env->cansave--; env->canrestore++; @@ -192,10 +193,10 @@ void helper_restore(CPUSPARCState *env) cwp = cpu_cwp_inc(env, env->cwp + 1); if (env->canrestore == 0) { - helper_raise_exception(env, TT_FILL | (env->otherwin != 0 ? - (TT_WOTHER | - ((env->wstate & 0x38) >> 1)) : - ((env->wstate & 0x7) << 2))); + int tt = TT_FILL | (env->otherwin != 0 + ? (TT_WOTHER | ((env->wstate & 0x38) >> 1)) + : ((env->wstate & 0x7) << 2)); + cpu_raise_exception_ra(env, tt, GETPC()); } else { env->cansave++; env->canrestore--; @@ -206,10 +207,10 @@ void helper_restore(CPUSPARCState *env) void helper_flushw(CPUSPARCState *env) { if (env->cansave != env->nwindows - 2) { - helper_raise_exception(env, TT_SPILL | (env->otherwin != 0 ? - (TT_WOTHER | - ((env->wstate & 0x38) >> 1)) : - ((env->wstate & 0x7) << 2))); + int tt = TT_SPILL | (env->otherwin != 0 + ? (TT_WOTHER | ((env->wstate & 0x38) >> 1)) + : ((env->wstate & 0x7) << 2)); + cpu_raise_exception_ra(env, tt, GETPC()); } } diff --git a/target-tilegx/cpu.c b/target-tilegx/cpu.c index 7017cb6e1a..454793f94a 100644 --- a/target-tilegx/cpu.c +++ b/target-tilegx/cpu.c @@ -92,6 +92,13 @@ static void tilegx_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); TileGXCPUClass *tcc = TILEGX_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } cpu_reset(cs); qemu_init_vcpu(cs); @@ -107,7 +114,6 @@ static void tilegx_cpu_initfn(Object *obj) static bool tcg_initialized; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); if (tcg_enabled() && !tcg_initialized) { tcg_initialized = true; @@ -162,13 +168,6 @@ static void tilegx_cpu_class_init(ObjectClass *oc, void *data) cc->set_pc = tilegx_cpu_set_pc; cc->handle_mmu_fault = tilegx_cpu_handle_mmu_fault; cc->gdb_num_core_regs = 0; - - /* - * Reason: tilegx_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). 
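A note on the win_helper.c conversion above: replacing helper_raise_exception() with cpu_raise_exception_ra(env, tt, GETPC()) makes the exception carry the host return address of the helper call, so the CPU state can be restored by the unwinder instead of an explicit save_state() in the translator (which is why several save_state() calls disappear from translate.c in this patch). The function itself is added elsewhere in the series; presumably it is little more than:

void cpu_raise_exception_ra(CPUSPARCState *env, int tt, uintptr_t ra)
{
    CPUState *cs = CPU(sparc_env_get_cpu(env));

    cs->exception_index = tt;
    /* Restore CPU state from RA, then longjmp back to the main loop. */
    cpu_loop_exit_restore(cs, ra);
}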
- */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo tilegx_cpu_type_info = { diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c index 11c9732389..9c734eeba3 100644 --- a/target-tilegx/translate.c +++ b/target-tilegx/translate.c @@ -2391,6 +2391,7 @@ void gen_intermediate_code(CPUTLGState *env, struct TranslationBlock *tb) TCGV_UNUSED_I64(dc->zero); if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); } if (!max_insns) { @@ -2429,7 +2430,10 @@ void gen_intermediate_code(CPUTLGState *env, struct TranslationBlock *tb) tb->size = dc->pc - pc_start; tb->icount = num_insns; - qemu_log_mask(CPU_LOG_TB_IN_ASM, "\n"); + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + qemu_log("\n"); + qemu_log_unlock(); + } } void restore_state_to_opc(CPUTLGState *env, TranslationBlock *tb, diff --git a/target-tricore/cpu.c b/target-tricore/cpu.c index 35d4ee4dea..785b76bd3a 100644 --- a/target-tricore/cpu.c +++ b/target-tricore/cpu.c @@ -69,6 +69,13 @@ static void tricore_cpu_realizefn(DeviceState *dev, Error **errp) TriCoreCPU *cpu = TRICORE_CPU(dev); TriCoreCPUClass *tcc = TRICORE_CPU_GET_CLASS(dev); CPUTriCoreState *env = &cpu->env; + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } /* Some features automatically imply others */ if (tricore_feature(env, TRICORE_FEATURE_161)) { @@ -95,7 +102,6 @@ static void tricore_cpu_initfn(Object *obj) CPUTriCoreState *env = &cpu->env; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); if (tcg_enabled()) { tricore_tcg_init(); @@ -172,13 +178,6 @@ static void tricore_cpu_class_init(ObjectClass *c, void *data) cc->dump_state = tricore_cpu_dump_state; cc->set_pc = tricore_cpu_set_pc; cc->synchronize_from_tb = tricore_cpu_synchronize_from_tb; - - /* - * Reason: tricore_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). 
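Several translators in this patch now bracket their CPU_LOG_TB_IN_ASM dumps with qemu_log_lock()/qemu_log_unlock(). Those helpers live in qemu/log.h rather than in this diff; the assumption is that they simply hold the stdio stream lock on the log file for the duration of the multi-line dump, so one thread's disassembly is not interleaved with another's. Roughly (POSIX spelling only; the real helpers would need the usual win32 wrappers):

static inline void qemu_log_lock(void)
{
    if (qemu_logfile) {
        flockfile(qemu_logfile);
    }
}

static inline void qemu_log_unlock(void)
{
    if (qemu_logfile) {
        funlockfile(qemu_logfile);
    }
}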
- */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void cpu_register(const TriCoreCPUInfo *info) diff --git a/target-tricore/translate.c b/target-tricore/translate.c index 9a50df9a88..36f734a662 100644 --- a/target-tricore/translate.c +++ b/target-tricore/translate.c @@ -8789,9 +8789,11 @@ void gen_intermediate_code(CPUTriCoreState *env, struct TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, ctx.pc - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif } diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c index e7a4984260..c169972b59 100644 --- a/target-unicore32/cpu.c +++ b/target-unicore32/cpu.c @@ -101,9 +101,17 @@ static const UniCore32CPUInfo uc32_cpus[] = { static void uc32_cpu_realizefn(DeviceState *dev, Error **errp) { + CPUState *cs = CPU(dev); UniCore32CPUClass *ucc = UNICORE32_CPU_GET_CLASS(dev); + Error *local_err = NULL; - qemu_init_vcpu(CPU(dev)); + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + + qemu_init_vcpu(cs); ucc->parent_realize(dev, errp); } @@ -116,7 +124,6 @@ static void uc32_cpu_initfn(Object *obj) static bool inited; cs->env_ptr = env; - cpu_exec_init(cs, &error_abort); #ifdef CONFIG_USER_ONLY env->uncached_asr = ASR_MODE_USER; @@ -160,13 +167,6 @@ static void uc32_cpu_class_init(ObjectClass *oc, void *data) cc->get_phys_page_debug = uc32_cpu_get_phys_page_debug; #endif dc->vmsd = &vmstate_uc32_cpu; - - /* - * Reason: uc32_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). - */ - dc->cannot_destroy_with_object_finalize_yet = true; } static void uc32_register_cpu_type(const UniCore32CPUInfo *info) diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c index 09354f92d2..514d460408 100644 --- a/target-unicore32/translate.c +++ b/target-unicore32/translate.c @@ -2024,10 +2024,12 @@ void gen_intermediate_code(CPUUniCore32State *env, TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif tb->size = dc->pc - pc_start; diff --git a/target-xtensa/cpu.c b/target-xtensa/cpu.c index 5ad08a2799..e8e9f9175b 100644 --- a/target-xtensa/cpu.c +++ b/target-xtensa/cpu.c @@ -99,6 +99,13 @@ static void xtensa_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); XtensaCPUClass *xcc = XTENSA_CPU_GET_CLASS(dev); + Error *local_err = NULL; + + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } cs->gdb_num_regs = xcc->config->gdb_regmap.num_regs; @@ -117,7 +124,6 @@ static void xtensa_cpu_initfn(Object *obj) cs->env_ptr = env; env->config = xcc->config; - cpu_exec_init(cs, &error_abort); if (tcg_enabled() && !tcg_inited) { tcg_inited = true; @@ -158,13 +164,6 @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data) #endif cc->debug_excp_handler = xtensa_breakpoint_handler; dc->vmsd = &vmstate_xtensa_cpu; - - /* - * Reason: xtensa_cpu_initfn() calls cpu_exec_init(), which saves - * the object in cpus -> dangling pointer after final - * object_unref(). 
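The same realize-time conversion recurs for each remaining target in this patch: cpu_exec_init() disappears from instance_init, cpu_exec_realizefn() is called at the top of realize where a failure can actually be reported, and the now-unneeded cannot_destroy_with_object_finalize_yet workaround is dropped. Distilled to its shape (names are illustrative, target-specific setup elided):

static void xyz_cpu_realizefn(DeviceState *dev, Error **errp)
{
    CPUState *cs = CPU(dev);
    XyzCPUClass *xcc = XYZ_CPU_GET_CLASS(dev);
    Error *local_err = NULL;

    /* Register the CPU with the machine; errors are now propagated
       instead of aborting inside instance_init. */
    cpu_exec_realizefn(cs, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
        return;
    }

    /* ... target-specific initialization ... */

    qemu_init_vcpu(cs);
    xcc->parent_realize(dev, errp);
}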
- */ - dc->cannot_destroy_with_object_finalize_yet = true; } static const TypeInfo xtensa_cpu_type_info = { diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c index 4c1e48748b..0858c296ea 100644 --- a/target-xtensa/translate.c +++ b/target-xtensa/translate.c @@ -36,7 +36,6 @@ #include "tcg-op.h" #include "qemu/log.h" #include "sysemu/sysemu.h" -#include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/semihost.h" @@ -3156,10 +3155,12 @@ void gen_intermediate_code(CPUXtensaState *env, TranslationBlock *tb) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && qemu_log_in_addr_range(pc_start)) { + qemu_log_lock(); qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc.pc - pc_start, 0); qemu_log("\n"); + qemu_log_unlock(); } #endif tb->size = dc.pc - pc_start; diff --git a/tcg-runtime.c b/tcg-runtime.c index ea2ad649cb..9327b6f23b 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -23,17 +23,10 @@ */ #include "qemu/osdep.h" #include "qemu/host-utils.h" - -/* This file is compiled once, and thus we can't include the standard - "exec/helper-proto.h", which has includes that are target specific. */ - -#include "exec/helper-head.h" - -#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ - dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)); - -#include "tcg-runtime.h" - +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" /* 32-bit helpers */ @@ -107,3 +100,62 @@ int64_t HELPER(mulsh_i64)(int64_t arg1, int64_t arg2) muls64(&l, &h, arg1, arg2); return h; } + +void HELPER(exit_atomic)(CPUArchState *env) +{ + cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); +} + +#ifndef CONFIG_SOFTMMU +/* The softmmu versions of these helpers are in cputlb.c. */ + +/* Do not allow unaligned operations to proceed. Return the host address. */ +static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, + int size, uintptr_t retaddr) +{ + /* Enforce qemu required alignment. */ + if (unlikely(addr & (size - 1))) { + cpu_loop_exit_atomic(ENV_GET_CPU(env), retaddr); + } + return g2h(addr); +} + +/* Macro to call the above, with local variables from the use context. */ +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, GETPC()) + +#define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END)) +#define EXTRA_ARGS + +#define DATA_SIZE 1 +#include "atomic_template.h" + +#define DATA_SIZE 2 +#include "atomic_template.h" + +#define DATA_SIZE 4 +#include "atomic_template.h" + +#ifdef CONFIG_ATOMIC64 +#define DATA_SIZE 8 +#include "atomic_template.h" +#endif + +/* The following is only callable from other helpers, and matches up + with the softmmu version. */ + +#ifdef CONFIG_ATOMIC128 + +#undef EXTRA_ARGS +#undef ATOMIC_NAME +#undef ATOMIC_MMU_LOOKUP + +#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr +#define ATOMIC_NAME(X) \ + HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) + +#define DATA_SIZE 16 +#include "atomic_template.h" +#endif /* CONFIG_ATOMIC128 */ + +#endif /* !CONFIG_SOFTMMU */ diff --git a/tcg/README b/tcg/README index ce8bebab37..ae31388c59 100644 --- a/tcg/README +++ b/tcg/README @@ -8,6 +8,11 @@ in the QOP code generator written by Paul Brook. 2) Definitions +TCG receives RISC-like "TCG ops" and performs some optimizations on them, +including liveness analysis and trivial constant expression +evaluation. 
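Back in the tcg-runtime.c hunk above, the user-mode build now gets real atomic helpers by stamping out atomic_template.h once per access size, with atomic_mmu_lookup() refusing unaligned addresses by bailing out to cpu_loop_exit_atomic() (the exit_atomic helper does the same for sizes the host cannot handle), so the offending TB gets re-executed serially. As a hedged sketch, on a little-endian host the 4-byte compare-and-swap instantiation boils down to something like this (atomic_cmpxchg__nocheck is assumed to be the qemu/atomic.h wrapper over the GCC __atomic builtins; the real body lives in atomic_template.h):

uint32_t HELPER(atomic_cmpxchgl_le)(CPUArchState *env, target_ulong addr,
                                    uint32_t cmpv, uint32_t newv)
{
    /* Reject unaligned atomics; otherwise g2h() yields the host address. */
    uint32_t *haddr = atomic_mmu_lookup(env, addr, 4, GETPC());

    return atomic_cmpxchg__nocheck(haddr, cmpv, newv);
}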
TCG ops are then implemented in the host CPU back end, +also known as the TCG "target". + The TCG "target" is the architecture for which we generate the code. It is of course not the same as the "target" of QEMU which is the emulated architecture. As TCG started as a generic C backend used @@ -402,6 +407,23 @@ double-word product T0. The later is returned in two single-word outputs. Similar to mulu2, except the two inputs T1 and T2 are signed. +********* Memory Barrier support + +* mb <$arg> + +Generate a target memory barrier instruction to ensure memory ordering as being +enforced by a corresponding guest memory barrier instruction. The ordering +enforced by the backend may be stricter than the ordering required by the guest. +It cannot be weaker. This opcode takes a constant argument which is required to +generate the appropriate barrier instruction. The backend should take care to +emit the target barrier instruction only when necessary i.e., for SMP guests and +when MTTCG is enabled. + +The guest translators should generate this opcode for all guest instructions +which have ordering side effects. + +Please see docs/atomics.txt for more information on memory barriers. + ********* 64-bit guest on 32-bit host support The following opcodes are internal to TCG. Thus they are to be implemented by diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c index 08b2d031aa..1939d3528f 100644 --- a/tcg/aarch64/tcg-target.inc.c +++ b/tcg/aarch64/tcg-target.inc.c @@ -372,6 +372,11 @@ typedef enum { I3510_EOR = 0x4a000000, I3510_EON = 0x4a200000, I3510_ANDS = 0x6a000000, + + /* System instructions. */ + DMB_ISH = 0xd50338bf, + DMB_LD = 0x00000100, + DMB_ST = 0x00000200, } AArch64Insn; static inline uint32_t tcg_in32(TCGContext *s) @@ -981,6 +986,18 @@ static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl, tcg_out_mov(s, ext, orig_rl, rl); } +static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + static const uint32_t sync[] = { + [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST, + [TCG_MO_ST_ST] = DMB_ISH | DMB_ST, + [TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST] = DMB_ISH | DMB_LD, + [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD, + }; + tcg_out32(s, sync[a0 & TCG_MO_ALL]); +} + #ifdef CONFIG_SOFTMMU /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * TCGMemOpIdx oi, uintptr_t ra) @@ -1081,23 +1098,24 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, int tlb_offset = is_read ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); - int a_bits = get_alignment_bits(opc); + unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1u << a_bits) - 1; + unsigned s_mask = (1u << s_bits) - 1; TCGReg base = TCG_AREG0, x3; uint64_t tlb_mask; /* For aligned accesses, we check the first byte and include the alignment bits within the address. For unaligned access, we check that we don't cross pages using the address of the last byte of the access. 
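A note on the softmmu fast-path changes that start with the aarch64 hunk above (the same trick reappears for i386, ppc and s390 below): the alignment bits are now folded into the TLB tag compare instead of being checked separately. As a standalone illustration of the arithmetic, not QEMU code:

/* Value compared against the TLB tag for an access of (1 << s_bits)
   bytes that must be (1 << a_bits) aligned. */
static uint64_t tlb_compare_value(uint64_t addr, unsigned s_bits,
                                  unsigned a_bits, uint64_t page_mask)
{
    uint64_t s_mask = (1ull << s_bits) - 1;
    uint64_t a_mask = (1ull << a_bits) - 1;
    /* When the required alignment already covers the whole access,
       ADDR itself is compared; otherwise step to the last aligned
       granule covered by the access. */
    uint64_t last = (a_bits >= s_bits) ? addr : addr + (s_mask - a_mask);

    /* Keep the page bits plus the alignment bits.  This equals the page
       tag (whose low bits are zero) only when ADDR is aligned and the
       access does not spill onto the next page; anything else falls
       into the slow path, which raises the alignment fault or handles
       the page crossing. */
    return last & (page_mask | a_mask);
}

For example, an 8-byte load that only needs 4-byte alignment (s_bits = 3, a_bits = 2) adds 4 to the address: starting 4 bytes before a page boundary, the sum lands on the next page and misses the tag, while a misaligned start leaves a nonzero bit under a_mask.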
*/ - if (a_bits >= 0) { - /* A byte access or an alignment check required */ - tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); + if (a_bits >= s_bits) { x3 = addr_reg; } else { tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, - TCG_REG_X3, addr_reg, (1 << (opc & MO_SIZE)) - 1); - tlb_mask = TARGET_PAGE_MASK; + TCG_REG_X3, addr_reg, s_mask - a_mask); x3 = TCG_REG_X3; } + tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; /* Extract the TLB index from the address into X0. X0 = @@ -1648,6 +1666,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2); break; + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -1772,6 +1794,7 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_muluh_i64, { "r", "r", "r" } }, { INDEX_op_mulsh_i64, { "r", "r", "r" } }, + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c index 172febafdd..ffa0d40660 100644 --- a/tcg/arm/tcg-target.inc.c +++ b/tcg/arm/tcg-target.inc.c @@ -313,6 +313,10 @@ typedef enum { INSN_LDRD_REG = 0x000000d0, INSN_STRD_IMM = 0x004000f0, INSN_STRD_REG = 0x000000f0, + + INSN_DMB_ISH = 0x5bf07ff5, + INSN_DMB_MCR = 0xba0f07ee, + } ARMInsn; #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00) @@ -1066,6 +1070,15 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l) } } +static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + if (use_armv7_instructions) { + tcg_out32(s, INSN_DMB_ISH); + } else if (use_armv6_instructions) { + tcg_out32(s, INSN_DMB_MCR); + } +} + #ifdef CONFIG_SOFTMMU /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr, * int mmu_idx, uintptr_t ra) @@ -1168,7 +1181,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - TCGMemOp s_bits, int mem_index, bool is_load) + TCGMemOp opc, int mem_index, bool is_load) { TCGReg base = TCG_AREG0; int cmp_off = @@ -1176,6 +1189,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); /* Should generate something like the following: * shr tmp, addrlo, #TARGET_PAGE_BITS (1) @@ -1216,10 +1231,13 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } } - /* Check alignment. */ - if (s_bits) { - tcg_out_dat_imm(s, COND_AL, ARITH_TST, - 0, addrlo, (1 << s_bits) - 1); + /* Check alignment. We don't support inline unaligned acceses, + but we can easily support overalignment checks. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + if (a_bits) { + tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1); } /* Load the tlb addend. 
*/ @@ -1499,7 +1517,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1); /* This a conditional BL only to load a pointer within this opcode into LR for the slow path. We will not be using the value for a tail call. */ @@ -1630,7 +1648,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0); + addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0); tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend); @@ -1923,6 +1941,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); break; + case INDEX_op_mb: + tcg_out_mb(s, args[0]); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -1997,6 +2019,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_div_i32, { "r", "r", "r" } }, { INDEX_op_divu_i32, { "r", "r", "r" } }, + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index 6f8cdca756..eeb1777bbb 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -686,6 +686,18 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } +static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Given the strength of x86 memory ordering, we only need care for + store-load ordering. Experimentally, "lock orl $0,0(%esp)" is + faster than "mfence", so don't bother with the sse insn. */ + if (a0 & TCG_MO_ST_LD) { + tcg_out8(s, 0xf0); + tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0); + tcg_out8(s, 0); + } +} + static inline void tcg_out_push(TCGContext *s, int reg) { tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); @@ -1202,7 +1214,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, TCGType ttype = TCG_TYPE_I32; TCGType tlbtype = TCG_TYPE_I32; int trexw = 0, hrexw = 0, tlbrexw = 0; - int a_bits = get_alignment_bits(opc); + unsigned a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; target_ulong tlb_mask; if (TCG_TARGET_REG_BITS == 64) { @@ -1220,17 +1235,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } tcg_out_mov(s, tlbtype, r0, addrlo); - if (a_bits >= 0) { - /* A byte access or an alignment check required */ + /* If the required alignment is at least as large as the access, simply + copy the address and mask. For lesser alignments, check that we don't + cross pages for the complete access. */ + if (a_bits >= s_bits) { tcg_out_mov(s, ttype, r1, addrlo); - tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. 
*/ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, - (1 << (opc & MO_SIZE)) - 1); - tlb_mask = TARGET_PAGE_MASK; + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask); } + tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask; tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); @@ -2130,6 +2143,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_mb: + tcg_out_mb(s, args[0]); + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2195,6 +2211,8 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_mb, { } }, + #if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } }, diff --git a/tcg/ia64/tcg-target.inc.c b/tcg/ia64/tcg-target.inc.c index c91f39281b..b04d716c3d 100644 --- a/tcg/ia64/tcg-target.inc.c +++ b/tcg/ia64/tcg-target.inc.c @@ -247,6 +247,7 @@ enum { OPC_LD4_M3 = 0x0a080000000ull, OPC_LD8_M1 = 0x080c0000000ull, OPC_LD8_M3 = 0x0a0c0000000ull, + OPC_MF_M24 = 0x00110000000ull, OPC_MUX1_I3 = 0x0eca0000000ull, OPC_NOP_B9 = 0x04008000000ull, OPC_NOP_F16 = 0x00008000000ull, @@ -1496,10 +1497,18 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) R1, R3 are clobbered, leaving R56 free for... BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, - TCGMemOp s_bits, int off_rw, int off_add, + TCGMemOp opc, int off_rw, int off_add, uint64_t bswap1, uint64_t bswap2) { - /* + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); + + /* We don't support unaligned accesses, but overalignment is easy. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } + + /* .mii mov r2 = off_rw extr.u r3 = addr_reg, ... 
# extract tlb page @@ -1521,7 +1530,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, cmp.eq p6, p7 = r3, r58 nop ;; - */ + */ tcg_out_bundle(s, miI, tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw), tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3, @@ -1536,8 +1545,8 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg, TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS, 63 - CPU_TLB_ENTRY_BITS), tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0, - TCG_REG_R57, 63 - s_bits, - TARGET_PAGE_BITS - s_bits - 1)); + TCG_REG_R57, 63 - a_bits, + TARGET_PAGE_BITS - a_bits - 1)); tcg_out_bundle(s, MmI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_REG_R2, TCG_REG_R3), @@ -1661,7 +1670,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) s_bits = opc & MO_SIZE; /* Read the TLB entry */ - tcg_out_qemu_tlb(s, addr_reg, s_bits, + tcg_out_qemu_tlb(s, addr_reg, opc, offsetof(CPUArchState, tlb_table[mem_index][0].addr_read), offsetof(CPUArchState, tlb_table[mem_index][0].addend), INSN_NOP_I, INSN_NOP_I); @@ -1739,7 +1748,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg); } - tcg_out_qemu_tlb(s, addr_reg, s_bits, + tcg_out_qemu_tlb(s, addr_reg, opc, offsetof(CPUArchState, tlb_table[mem_index][0].addr_write), offsetof(CPUArchState, tlb_table[mem_index][0].addend), pre1, pre2); @@ -2223,6 +2232,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_st(s, args); break; + case INDEX_op_mb: + tcg_out_bundle(s, mmI, OPC_MF_M24, INSN_NOP_M, INSN_NOP_I); + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2336,6 +2348,7 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_qemu_st_i32, { "SZ", "r" } }, { INDEX_op_qemu_st_i64, { "SZ", "r" } }, + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/mips/tcg-target.inc.c b/tcg/mips/tcg-target.inc.c index 2f9be48139..abce6026f8 100644 --- a/tcg/mips/tcg-target.inc.c +++ b/tcg/mips/tcg-target.inc.c @@ -292,6 +292,7 @@ typedef enum { OPC_JALR = OPC_SPECIAL | 0x09, OPC_MOVZ = OPC_SPECIAL | 0x0A, OPC_MOVN = OPC_SPECIAL | 0x0B, + OPC_SYNC = OPC_SPECIAL | 0x0F, OPC_MFHI = OPC_SPECIAL | 0x10, OPC_MFLO = OPC_SPECIAL | 0x12, OPC_MULT = OPC_SPECIAL | 0x18, @@ -339,6 +340,14 @@ typedef enum { * backwards-compatible at the assembly level. */ OPC_MUL = use_mips32r6_instructions ? OPC_MUL_R6 : OPC_MUL_R5, + + /* MIPS r6 introduced names for weaker variants of SYNC. These are + backward compatible to previous architecture revisions. */ + OPC_SYNC_WMB = OPC_SYNC | 0x04 << 5, + OPC_SYNC_MB = OPC_SYNC | 0x10 << 5, + OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 5, + OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 5, + OPC_SYNC_RMB = OPC_SYNC | 0x13 << 5, } MIPSInsn; /* @@ -1040,7 +1049,9 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, TCGReg addrh, TCGMemOpIdx oi, tcg_insn_unit *label_ptr[2], bool is_load) { - TCGMemOp s_bits = get_memop(oi) & MO_SIZE; + TCGMemOp opc = get_memop(oi); + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); int mem_index = get_mmuidx(oi); int cmp_off = (is_load @@ -1071,10 +1082,15 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl, tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0, cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0)); + /* We don't currently support unaligned accesses. + We could do so with mips32r6. 
*/ + if (a_bits < s_bits) { + a_bits = s_bits; + } /* Mask the page bits, keeping the alignment bits to compare against. In between on 32-bit targets, load the tlb addend for the fast path. */ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + TARGET_PAGE_MASK | ((1 << a_bits) - 1)); if (TARGET_LONG_BITS == 32) { tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off); } @@ -1377,6 +1393,22 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #endif } +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + static const MIPSInsn sync[] = { + /* Note that SYNC_MB is a slightly weaker than SYNC 0, + as the former is an ordering barrier and the latter + is a completion barrier. */ + [0 ... TCG_MO_ALL] = OPC_SYNC_MB, + [TCG_MO_LD_LD] = OPC_SYNC_RMB, + [TCG_MO_ST_ST] = OPC_SYNC_WMB, + [TCG_MO_LD_ST] = OPC_SYNC_RELEASE, + [TCG_MO_LD_ST | TCG_MO_ST_ST] = OPC_SYNC_RELEASE, + [TCG_MO_LD_ST | TCG_MO_LD_LD] = OPC_SYNC_ACQUIRE, + }; + tcg_out32(s, sync[a0 & TCG_MO_ALL]); +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1646,6 +1678,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const_args[4], const_args[5], true); break; + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ @@ -1726,6 +1761,8 @@ static const TCGTargetOpDef mips_op_defs[] = { { INDEX_op_qemu_ld_i64, { "L", "L", "lZ", "lZ" } }, { INDEX_op_qemu_st_i64, { "SZ", "SZ", "SZ", "SZ" } }, #endif + + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/optimize.c b/tcg/optimize.c index cffe89b525..0f1349086b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -468,9 +468,8 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, default: return 2; } - } else { - return 2; } + return 2; } /* Return 2 if the condition can't be simplified, and the result @@ -542,6 +541,7 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2) void tcg_optimize(TCGContext *s) { int oi, oi_next, nb_temps, nb_globals; + TCGArg *prev_mb_args = NULL; /* Array VALS has an element for each temp. If this temp holds a constant then its value is kept in VALS' element. @@ -1295,5 +1295,43 @@ void tcg_optimize(TCGContext *s) } break; } + + /* Eliminate duplicate and redundant fence instructions. */ + if (prev_mb_args) { + switch (opc) { + case INDEX_op_mb: + /* Merge two barriers of the same type into one, + * or a weaker barrier into a stronger one, + * or two weaker barriers into a stronger one. + * mb X; mb Y => mb X|Y + * mb; strl => mb; st + * ldaq; mb => ld; mb + * ldaq; strl => ld; mb; st + * Other combinations are also merged into a strong + * barrier. This is stricter than specified but for + * the purposes of TCG is better than not optimizing. + */ + prev_mb_args[0] |= args[0]; + tcg_op_remove(s, op); + break; + + default: + /* Opcodes that end the block stop the optimization. */ + if ((def->flags & TCG_OPF_BB_END) == 0) { + break; + } + /* fallthru */ + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_st_i64: + case INDEX_op_call: + /* Opcodes that touch guest memory stop the optimization. 
*/ + prev_mb_args = NULL; + break; + } + } else if (opc == INDEX_op_mb) { + prev_mb_args = args; + } } } diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c index eaf1bd9bfd..a3262cfb0c 100644 --- a/tcg/ppc/tcg-target.inc.c +++ b/tcg/ppc/tcg-target.inc.c @@ -469,6 +469,10 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type, #define STHX XO31(407) #define STWX XO31(151) +#define EIEIO XO31(854) +#define HWSYNC XO31(598) +#define LWSYNC (HWSYNC | (1u << 21)) + #define SPR(a, b) ((((a)<<5)|(b))<<11) #define LR SPR(8, 0) #define CTR SPR(9, 0) @@ -1243,6 +1247,18 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args, tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5])); } +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + uint32_t insn = HWSYNC; + a0 &= TCG_MO_ALL; + if (a0 == TCG_MO_LD_LD) { + insn = LWSYNC; + } else if (a0 == TCG_MO_ST_ST) { + insn = EIEIO; + } + tcg_out32(s, insn); +} + #ifdef __powerpc64__ void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) { @@ -1404,8 +1420,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); TCGReg base = TCG_AREG0; - TCGMemOp s_bits = opc & MO_SIZE; - int a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1458,39 +1474,43 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); /* Clear the non-page, non-alignment bits from the address */ - if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { - /* We don't support unaligned accesses on 32-bits, preserve - * the bottom bits and thus trigger a comparison failure on - * unaligned accesses + if (TCG_TARGET_REG_BITS == 32) { + /* We don't support unaligned accesses on 32-bits. + * Preserve the bottom bits and thus trigger a comparison + * failure on unaligned accesses. */ - if (a_bits < 0) { + if (a_bits < s_bits) { a_bits = s_bits; } tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (a_bits) { - /* More than byte access, we need to handle alignment */ - if (a_bits > 0) { - /* Alignment required by the front-end, same as 32-bits */ - tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + } else { + TCGReg t = addrlo; + + /* If the access is unaligned, we need to make sure we fail if we + * cross a page boundary. The trick is to add the access size-1 + * to the address before masking the low bits. That will make the + * address overflow to the next page if we cross a page boundary, + * which will then force a mismatch of the TLB compare. + */ + if (a_bits < s_bits) { + unsigned a_mask = (1 << a_bits) - 1; + unsigned s_mask = (1 << s_bits) - 1; + tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask)); + t = TCG_REG_R0; + } + + /* Mask the address for the requested alignment. 
*/ + if (TARGET_LONG_BITS == 32) { + tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); + } else if (a_bits == 0) { + tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); + } else { + tcg_out_rld(s, RLDICL, TCG_REG_R0, t, 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); - } else { - /* We support unaligned accesses, we need to make sure we fail - * if we cross a page boundary. The trick is to add the - * access_size-1 to the address before masking the low bits. - * That will make the address overflow to the next page if we - * cross a page boundary which will then force a mismatch of - * the TLB compare since the next page cannot possibly be in - * the same TLB index. - */ - tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); - tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, - 0, 63 - TARGET_PAGE_BITS); } - } else { - /* Byte access, just chop off the bits below the page index */ - tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); } if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { @@ -2449,6 +2469,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32(s, MULHD | TAB(args[0], args[1], args[2])); break; + case INDEX_op_mb: + tcg_out_mb(s, args[0]); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2596,6 +2620,7 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, #endif + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/s390/tcg-target.inc.c b/tcg/s390/tcg-target.inc.c index 5a7495b063..253d4a0a0b 100644 --- a/tcg/s390/tcg-target.inc.c +++ b/tcg/s390/tcg-target.inc.c @@ -343,6 +343,7 @@ static tcg_insn_unit *tb_ret_addr; #define FACILITY_EXT_IMM (1ULL << (63 - 21)) #define FACILITY_GEN_INST_EXT (1ULL << (63 - 34)) #define FACILITY_LOAD_ON_COND (1ULL << (63 - 45)) +#define FACILITY_FAST_BCR_SER FACILITY_LOAD_ON_COND static uint64_t facilities; @@ -1505,21 +1506,18 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, int mem_index, bool is_ld) { - int a_bits = get_alignment_bits(opc); + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); + unsigned s_mask = (1 << s_bits) - 1; + unsigned a_mask = (1 << a_bits) - 1; int ofs, a_off; uint64_t tlb_mask; /* For aligned accesses, we check the first byte and include the alignment bits within the address. For unaligned access, we check that we don't cross pages using the address of the last byte of the access. */ - if (a_bits >= 0) { - /* A byte access or an alignment check required */ - a_off = 0; - tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1); - } else { - a_off = (1 << (opc & MO_SIZE)) - 1; - tlb_mask = TARGET_PAGE_MASK; - } + a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask); + tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; if (facilities & FACILITY_GEN_INST_EXT) { tcg_out_risbg(s, TCG_REG_R2, addr_reg, @@ -2172,6 +2170,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_deposit(s, args[0], args[2], args[3], args[4]); break; + case INDEX_op_mb: + /* The host memory model is quite strong, we simply need to + serialize the instruction stream. */ + if (args[0] & TCG_MO_ST_LD) { + tcg_out_insn(s, RR, BCR, + facilities & FACILITY_FAST_BCR_SER ? 
14 : 15, 0); + } + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2293,6 +2300,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_movcond_i64, { "r", "r", "rC", "r", "0" } }, { INDEX_op_deposit_i64, { "r", "0", "r" } }, + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/sparc/tcg-target.inc.c b/tcg/sparc/tcg-target.inc.c index 8e98172ca0..700c43487f 100644 --- a/tcg/sparc/tcg-target.inc.c +++ b/tcg/sparc/tcg-target.inc.c @@ -249,6 +249,8 @@ static const int tcg_target_call_oarg_regs[] = { #define STWA (INSN_OP(3) | INSN_OP3(0x14)) #define STXA (INSN_OP(3) | INSN_OP3(0x1e)) +#define MEMBAR (INSN_OP(2) | INSN_OP3(0x28) | INSN_RS1(15) | (1 << 13)) + #ifndef ASI_PRIMARY_LITTLE #define ASI_PRIMARY_LITTLE 0x88 #endif @@ -835,6 +837,12 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *dest) tcg_out_nop(s); } +static void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Note that the TCG memory order constants mirror the Sparc MEMBAR. */ + tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL)); +} + #ifdef CONFIG_SOFTMMU static tcg_insn_unit *qemu_ld_trampoline[16]; static tcg_insn_unit *qemu_st_trampoline[16]; @@ -996,19 +1004,25 @@ static void tcg_target_qemu_prologue(TCGContext *s) is in the returned register, maybe %o0. The TLB addend is in %o1. */ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, - TCGMemOp s_bits, int which) + TCGMemOp opc, int which) { const TCGReg r0 = TCG_REG_O0; const TCGReg r1 = TCG_REG_O1; const TCGReg r2 = TCG_REG_O2; + unsigned s_bits = opc & MO_SIZE; + unsigned a_bits = get_alignment_bits(opc); int tlb_ofs; /* Shift the page number down. */ tcg_out_arithi(s, r1, addr, TARGET_PAGE_BITS, SHIFT_SRL); - /* Mask out the page offset, except for the required alignment. */ + /* Mask out the page offset, except for the required alignment. + We don't support unaligned accesses. */ + if (a_bits < s_bits) { + a_bits = s_bits; + } tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_T1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + TARGET_PAGE_MASK | ((1 << a_bits) - 1)); /* Mask the tlb index. */ tcg_out_arithi(s, r1, r1, CPU_TLB_SIZE - 1, ARITH_AND); @@ -1087,7 +1101,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, tcg_insn_unit *func; tcg_insn_unit *label_ptr; - addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, + addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_read)); /* The fast path is exactly one insn. Thus we can perform the @@ -1169,7 +1183,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, tcg_insn_unit *func; tcg_insn_unit *label_ptr; - addrz = tcg_out_tlb_load(s, addr, memi, memop & MO_SIZE, + addrz = tcg_out_tlb_load(s, addr, memi, memop, offsetof(CPUTLBEntry, addr_write)); /* The fast path is exactly one insn. Thus we can perform the entire @@ -1460,6 +1474,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_arithc(s, a0, TCG_REG_G0, a1, const_args[1], c); break; + case INDEX_op_mb: + tcg_out_mb(s, a0); + break; + case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. 
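Every backend in this patch only has to implement the new INDEX_op_mb opcode; the single front-end entry point that emits it, tcg_gen_mb(), appears in the tcg-op.c hunk that follows and is a no-op unless parallel_cpus is set. A hedged sketch of how a guest translator is expected to use it (the TCG_BAR_* constants come from tcg.h, which is not part of this excerpt; the TCG_MO_* combinations match the backend tables above):

/* Full fence, e.g. SPARC "membar #Sync" or ARM "dmb sy": order all
   earlier loads and stores before all later ones. */
static void gen_guest_full_fence(void)
{
    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
}

/* Acquire-style fence: earlier loads are ordered before all later
   loads and stores. */
static void gen_guest_acquire_fence(void)
{
    tcg_gen_mb(TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_BAR_LDAQ);
}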
*/ @@ -1561,6 +1579,7 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_qemu_st_i32, { "sZ", "A" } }, { INDEX_op_qemu_st_i64, { "SZ", "A" } }, + { INDEX_op_mb, { } }, { -1 }, }; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0243c99094..6e2fb3522f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -148,6 +148,13 @@ void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, tcg_emit_op(ctx, opc, pi); } +void tcg_gen_mb(TCGBar mb_type) +{ + if (parallel_cpus) { + tcg_gen_op1(&tcg_ctx, INDEX_op_mb, mb_type); + } +} + /* 32 bit ops */ void tcg_gen_addi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2) @@ -671,6 +678,33 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) } } +void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_REG_BITS == 32) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + tcg_gen_mulu2_i32(t0, t1, arg1, arg2); + /* Adjust for negative input for the signed arg1. */ + tcg_gen_sari_i32(t2, arg1, 31); + tcg_gen_and_i32(t2, t2, arg2); + tcg_gen_sub_i32(rh, t1, t2); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(t0, arg1); + tcg_gen_extu_i32_i64(t1, arg2); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) { if (TCG_TARGET_HAS_ext8s_i32) { @@ -783,7 +817,7 @@ void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), 31); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) @@ -1741,6 +1775,22 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) } } +void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_mulu2_i64(t0, t1, arg1, arg2); + /* Adjust for negative input for the signed arg1. */ + tcg_gen_sari_i64(t2, arg1, 63); + tcg_gen_and_i64(t2, t2, arg2); + tcg_gen_sub_i64(rh, t1, t2); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} + /* Size changing operations. 
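The adjustment in the new tcg_gen_mulsu2_* helpers above follows from reading the signed operand as its unsigned value minus 2^64 (or 2^32 for the 32-bit variant) when it is negative: the unsigned double-word product then exceeds the signed-by-unsigned product by exactly arg2 in the high half, which is what the sari/and/sub sequence subtracts. A throwaway host-side check of the identity (purely illustrative, uses GCC's __int128):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t a = -3;                      /* signed operand */
    uint64_t b = 0x123456789abcdef0ull;  /* unsigned operand */

    unsigned __int128 pu = (unsigned __int128)(uint64_t)a * b;
    __int128 ps = (__int128)a * b;

    uint64_t hi_unsigned = (uint64_t)(pu >> 64);
    uint64_t hi_signed = (uint64_t)((unsigned __int128)ps >> 64);

    /* The mulsu2 adjustment: subtract b from the unsigned high part
       whenever the signed operand is negative. */
    uint64_t hi_adjusted = hi_unsigned - (a < 0 ? b : 0);

    printf("%s\n", hi_signed == hi_adjusted ? "ok" : "mismatch");
    return 0;
}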
*/ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) @@ -1958,3 +2008,345 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) addr, trace_mem_get_info(memop, 1)); gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); } + +static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc) +{ + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_gen_ext8s_i32(ret, val); + break; + case MO_UB: + tcg_gen_ext8u_i32(ret, val); + break; + case MO_SW: + tcg_gen_ext16s_i32(ret, val); + break; + case MO_UW: + tcg_gen_ext16u_i32(ret, val); + break; + default: + tcg_gen_mov_i32(ret, val); + break; + } +} + +static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, TCGMemOp opc) +{ + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_gen_ext8s_i64(ret, val); + break; + case MO_UB: + tcg_gen_ext8u_i64(ret, val); + break; + case MO_SW: + tcg_gen_ext16s_i64(ret, val); + break; + case MO_UW: + tcg_gen_ext16u_i64(ret, val); + break; + case MO_SL: + tcg_gen_ext32s_i64(ret, val); + break; + case MO_UL: + tcg_gen_ext32u_i64(ret, val); + break; + default: + tcg_gen_mov_i64(ret, val); + break; + } +} + +#ifdef CONFIG_SOFTMMU +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, + TCGv_i32, TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, + TCGv_i64, TCGv_i64, TCGv_i32); +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, + TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, + TCGv_i64, TCGv_i32); +#else +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64); +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32); +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64); +#endif + +#ifdef CONFIG_ATOMIC64 +# define WITH_ATOMIC64(X) X, +#else +# define WITH_ATOMIC64(X) +#endif + +static void * const table_cmpxchg[16] = { + [MO_8] = gen_helper_atomic_cmpxchgb, + [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le, + [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be, + [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le, + [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be, + WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le) + WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be) +}; + +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, + TCGv_i32 newv, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 0, 0); + + if (!parallel_cpus) { + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + tcg_temp_free_i32(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, t1, memop); + } else { + tcg_gen_mov_i32(retv, t1); + } + tcg_temp_free_i32(t1); + } else { + gen_atomic_cx_i32 gen; + + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi); + tcg_temp_free_i32(oi); + } +#else + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv); +#endif + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, retv, memop); + } + } +} + +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, + TCGv_i64 newv, TCGArg idx, TCGMemOp memop) +{ + 
memop = tcg_canonicalize_memop(memop, 1, 0); + + if (!parallel_cpus) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + tcg_temp_free_i64(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, t1, memop); + } else { + tcg_gen_mov_i64(retv, t1); + } + tcg_temp_free_i64(t1); + } else if ((memop & MO_SIZE) == MO_64) { +#ifdef CONFIG_ATOMIC64 + gen_atomic_cx_i64 gen; + + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx)); + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi); + tcg_temp_free_i32(oi); + } +#else + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv); +#endif +#else + gen_helper_exit_atomic(tcg_ctx.tcg_env); +#endif /* CONFIG_ATOMIC64 */ + } else { + TCGv_i32 c32 = tcg_temp_new_i32(); + TCGv_i32 n32 = tcg_temp_new_i32(); + TCGv_i32 r32 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(c32, cmpv); + tcg_gen_extrl_i64_i32(n32, newv); + tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN); + tcg_temp_free_i32(c32); + tcg_temp_free_i32(n32); + + tcg_gen_extu_i32_i64(retv, r32); + tcg_temp_free_i32(r32); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, retv, memop); + } + } +} + +static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, + TCGArg idx, TCGMemOp memop, bool new_val, + void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32)) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + memop = tcg_canonicalize_memop(memop, 0, 0); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + gen(t2, t1, val); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + + tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + +static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, + TCGArg idx, TCGMemOp memop, void * const table[]) +{ + gen_atomic_op_i32 gen; + + memop = tcg_canonicalize_memop(memop, 0, 0); + + gen = table[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(ret, tcg_ctx.tcg_env, addr, val, oi); + tcg_temp_free_i32(oi); + } +#else + gen(ret, tcg_ctx.tcg_env, addr, val); +#endif + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(ret, ret, memop); + } +} + +static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, + TCGArg idx, TCGMemOp memop, bool new_val, + void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + memop = tcg_canonicalize_memop(memop, 1, 0); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + gen(t2, t1, val); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + + tcg_gen_ext_i64(ret, (new_val ? 
t2 : t1), memop); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} + +static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, + TCGArg idx, TCGMemOp memop, void * const table[]) +{ + memop = tcg_canonicalize_memop(memop, 1, 0); + + if ((memop & MO_SIZE) == MO_64) { +#ifdef CONFIG_ATOMIC64 + gen_atomic_op_i64 gen; + + gen = table[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(ret, tcg_ctx.tcg_env, addr, val, oi); + tcg_temp_free_i32(oi); + } +#else + gen(ret, tcg_ctx.tcg_env, addr, val); +#endif +#else + gen_helper_exit_atomic(tcg_ctx.tcg_env); +#endif /* CONFIG_ATOMIC64 */ + } else { + TCGv_i32 v32 = tcg_temp_new_i32(); + TCGv_i32 r32 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(v32, val); + do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table); + tcg_temp_free_i32(v32); + + tcg_gen_extu_i32_i64(ret, r32); + tcg_temp_free_i32(r32); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(ret, ret, memop); + } + } +} + +#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \ +static void * const table_##NAME[16] = { \ + [MO_8] = gen_helper_atomic_##NAME##b, \ + [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \ + [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \ + [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \ + [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \ + WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \ + WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \ +}; \ +void tcg_gen_atomic_##NAME##_i32 \ + (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, TCGMemOp memop) \ +{ \ + if (parallel_cpus) { \ + do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \ + } else { \ + do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \ + tcg_gen_##OP##_i32); \ + } \ +} \ +void tcg_gen_atomic_##NAME##_i64 \ + (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, TCGMemOp memop) \ +{ \ + if (parallel_cpus) { \ + do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \ + } else { \ + do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \ + tcg_gen_##OP##_i64); \ + } \ +} + +GEN_ATOMIC_HELPER(fetch_add, add, 0) +GEN_ATOMIC_HELPER(fetch_and, and, 0) +GEN_ATOMIC_HELPER(fetch_or, or, 0) +GEN_ATOMIC_HELPER(fetch_xor, xor, 0) + +GEN_ATOMIC_HELPER(add_fetch, add, 1) +GEN_ATOMIC_HELPER(and_fetch, and, 1) +GEN_ATOMIC_HELPER(or_fetch, or, 1) +GEN_ATOMIC_HELPER(xor_fetch, xor, 1) + +static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mov_i32(r, b); +} + +static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mov_i64(r, b); +} + +GEN_ATOMIC_HELPER(xchg, mov2, 0) + +#undef GEN_ATOMIC_HELPER diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index f217e80747..6d044b7c5b 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -261,6 +261,8 @@ static inline void tcg_gen_br(TCGLabel *l) tcg_gen_op1(&tcg_ctx, INDEX_op_br, label_arg(l)); } +void tcg_gen_mb(TCGBar); + /* Helper calls. 
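 */

An illustrative aside, not taken from the patch: tcg_gen_mb() expects the OR
of one TCG_BAR_* kind with the TCG_MO_* classes of accesses to be ordered
(both added to tcg/tcg.h by this series), and the atomic generators declared
further down come in fetch_<op> flavours that return the old value and
<op>_fetch flavours that return the new one.  A hedged sketch of front-end
usage; ret, addr, val and mem_idx are illustrative names only:

    /* full barrier ordering all accesses */
    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);

    /* acquire-style ordering after a guest load */
    tcg_gen_mb(TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_BAR_LDAQ);

    /* 32-bit atomic add returning the OLD value; add_fetch would return
       the new value instead */
    tcg_gen_atomic_fetch_add_i32(ret, addr, val, mem_idx, MO_TEUL);

/* Helper call and op generator declarations follow.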
*/ /* 32 bit ops */ @@ -304,6 +306,7 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, TCGv_i32 ah, TCGv_i32 bl, TCGv_i32 bh); void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2); void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16s_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); @@ -480,6 +483,7 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, TCGv_i64 ah, TCGv_i64 bl, TCGv_i64 bh); void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2); void tcg_gen_not_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext8s_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_ext16s_i64(TCGv_i64 ret, TCGv_i64 arg); @@ -852,6 +856,30 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) tcg_gen_qemu_st_i64(arg, addr, mem_index, MO_TEQ); } +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32, + TCGArg, TCGMemOp); +void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, + TCGArg, TCGMemOp); + +void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); + #if TARGET_LONG_BITS == 64 #define tcg_gen_movi_tl tcg_gen_movi_i64 #define tcg_gen_mov_tl tcg_gen_mov_i64 @@ -930,6 +958,17 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_sub2_tl tcg_gen_sub2_i64 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 #define tcg_gen_muls2_tl tcg_gen_muls2_i64 +#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64 +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64 +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64 +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64 +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64 +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64 +#define tcg_gen_atomic_fetch_xor_tl 
tcg_gen_atomic_fetch_xor_i64 +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64 +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64 +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64 +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64 #else #define tcg_gen_movi_tl tcg_gen_movi_i32 #define tcg_gen_mov_tl tcg_gen_mov_i32 @@ -1007,6 +1046,17 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_sub2_tl tcg_gen_sub2_i32 #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 #define tcg_gen_muls2_tl tcg_gen_muls2_i32 +#define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32 +#define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32 +#define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32 +#define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32 +#define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32 +#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32 +#define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32 +#define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32 +#define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32 +#define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32 +#define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32 #endif #if UINTPTR_MAX == UINT32_MAX diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 6d0410c4b9..45528d2192 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -42,6 +42,8 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END) # define IMPL64 TCG_OPF_64BIT #endif +DEF(mb, 0, 0, 1, 0) + DEF(mov_i32, 1, 1, 0, TCG_OPF_NOT_PRESENT) DEF(movi_i32, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(setcond_i32, 1, 2, 1, 0) diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h index 23a0c37711..1deb86a099 100644 --- a/tcg/tcg-runtime.h +++ b/tcg/tcg-runtime.h @@ -14,3 +14,112 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) + +DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) + +#ifdef CONFIG_SOFTMMU + +DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG, + i32, env, tl, i32, i32, i32) +#ifdef CONFIG_ATOMIC64 +DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG, + i64, env, tl, i64, i64, i32) +DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG, + i64, env, tl, i64, i64, i32) +#endif + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \ + TCG_CALL_NO_WG, i64, env, tl, i64, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \ + TCG_CALL_NO_WG, i64, env, tl, i64, i32) +#else +#define GEN_ATOMIC_HELPERS(NAME) \ 
+ DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) \ + DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32, i32) +#endif /* CONFIG_ATOMIC64 */ + +#else + +DEF_HELPER_FLAGS_4(atomic_cmpxchgb, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgw_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgw_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgl_be, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +DEF_HELPER_FLAGS_4(atomic_cmpxchgl_le, TCG_CALL_NO_WG, i32, env, tl, i32, i32) +#ifdef CONFIG_ATOMIC64 +DEF_HELPER_FLAGS_4(atomic_cmpxchgq_be, TCG_CALL_NO_WG, i64, env, tl, i64, i64) +DEF_HELPER_FLAGS_4(atomic_cmpxchgq_le, TCG_CALL_NO_WG, i64, env, tl, i64, i64) +#endif + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_le), \ + TCG_CALL_NO_WG, i64, env, tl, i64) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), q_be), \ + TCG_CALL_NO_WG, i64, env, tl, i64) +#else +#define GEN_ATOMIC_HELPERS(NAME) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), b), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), w_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_le), \ + TCG_CALL_NO_WG, i32, env, tl, i32) \ + DEF_HELPER_FLAGS_3(glue(glue(atomic_, NAME), l_be), \ + TCG_CALL_NO_WG, i32, env, tl, i32) +#endif /* CONFIG_ATOMIC64 */ + +#endif /* CONFIG_SOFTMMU */ + +GEN_ATOMIC_HELPERS(fetch_add) +GEN_ATOMIC_HELPERS(fetch_and) +GEN_ATOMIC_HELPERS(fetch_or) +GEN_ATOMIC_HELPERS(fetch_xor) + +GEN_ATOMIC_HELPERS(add_fetch) +GEN_ATOMIC_HELPERS(and_fetch) +GEN_ATOMIC_HELPERS(or_fetch) +GEN_ATOMIC_HELPERS(xor_fetch) + +GEN_ATOMIC_HELPERS(xchg) + +#undef GEN_ATOMIC_HELPERS diff --git a/tcg/tcg.c b/tcg/tcg.c index 42417bdc92..aabf94f365 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -40,8 +40,6 @@ #define NO_CPU_IO_DEFS #include "cpu.h" -#include "qemu/host-utils.h" -#include "qemu/timer.h" #include "exec/cpu-common.h" #include "exec/exec-all.h" @@ -414,10 +412,12 @@ void tcg_prologue_init(TCGContext *s) #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { + qemu_log_lock(); qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); log_disas(buf0, prologue_size); qemu_log("\n"); qemu_log_flush(); + qemu_log_unlock(); } #endif } @@ -2099,15 +2099,9 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) save_globals(s, allocated_regs); } -static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, - TCGLifeData arg_life) +static void 
tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots, + tcg_target_ulong val, TCGLifeData arg_life) { - TCGTemp *ots; - tcg_target_ulong val; - - ots = &s->temps[args[0]]; - val = args[1]; - if (ots->fixed_reg) { /* For fixed registers, we do not do any constant propagation. */ tcg_out_movi(s, ots->type, ots->reg, val); @@ -2128,6 +2122,15 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, } } +static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, + TCGLifeData arg_life) +{ + TCGTemp *ots = &s->temps[args[0]]; + tcg_target_ulong val = args[1]; + + tcg_reg_alloc_do_movi(s, ots, val, arg_life); +} + static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, const TCGArg *args, TCGLifeData arg_life) { @@ -2143,21 +2146,29 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, otype = ots->type; itype = ts->type; - /* If the source value is not in a register, and we're going to be - forced to have it in a register in order to perform the copy, - then copy the SOURCE value into its own register first. That way - we don't have to reload SOURCE the next time it is used. */ - if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG) - || ts->val_type == TEMP_VAL_MEM) { + if (ts->val_type == TEMP_VAL_CONST) { + /* propagate constant or generate sti */ + tcg_target_ulong val = ts->val; + if (IS_DEAD_ARG(1)) { + temp_dead(s, ts); + } + tcg_reg_alloc_do_movi(s, ots, val, arg_life); + return; + } + + /* If the source value is in memory we're going to be forced + to have it in a register in order to perform the copy. Copy + the SOURCE value into its own register first, that way we + don't have to reload SOURCE the next time it is used. */ + if (ts->val_type == TEMP_VAL_MEM) { temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs); } + tcg_debug_assert(ts->val_type == TEMP_VAL_REG); if (IS_DEAD_ARG(0) && !ots->fixed_reg) { /* mov to a non-saved dead register makes no sense (even with liveness analysis disabled). */ tcg_debug_assert(NEED_SYNC_ARG(0)); - /* The code above should have moved the temp to a register. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_REG); if (!ots->mem_allocated) { temp_allocate_frame(s, args[0]); } @@ -2166,20 +2177,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, temp_dead(s, ts); } temp_dead(s, ots); - } else if (ts->val_type == TEMP_VAL_CONST) { - /* propagate constant */ - if (ots->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ots->reg] = NULL; - } - ots->val_type = TEMP_VAL_CONST; - ots->val = ts->val; - if (IS_DEAD_ARG(1)) { - temp_dead(s, ts); - } } else { - /* The code in the first if block should have moved the - temp to a register. */ - tcg_debug_assert(ts->val_type == TEMP_VAL_REG); if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { /* the mov can be suppressed */ if (ots->val_type == TEMP_VAL_REG) { @@ -2546,9 +2544,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP) && qemu_log_in_addr_range(tb->pc))) { + qemu_log_lock(); qemu_log("OP:\n"); tcg_dump_ops(s); qemu_log("\n"); + qemu_log_unlock(); } #endif @@ -2574,9 +2574,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND) && qemu_log_in_addr_range(tb->pc))) { + qemu_log_lock(); qemu_log("OP before indirect lowering:\n"); tcg_dump_ops(s); qemu_log("\n"); + qemu_log_unlock(); } #endif /* Replace indirect temps with direct temps. 
*/ @@ -2594,9 +2596,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) && qemu_log_in_addr_range(tb->pc))) { + qemu_log_lock(); qemu_log("OP after optimization and liveness analysis:\n"); tcg_dump_ops(s); qemu_log("\n"); + qemu_log_unlock(); } #endif diff --git a/tcg/tcg.h b/tcg/tcg.h index 1bcabcad9d..a35e4c4fd4 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -287,20 +287,19 @@ typedef enum TCGMemOp { * MO_ALIGN accesses will result in a call to the CPU's * do_unaligned_access hook if the guest address is not aligned. * The default depends on whether the target CPU defines ALIGNED_ONLY. + * * Some architectures (e.g. ARMv8) need the address which is aligned * to a size more than the size of the memory access. - * To support such check it's enough the current costless alignment - * check implementation in QEMU, but we need to support - * an alignment size specifying. - * MO_ALIGN supposes a natural alignment - * (i.e. the alignment size is the size of a memory access). - * Note that an alignment size must be equal or greater - * than an access size. + * Some architectures (e.g. SPARCv9) need an address which is aligned, + * but less strictly than the natural alignment. + * + * MO_ALIGN supposes the alignment size is the size of a memory access. + * * There are three options: - * - an alignment to the size of an access (MO_ALIGN); - * - an alignment to the specified size that is equal or greater than - * an access size (MO_ALIGN_x where 'x' is a size in bytes); * - unaligned access permitted (MO_UNALN). + * - an alignment to the size of an access (MO_ALIGN); + * - an alignment to a specified size, which may be more or less than + * the access size (MO_ALIGN_x where 'x' is a size in bytes); */ MO_ASHIFT = 4, MO_AMASK = 7 << MO_ASHIFT, @@ -353,50 +352,60 @@ typedef enum TCGMemOp { * @memop: TCGMemOp value * * Extract the alignment size from the memop. - * - * Returns: 0 in case of byte access (which is always aligned); - * positive value - number of alignment bits; - * negative value if unaligned access enabled - * and this is not a byte access. */ -static inline int get_alignment_bits(TCGMemOp memop) +static inline unsigned get_alignment_bits(TCGMemOp memop) { - int a = memop & MO_AMASK; - int s = memop & MO_SIZE; - int r; + unsigned a = memop & MO_AMASK; if (a == MO_UNALN) { - /* Negative value if unaligned access enabled, - * or zero value in case of byte access. - */ - return -s; + /* No alignment required. */ + a = 0; } else if (a == MO_ALIGN) { - /* A natural alignment: return a number of access size bits */ - r = s; + /* A natural alignment requirement. */ + a = memop & MO_SIZE; } else { - /* Specific alignment size. It must be equal or greater - * than the access size. - */ - r = a >> MO_ASHIFT; - tcg_debug_assert(r >= s); + /* A specific alignment requirement. */ + a = a >> MO_ASHIFT; } #if defined(CONFIG_SOFTMMU) /* The requested alignment cannot overlap the TLB flags. */ - tcg_debug_assert((TLB_FLAGS_MASK & ((1 << r) - 1)) == 0); + tcg_debug_assert((TLB_FLAGS_MASK & ((1 << a) - 1)) == 0); #endif - return r; + return a; } typedef tcg_target_ulong TCGArg; -/* Define a type and accessor macros for variables. Using pointer types - is nice because it gives some level of type safely. Converting to and - from intptr_t rather than int reduces the number of sign-extension - instructions that get implied on 64-bit hosts. Users of tcg_gen_* don't - need to know about any of this, and should treat TCGv as an opaque type. 
- In addition we do typechecking for different types of variables. TCGv_i32 - and TCGv_i64 are 32/64-bit variables respectively. TCGv and TCGv_ptr - are aliases for target_ulong and host pointer sized values respectively. */ +/* Define type and accessor macros for TCG variables. + + TCG variables are the inputs and outputs of TCG ops, as described + in tcg/README. Target CPU front-end code uses these types to deal + with TCG variables as it emits TCG code via the tcg_gen_* functions. + They come in several flavours: + * TCGv_i32 : 32 bit integer type + * TCGv_i64 : 64 bit integer type + * TCGv_ptr : a host pointer type + * TCGv : an integer type the same size as target_ulong + (an alias for either TCGv_i32 or TCGv_i64) + The compiler's type checking will complain if you mix them + up and pass the wrong sized TCGv to a function. + + Users of tcg_gen_* don't need to know about any of the internal + details of these, and should treat them as opaque types. + You won't be able to look inside them in a debugger either. + + Internal implementation details follow: + + Note that there is no definition of the structs TCGv_i32_d etc anywhere. + This is deliberate, because the values we store in variables of type + TCGv_i32 are not really pointers-to-structures. They're just small + integers, but keeping them in pointer types like this means that the + compiler will complain if you accidentally pass a TCGv_i32 to a + function which takes a TCGv_i64, and so on. Only the internals of + TCG need to care about the actual contents of the types, and they always + box and unbox via the MAKE_TCGV_* and GET_TCGV_* functions. + Converting to and from intptr_t rather than int reduces the number + of sign-extension instructions that get implied on 64-bit hosts. */ typedef struct TCGv_i32_d *TCGv_i32; typedef struct TCGv_i64_d *TCGv_i64; @@ -478,6 +487,23 @@ static inline intptr_t QEMU_ARTIFICIAL GET_TCGV_PTR(TCGv_ptr t) #define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1) #define TCG_CALL_DUMMY_ARG ((TCGArg)(-1)) +typedef enum { + /* Used to indicate the type of accesses on which ordering + is to be ensured. Modeled after SPARC barriers. */ + TCG_MO_LD_LD = 0x01, + TCG_MO_ST_LD = 0x02, + TCG_MO_LD_ST = 0x04, + TCG_MO_ST_ST = 0x08, + TCG_MO_ALL = 0x0F, /* OR of the above */ + + /* Used to indicate the kind of ordering which is to be ensured by the + instruction. These types are derived from x86/aarch64 instructions. + It should be noted that these are different from C11 semantics. */ + TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */ + TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */ + TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */ +} TCGBar; + /* Conditions. Note that these are laid out for easy manipulation by the functions below: bit 0 is used for inverting; @@ -700,6 +726,7 @@ struct TCGContext { }; extern TCGContext tcg_ctx; +extern bool parallel_cpus; static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v) { @@ -721,14 +748,15 @@ static inline bool tcg_op_buf_full(void) /* pool based memory allocation */ +/* tb_lock must be held for tcg_malloc_internal. */ void *tcg_malloc_internal(TCGContext *s, int size); void tcg_pool_reset(TCGContext *s); -void tcg_pool_delete(TCGContext *s); void tb_lock(void); void tb_unlock(void); void tb_lock_reset(void); +/* Called with tb_lock held. 
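   An illustrative aside, not taken from the patch: like tcg_malloc_internal()
   above, this fast path hands out memory from the per-context pool, so the
   lock is a precondition on the caller rather than something taken here.  A
   hedged sketch of a caller that is not already inside the translator:

       tb_lock();
       void *scratch = tcg_malloc(size);   // size and the consumer below
       use_scratch(scratch);               // are illustrative names only
       tb_unlock();

   Within QEMU's translation loop tb_lock is normally already held by the
   time this is reached, so no extra locking appears at the call sites.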
*/ static inline void *tcg_malloc(int size) { TCGContext *s = &tcg_ctx; @@ -907,7 +935,6 @@ void tcg_optimize(TCGContext *s); /* only used for debugging purposes */ void tcg_dump_ops(TCGContext *s); -void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf); TCGv_i32 tcg_const_i32(int32_t val); TCGv_i64 tcg_const_i64(int64_t val); TCGv_i32 tcg_const_local_i32(int32_t val); @@ -1174,6 +1201,90 @@ uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, # define helper_ret_ldq_cmmu helper_le_ldq_cmmu #endif +uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr, + uint32_t cmpv, uint32_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); +uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr, + uint64_t cmpv, uint64_t newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \ +TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \ + (CPUArchState *env, target_ulong addr, TYPE val, \ + TCGMemOpIdx oi, uintptr_t retaddr); + +#ifdef CONFIG_ATOMIC64 +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \ + GEN_ATOMIC_HELPER(NAME, uint64_t, q_be) +#else +#define GEN_ATOMIC_HELPER_ALL(NAME) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, b) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \ + GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) +#endif + +GEN_ATOMIC_HELPER_ALL(fetch_add) +GEN_ATOMIC_HELPER_ALL(fetch_sub) +GEN_ATOMIC_HELPER_ALL(fetch_and) +GEN_ATOMIC_HELPER_ALL(fetch_or) +GEN_ATOMIC_HELPER_ALL(fetch_xor) + +GEN_ATOMIC_HELPER_ALL(add_fetch) +GEN_ATOMIC_HELPER_ALL(sub_fetch) +GEN_ATOMIC_HELPER_ALL(and_fetch) +GEN_ATOMIC_HELPER_ALL(or_fetch) +GEN_ATOMIC_HELPER_ALL(xor_fetch) + +GEN_ATOMIC_HELPER_ALL(xchg) + +#undef GEN_ATOMIC_HELPER_ALL +#undef GEN_ATOMIC_HELPER #endif /* CONFIG_SOFTMMU */ +#ifdef CONFIG_ATOMIC128 +#include "qemu/int128.h" + +/* These aren't really a "proper" helpers because TCG cannot manage Int128. + However, use the same format as the others, for use by the backends. 
*/ +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); + +#endif /* CONFIG_ATOMIC128 */ + #endif /* TCG_H */ diff --git a/tcg/tci/README b/tcg/tci/README index 3786b0915b..386c3c7507 100644 --- a/tcg/tci/README +++ b/tcg/tci/README @@ -9,7 +9,7 @@ code fragments ("basic blocks") from target code (any of the targets supported by QEMU) to a code representation which can be run on a host. -QEMU can create native code for some hosts (arm, hppa, i386, ia64, ppc, ppc64, +QEMU can create native code for some hosts (arm, i386, ia64, ppc, ppc64, s390, sparc, x86_64). For others, unofficial host support was written. By adding a code generator for a virtual machine and using an diff --git a/tcg/tci/tcg-target.inc.c b/tcg/tci/tcg-target.inc.c index 3c47ea7a9e..9dbf4d5512 100644 --- a/tcg/tci/tcg-target.inc.c +++ b/tcg/tci/tcg-target.inc.c @@ -255,6 +255,7 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { { INDEX_op_bswap32_i32, { R, R } }, #endif + { INDEX_op_mb, { } }, { -1 }, }; @@ -800,6 +801,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } tcg_out_i(s, *args++); break; + case INDEX_op_mb: + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. 
*/ diff --git a/tci.c b/tci.c index b488c0d8e4..4bdc645f2a 100644 --- a/tci.c +++ b/tci.c @@ -1236,6 +1236,10 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) tcg_abort(); } break; + case INDEX_op_mb: + /* Ensure ordering for all kinds */ + smp_mb(); + break; default: TODO(); break; diff --git a/tests/.gitignore b/tests/.gitignore index dbb52639f6..e9b182e2bd 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -1,3 +1,4 @@ +atomic_add-bench check-qdict check-qfloat check-qint @@ -12,7 +13,10 @@ rcutorture test-aio test-base64 test-bitops +test-blockjob test-blockjob-txn +test-bufferiszero +test-char test-clone-visitor test-coroutine test-crypto-afsplit @@ -57,22 +61,24 @@ test-qht-par test-qmp-commands test-qmp-commands.h test-qmp-event -test-qmp-input-strict -test-qmp-input-visitor +test-qobject-input-strict +test-qobject-input-visitor test-qmp-introspect.[ch] test-qmp-marshal.c -test-qmp-output-visitor +test-qobject-output-visitor test-rcu-list -test-rfifolock +test-replication test-string-input-visitor test-string-output-visitor test-thread-pool test-throttle test-timed-average +test-uuid test-visitor-serialization test-vmstate test-write-threshold test-x86-cpuid +test-x86-cpuid-compat test-xbzrle test-netfilter test-filter-mirror diff --git a/tests/Makefile.include b/tests/Makefile.include index 14be4915b9..e98d3b6bb3 100644 --- a/tests/Makefile.include +++ b/tests/Makefile.include @@ -8,6 +8,8 @@ SYSEMU_TARGET_LIST := $(subst -softmmu.mak,,$(notdir \ check-unit-y = tests/check-qdict$(EXESUF) gcov-files-check-qdict-y = qobject/qdict.c +check-unit-y += tests/test-char$(EXESUF) +gcov-files-check-qdict-y = qemu-char.c check-unit-y += tests/check-qfloat$(EXESUF) gcov-files-check-qfloat-y = qobject/qfloat.c check-unit-y += tests/check-qint$(EXESUF) @@ -20,13 +22,13 @@ check-unit-y += tests/check-qnull$(EXESUF) gcov-files-check-qnull-y = qobject/qnull.c check-unit-y += tests/check-qjson$(EXESUF) gcov-files-check-qjson-y = qobject/qjson.c -check-unit-y += tests/test-qmp-output-visitor$(EXESUF) -gcov-files-test-qmp-output-visitor-y = qapi/qmp-output-visitor.c +check-unit-y += tests/test-qobject-output-visitor$(EXESUF) +gcov-files-test-qobject-output-visitor-y = qapi/qobject-output-visitor.c check-unit-y += tests/test-clone-visitor$(EXESUF) gcov-files-test-clone-visitor-y = qapi/qapi-clone-visitor.c -check-unit-y += tests/test-qmp-input-visitor$(EXESUF) -gcov-files-test-qmp-input-visitor-y = qapi/qmp-input-visitor.c -check-unit-y += tests/test-qmp-input-strict$(EXESUF) +check-unit-y += tests/test-qobject-input-visitor$(EXESUF) +gcov-files-test-qobject-input-visitor-y = qapi/qobject-input-visitor.c +check-unit-y += tests/test-qobject-input-strict$(EXESUF) check-unit-y += tests/test-qmp-commands$(EXESUF) gcov-files-test-qmp-commands-y = qapi/qmp-dispatch.c check-unit-y += tests/test-string-input-visitor$(EXESUF) @@ -43,7 +45,6 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF) check-unit-y += tests/test-iov$(EXESUF) gcov-files-test-iov-y = util/iov.c check-unit-y += tests/test-aio$(EXESUF) -check-unit-$(CONFIG_POSIX) += tests/test-rfifolock$(EXESUF) check-unit-y += tests/test-throttle$(EXESUF) gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c @@ -112,6 +113,12 @@ check-unit-y += tests/test-crypto-xts$(EXESUF) check-unit-y += tests/test-crypto-block$(EXESUF) gcov-files-test-logging-y = tests/test-logging.c check-unit-y += tests/test-logging$(EXESUF) +check-unit-$(CONFIG_REPLICATION) += tests/test-replication$(EXESUF) 
+check-unit-y += tests/test-bufferiszero$(EXESUF) +gcov-files-check-bufferiszero-y = util/bufferiszero.c +check-unit-y += tests/test-uuid$(EXESUF) +check-unit-y += tests/ptimer-test$(EXESUF) +gcov-files-ptimer-test-y = hw/core/ptimer.c check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -195,6 +202,7 @@ check-qtest-i386-y += tests/hd-geo-test$(EXESUF) gcov-files-i386-y += hw/block/hd-geometry.c check-qtest-i386-y += tests/boot-order-test$(EXESUF) check-qtest-i386-y += tests/bios-tables-test$(EXESUF) +check-qtest-i386-y += tests/boot-serial-test$(EXESUF) check-qtest-i386-y += tests/pxe-test$(EXESUF) check-qtest-i386-y += tests/rtc-test$(EXESUF) check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) @@ -238,40 +246,72 @@ check-qtest-i386-y += tests/test-netfilter$(EXESUF) check-qtest-i386-y += tests/test-filter-mirror$(EXESUF) check-qtest-i386-y += tests/test-filter-redirector$(EXESUF) check-qtest-i386-y += tests/postcopy-test$(EXESUF) +check-qtest-i386-y += tests/test-x86-cpuid-compat$(EXESUF) check-qtest-x86_64-y += $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c gcov-files-x86_64-y = $(subst i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y)) + +check-qtest-alpha-y = tests/boot-serial-test$(EXESUF) + check-qtest-mips-y = tests/endianness-test$(EXESUF) + check-qtest-mips64-y = tests/endianness-test$(EXESUF) + check-qtest-mips64el-y = tests/endianness-test$(EXESUF) + check-qtest-ppc-y = tests/endianness-test$(EXESUF) -check-qtest-ppc64-y = tests/endianness-test$(EXESUF) +check-qtest-ppc-y += tests/boot-order-test$(EXESUF) +check-qtest-ppc-y += tests/prom-env-test$(EXESUF) +check-qtest-ppc-y += tests/drive_del-test$(EXESUF) +check-qtest-ppc-y += tests/boot-serial-test$(EXESUF) + +check-qtest-ppc64-y = tests/spapr-phb-test$(EXESUF) +gcov-files-ppc64-y = ppc64-softmmu/hw/ppc/spapr_pci.c +check-qtest-ppc64-y += tests/endianness-test$(EXESUF) +check-qtest-ppc64-y += tests/boot-order-test$(EXESUF) +check-qtest-ppc64-y += tests/prom-env-test$(EXESUF) +check-qtest-ppc64-y += tests/pnv-xscom-test$(EXESUF) +check-qtest-ppc64-y += tests/drive_del-test$(EXESUF) +check-qtest-ppc64-y += tests/postcopy-test$(EXESUF) +check-qtest-ppc64-y += tests/boot-serial-test$(EXESUF) +check-qtest-ppc64-y += tests/rtas-test$(EXESUF) +check-qtest-ppc64-y += tests/pxe-test$(EXESUF) +check-qtest-ppc64-y += tests/usb-hcd-ohci-test$(EXESUF) +gcov-files-ppc64-y += hw/usb/hcd-ohci.c +check-qtest-ppc64-y += tests/usb-hcd-uhci-test$(EXESUF) +gcov-files-ppc64-y += hw/usb/hcd-uhci.c +check-qtest-ppc64-y += tests/usb-hcd-xhci-test$(EXESUF) +gcov-files-ppc64-y += hw/usb/hcd-xhci.c +check-qtest-ppc64-y += $(check-qtest-virtio-y) + check-qtest-sh4-y = tests/endianness-test$(EXESUF) + check-qtest-sh4eb-y = tests/endianness-test$(EXESUF) + +check-qtest-sparc-y = tests/prom-env-test$(EXESUF) +#check-qtest-sparc-y += tests/m48t59-test$(EXESUF) +#gcov-files-sparc-y = hw/timer/m48t59.c + check-qtest-sparc64-y = tests/endianness-test$(EXESUF) -#check-qtest-sparc-y = tests/m48t59-test$(EXESUF) #check-qtest-sparc64-y += tests/m48t59-test$(EXESUF) -gcov-files-sparc-y += hw/timer/m48t59.c -gcov-files-sparc64-y += hw/timer/m48t59.c +#gcov-files-sparc64-y += hw/timer/m48t59.c +#Disabled for now, triggers a TCG bug on 32-bit hosts +#check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) + check-qtest-arm-y = tests/tmp105-test$(EXESUF) check-qtest-arm-y += tests/ds1338-test$(EXESUF) +check-qtest-arm-y += tests/m25p80-test$(EXESUF) gcov-files-arm-y += hw/misc/tmp105.c check-qtest-arm-y += 
tests/virtio-blk-test$(EXESUF) gcov-files-arm-y += arm-softmmu/hw/block/virtio-blk.c -check-qtest-ppc-y += tests/boot-order-test$(EXESUF) -check-qtest-ppc64-y += tests/boot-order-test$(EXESUF) -check-qtest-ppc-y += tests/drive_del-test$(EXESUF) -check-qtest-ppc64-y += tests/drive_del-test$(EXESUF) -check-qtest-ppc64-y += tests/spapr-phb-test$(EXESUF) -gcov-files-ppc64-y += ppc64-softmmu/hw/ppc/spapr_pci.c -check-qtest-ppc-y += tests/prom-env-test$(EXESUF) -check-qtest-ppc64-y += tests/prom-env-test$(EXESUF) -check-qtest-sparc-y += tests/prom-env-test$(EXESUF) -#Disabled for now, triggers a TCG bug on 32-bit hosts -#check-qtest-sparc64-y += tests/prom-env-test$(EXESUF) +check-qtest-arm-y += tests/test-arm-mptimer$(EXESUF) +gcov-files-arm-y += hw/timer/arm_mptimer.c + check-qtest-microblazeel-y = $(check-qtest-microblaze-y) + check-qtest-xtensaeb-y = $(check-qtest-xtensa-y) -check-qtest-ppc64-y += tests/postcopy-test$(EXESUF) + +check-qtest-s390x-y = tests/boot-serial-test$(EXESUF) check-qtest-generic-y += tests/qom-test$(EXESUF) @@ -413,15 +453,16 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \ tests/check-qlist.o tests/check-qfloat.o tests/check-qnull.o \ tests/check-qjson.o \ tests/test-coroutine.o tests/test-string-output-visitor.o \ - tests/test-string-input-visitor.o tests/test-qmp-output-visitor.o \ + tests/test-string-input-visitor.o tests/test-qobject-output-visitor.o \ tests/test-clone-visitor.o \ - tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \ + tests/test-qobject-input-visitor.o tests/test-qobject-input-strict.o \ tests/test-qmp-commands.o tests/test-visitor-serialization.o \ tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \ tests/test-opts-visitor.o tests/test-qmp-event.o \ tests/rcutorture.o tests/test-rcu-list.o \ tests/test-qdist.o \ - tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o + tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \ + tests/atomic_add-bench.o $(test-obj-y): QEMU_INCLUDES += -Itests QEMU_CFLAGS += -I$(SRC_PATH)/tests @@ -446,9 +487,10 @@ tests/check-qnull$(EXESUF): tests/check-qnull.o $(test-util-obj-y) tests/check-qjson$(EXESUF): tests/check-qjson.o $(test-util-obj-y) tests/check-qom-interface$(EXESUF): tests/check-qom-interface.o $(test-qom-obj-y) tests/check-qom-proplist$(EXESUF): tests/check-qom-proplist.o $(test-qom-obj-y) + +tests/test-char$(EXESUF): tests/test-char.o qemu-char.o qemu-timer.o $(test-util-obj-y) $(qtest-obj-y) $(test-io-obj-y) tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(test-block-obj-y) tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y) -tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o $(test-util-obj-y) tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y) tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) $(test-util-obj-y) tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o $(test-block-obj-y) $(test-util-obj-y) @@ -465,6 +507,8 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y) tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y) tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y) tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y) +tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y) +tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y) tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ hw/core/qdev.o hw/core/qdev-properties.o 
hw/core/hotplug.o\ @@ -480,42 +524,46 @@ tests/test-timed-average$(EXESUF): tests/test-timed-average.o qemu-timer.o \ $(test-util-obj-y) tests/test-base64$(EXESUF): tests/test-base64.o \ libqemuutil.a libqemustub.a +tests/ptimer-test$(EXESUF): tests/ptimer-test.o tests/ptimer-test-stubs.o hw/core/ptimer.o libqemustub.a tests/test-logging$(EXESUF): tests/test-logging.o $(test-util-obj-y) +tests/test-replication$(EXESUF): tests/test-replication.o $(test-util-obj-y) \ + $(test-block-obj-y) + tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \ $(gen-out-type) -o tests -p "test-" $<, \ - " GEN $@") + "GEN","$@") tests/test-qapi-visit.c tests/test-qapi-visit.h :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \ $(gen-out-type) -o tests -p "test-" $<, \ - " GEN $@") + "GEN","$@") tests/test-qmp-commands.h tests/test-qmp-marshal.c :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \ $(gen-out-type) -o tests -p "test-" $<, \ - " GEN $@") + "GEN","$@") tests/test-qapi-event.c tests/test-qapi-event.h :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-event.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-event.py \ $(gen-out-type) -o tests -p "test-" $<, \ - " GEN $@") + "GEN","$@") tests/test-qmp-introspect.c tests/test-qmp-introspect.h :\ $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py) $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-introspect.py \ $(gen-out-type) -o tests -p "test-" $<, \ - " GEN $@") + "GEN","$@") tests/test-string-output-visitor$(EXESUF): tests/test-string-output-visitor.o $(test-qapi-obj-y) tests/test-string-input-visitor$(EXESUF): tests/test-string-input-visitor.o $(test-qapi-obj-y) tests/test-qmp-event$(EXESUF): tests/test-qmp-event.o $(test-qapi-obj-y) -tests/test-qmp-output-visitor$(EXESUF): tests/test-qmp-output-visitor.o $(test-qapi-obj-y) +tests/test-qobject-output-visitor$(EXESUF): tests/test-qobject-output-visitor.o $(test-qapi-obj-y) tests/test-clone-visitor$(EXESUF): tests/test-clone-visitor.o $(test-qapi-obj-y) -tests/test-qmp-input-visitor$(EXESUF): tests/test-qmp-input-visitor.o $(test-qapi-obj-y) -tests/test-qmp-input-strict$(EXESUF): tests/test-qmp-input-strict.o $(test-qapi-obj-y) +tests/test-qobject-input-visitor$(EXESUF): tests/test-qobject-input-visitor.o $(test-qapi-obj-y) +tests/test-qobject-input-strict$(EXESUF): tests/test-qobject-input-strict.o $(test-qapi-obj-y) tests/test-qmp-commands$(EXESUF): tests/test-qmp-commands.o tests/test-qmp-marshal.o $(test-qapi-obj-y) tests/test-visitor-serialization$(EXESUF): tests/test-visitor-serialization.o $(test-qapi-obj-y) tests/test-opts-visitor$(EXESUF): tests/test-opts-visitor.o $(test-qapi-obj-y) @@ -557,13 +605,17 @@ tests/test-crypto-block$(EXESUF): tests/test-crypto-block.o $(test-crypto-obj-y) libqos-obj-y = tests/libqos/pci.o tests/libqos/fw_cfg.o tests/libqos/malloc.o libqos-obj-y += tests/libqos/i2c.o tests/libqos/libqos.o +libqos-spapr-obj-y = $(libqos-obj-y) tests/libqos/malloc-spapr.o +libqos-spapr-obj-y += tests/libqos/libqos-spapr.o +libqos-spapr-obj-y += tests/libqos/rtas.o +libqos-spapr-obj-y += 
tests/libqos/pci-spapr.o libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o libqos-pc-obj-y += tests/libqos/malloc-pc.o tests/libqos/libqos-pc.o libqos-pc-obj-y += tests/libqos/ahci.o libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o libqos-imx-obj-y = $(libqos-obj-y) tests/libqos/i2c-imx.o -libqos-usb-obj-y = $(libqos-pc-obj-y) tests/libqos/usb.o -libqos-virtio-obj-y = $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o tests/libqos/virtio-mmio.o tests/libqos/malloc-generic.o +libqos-usb-obj-y = $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/libqos/usb.o +libqos-virtio-obj-y = $(libqos-spapr-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o tests/libqos/virtio-mmio.o tests/libqos/malloc-generic.o tests/device-introspect-test$(EXESUF): tests/device-introspect-test.o tests/rtc-test$(EXESUF): tests/rtc-test.o @@ -571,6 +623,7 @@ tests/m48t59-test$(EXESUF): tests/m48t59-test.o tests/endianness-test$(EXESUF): tests/endianness-test.o tests/spapr-phb-test$(EXESUF): tests/spapr-phb-test.o $(libqos-obj-y) tests/prom-env-test$(EXESUF): tests/prom-env-test.o $(libqos-obj-y) +tests/rtas-test$(EXESUF): tests/rtas-test.o $(libqos-spapr-obj-y) tests/fdc-test$(EXESUF): tests/fdc-test.o tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y) tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) @@ -578,11 +631,13 @@ tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) +tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y) tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ tests/boot-sector.o $(libqos-obj-y) tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y) +tests/m25p80-test$(EXESUF): tests/m25p80-test.o tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) tests/q35-test$(EXESUF): tests/q35-test.o $(libqos-pc-obj-y) tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) @@ -590,6 +645,7 @@ tests/e1000-test$(EXESUF): tests/e1000-test.o tests/e1000e-test$(EXESUF): tests/e1000e-test.o $(libqos-pc-obj-y) tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) tests/pcnet-test$(EXESUF): tests/pcnet-test.o +tests/pnv-xscom-test$(EXESUF): tests/pnv-xscom-test.o tests/eepro100-test$(EXESUF): tests/eepro100-test.o tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o tests/ne2000-test$(EXESUF): tests/ne2000-test.o @@ -600,7 +656,7 @@ tests/virtio-blk-test$(EXESUF): tests/virtio-blk-test.o $(libqos-virtio-obj-y) tests/virtio-net-test$(EXESUF): tests/virtio-net-test.o $(libqos-pc-obj-y) $(libqos-virtio-obj-y) tests/virtio-rng-test$(EXESUF): tests/virtio-rng-test.o $(libqos-pc-obj-y) tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o $(libqos-virtio-obj-y) -tests/virtio-9p-test$(EXESUF): tests/virtio-9p-test.o +tests/virtio-9p-test$(EXESUF): tests/virtio-9p-test.o $(libqos-virtio-obj-y) tests/virtio-serial-test$(EXESUF): tests/virtio-serial-test.o tests/virtio-console-test$(EXESUF): tests/virtio-console-test.o tests/tpci200-test$(EXESUF): tests/tpci200-test.o @@ -622,18 +678,21 @@ tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) 
tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o tests/postcopy-test$(EXESUF): tests/postcopy-test.o -tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) +tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) $(libqos-virtio-obj-y) $(libqos-pc-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y) tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y) tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y) +tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y) tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y) tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o +tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y) +tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o tests/migration/stress$(EXESUF): tests/migration/stress.o - $(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ," LINK $(TARGET_DIR)$@") + $(call quiet-command, $(LINKPROG) -static -O3 $(PTHREAD_LIB) -o $@ $< ,"LINK","$(TARGET_DIR)$@") INITRD_WORK_DIR=tests/migration/initrd @@ -696,7 +755,7 @@ $(patsubst %, check-qtest-%, $(QTEST_TARGETS)): check-qtest-%: $(check-qtest-y) $(call quiet-command,QTEST_QEMU_BINARY=$*-softmmu/qemu-system-$* \ QTEST_QEMU_IMG=qemu-img$(EXESUF) \ MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \ - gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y) $(check-qtest-generic-y),"GTESTER $@") + gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y) $(check-qtest-generic-y),"GTESTER","$@") $(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y) $(gcov-files-generic-y); do \ echo Gcov report for $$f:;\ $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \ @@ -707,7 +766,7 @@ $(patsubst %, check-%, $(check-unit-y)): check-%: % $(if $(CONFIG_GCOV),@rm -f *.gcda */*.gcda */*/*.gcda */*/*/*.gcda,) $(call quiet-command, \ MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \ - gtester $(GTESTER_OPTIONS) -m=$(SPEED) $*,"GTESTER $*") + gtester $(GTESTER_OPTIONS) -m=$(SPEED) $*,"GTESTER","$*") $(if $(CONFIG_GCOV),@for f in $(gcov-files-$(subst tests/,,$*)-y) $(gcov-files-generic-y); do \ echo Gcov report for $$f:;\ $(GCOV) $(GCOV_OPTIONS) $$f -o `dirname $$f`; \ @@ -718,18 +777,18 @@ $(patsubst %, check-%, $(check-unit-y)): check-%: % $(patsubst %, check-report-qtest-%.xml, $(QTEST_TARGETS)): check-report-qtest-%.xml: $(check-qtest-y) $(call quiet-command,QTEST_QEMU_BINARY=$*-softmmu/qemu-system-$* \ QTEST_QEMU_IMG=qemu-img$(EXESUF) \ - gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $(check-qtest-$*-y) $(check-qtest-generic-y),"GTESTER $@") + gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $(check-qtest-$*-y) $(check-qtest-generic-y),"GTESTER","$@") check-report-unit.xml: $(check-unit-y) - $(call quiet-command,gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $^, "GTESTER $@") + $(call quiet-command,gtester -q $(GTESTER_OPTIONS) -o $@ -m=$(SPEED) $^,"GTESTER","$@") # Reports and overall runs check-report.xml: $(patsubst %,check-report-qtest-%.xml, $(QTEST_TARGETS)) check-report-unit.xml - $(call quiet-command,$(SRC_PATH)/scripts/gtester-cat $^ > 
$@, " GEN $@") + $(call quiet-command,$(SRC_PATH)/scripts/gtester-cat $^ > $@,"GEN","$@") check-report.html: check-report.xml - $(call quiet-command,gtester-report $< > $@, " GEN $@") + $(call quiet-command,gtester-report $< > $@,"GEN","$@") # Other tests @@ -749,7 +808,7 @@ $(patsubst %, check-%, $(check-qapi-schema-y)): check-%.json: $(SRC_PATH)/%.json $(PYTHON) $(SRC_PATH)/tests/qapi-schema/test-qapi.py \ $^ >$*.test.out 2>$*.test.err; \ echo $$? >$*.test.exit, \ - " TEST $*.out") + "TEST","$*.out") @diff -q $(SRC_PATH)/$*.out $*.test.out @# Sanitize error messages (make them independent of build directory) @perl -p -e 's|\Q$(SRC_PATH)\E/||g' $*.test.err | diff -q $(SRC_PATH)/$*.err - diff --git a/tests/acpi-test-data/pc/DSDT.cphp b/tests/acpi-test-data/pc/DSDT.cphp index e8b146208e..9f405cfd83 100644 Binary files a/tests/acpi-test-data/pc/DSDT.cphp and b/tests/acpi-test-data/pc/DSDT.cphp differ diff --git a/tests/acpi-test-data/pc/SRAT.cphp b/tests/acpi-test-data/pc/SRAT.cphp new file mode 100644 index 0000000000..ff2137642f Binary files /dev/null and b/tests/acpi-test-data/pc/SRAT.cphp differ diff --git a/tests/acpi-test-data/q35/DSDT.cphp b/tests/acpi-test-data/q35/DSDT.cphp index 6cc28c6dae..a0ce6b3264 100644 Binary files a/tests/acpi-test-data/q35/DSDT.cphp and b/tests/acpi-test-data/q35/DSDT.cphp differ diff --git a/tests/acpi-test-data/q35/SRAT.cphp b/tests/acpi-test-data/q35/SRAT.cphp new file mode 100644 index 0000000000..ff2137642f Binary files /dev/null and b/tests/acpi-test-data/q35/SRAT.cphp differ diff --git a/tests/ahci-test.c b/tests/ahci-test.c index 9c0adce220..ef17629345 100644 --- a/tests/ahci-test.c +++ b/tests/ahci-test.c @@ -78,25 +78,23 @@ static void string_bswap16(uint16_t *s, size_t bytes) /** * Verify that the transfer did not corrupt our state at all. */ -static void verify_state(AHCIQState *ahci) +static void verify_state(AHCIQState *ahci, uint64_t hba_old) { int i, j; uint32_t ahci_fingerprint; uint64_t hba_base; - uint64_t hba_stored; AHCICommandHeader cmd; ahci_fingerprint = qpci_config_readl(ahci->dev, PCI_VENDOR_ID); g_assert_cmphex(ahci_fingerprint, ==, ahci->fingerprint); /* If we haven't initialized, this is as much as can be validated. */ - if (!ahci->hba_base) { + if (!ahci->enabled) { return; } hba_base = (uint64_t)qpci_config_readl(ahci->dev, PCI_BASE_ADDRESS_5); - hba_stored = (uint64_t)(uintptr_t)ahci->hba_base; - g_assert_cmphex(hba_base, ==, hba_stored); + g_assert_cmphex(hba_base, ==, hba_old); g_assert_cmphex(ahci_rreg(ahci, AHCI_CAP), ==, ahci->cap); g_assert_cmphex(ahci_rreg(ahci, AHCI_CAP2), ==, ahci->cap2); @@ -119,12 +117,15 @@ static void ahci_migrate(AHCIQState *from, AHCIQState *to, const char *uri) QOSState *tmp = to->parent; QPCIDevice *dev = to->dev; char *uri_local = NULL; + uint64_t hba_old; if (uri == NULL) { uri_local = g_strdup_printf("%s%s", "unix:", mig_socket); uri = uri_local; } + hba_old = (uint64_t)qpci_config_readl(from->dev, PCI_BASE_ADDRESS_5); + /* context will be 'to' after completion. 
*/ migrate(from->parent, to->parent, uri); @@ -141,7 +142,7 @@ static void ahci_migrate(AHCIQState *from, AHCIQState *to, const char *uri) from->parent = tmp; from->dev = dev; - verify_state(to); + verify_state(to, hba_old); g_free(uri_local); } @@ -1472,8 +1473,13 @@ static int ahci_cb_cmp_buff(AHCIQState *ahci, AHCICommand *cmd, const AHCIOpts *opts) { unsigned char *tx = opts->opaque; - unsigned char *rx = g_malloc0(opts->size); + unsigned char *rx; + if (!opts->size) { + return 0; + } + + rx = g_malloc0(opts->size); bufread(opts->buffer, rx, opts->size); g_assert_cmphex(memcmp(tx, rx, opts->size), ==, 0); g_free(rx); @@ -1481,7 +1487,8 @@ static int ahci_cb_cmp_buff(AHCIQState *ahci, AHCICommand *cmd, return 0; } -static void ahci_test_cdrom(int nsectors, bool dma) +static void ahci_test_cdrom(int nsectors, bool dma, uint8_t cmd, + bool override_bcl, uint16_t bcl) { AHCIQState *ahci; unsigned char *tx; @@ -1492,10 +1499,13 @@ static void ahci_test_cdrom(int nsectors, bool dma) .atapi = true, .atapi_dma = dma, .post_cb = ahci_cb_cmp_buff, + .set_bcl = override_bcl, + .bcl = bcl, }; + uint64_t iso_size = ATAPI_SECTOR_SIZE * (nsectors + 1); /* Prepare ISO and fill 'tx' buffer */ - fd = prepare_iso(1024 * 1024, &tx, &iso); + fd = prepare_iso(iso_size, &tx, &iso); opts.opaque = tx; /* Standard startup wonkery, but use ide-cd and our special iso file */ @@ -1504,7 +1514,7 @@ static void ahci_test_cdrom(int nsectors, bool dma) "-device ide-cd,drive=drive0 ", iso); /* Build & Send AHCI command */ - ahci_exec(ahci, ahci_port_select(ahci), CMD_ATAPI_READ_10, &opts); + ahci_exec(ahci, ahci_port_select(ahci), cmd, &opts); /* Cleanup */ g_free(tx); @@ -1512,24 +1522,133 @@ static void ahci_test_cdrom(int nsectors, bool dma) remove_iso(fd, iso); } +static void ahci_test_cdrom_read10(int nsectors, bool dma) +{ + ahci_test_cdrom(nsectors, dma, CMD_ATAPI_READ_10, false, 0); +} + static void test_cdrom_dma(void) { - ahci_test_cdrom(1, true); + ahci_test_cdrom_read10(1, true); } static void test_cdrom_dma_multi(void) { - ahci_test_cdrom(3, true); + ahci_test_cdrom_read10(3, true); } static void test_cdrom_pio(void) { - ahci_test_cdrom(1, false); + ahci_test_cdrom_read10(1, false); } static void test_cdrom_pio_multi(void) { - ahci_test_cdrom(3, false); + ahci_test_cdrom_read10(3, false); +} + +/* Regression test: Test that a READ_CD command with a BCL of 0 but a size of 0 + * completes as a NOP instead of erroring out. 
*/ +static void test_atapi_bcl(void) +{ + ahci_test_cdrom(0, false, CMD_ATAPI_READ_CD, true, 0); +} + + +static void atapi_wait_tray(bool open) +{ + QDict *rsp = qmp_eventwait_ref("DEVICE_TRAY_MOVED"); + QDict *data = qdict_get_qdict(rsp, "data"); + if (open) { + g_assert(qdict_get_bool(data, "tray-open")); + } else { + g_assert(!qdict_get_bool(data, "tray-open")); + } + QDECREF(rsp); +} + +static void test_atapi_tray(void) +{ + AHCIQState *ahci; + unsigned char *tx; + char *iso; + int fd; + uint8_t port, sense, asc; + uint64_t iso_size = ATAPI_SECTOR_SIZE; + QDict *rsp; + + fd = prepare_iso(iso_size, &tx, &iso); + ahci = ahci_boot_and_enable("-drive if=none,id=drive0,file=%s,format=raw " + "-M q35 " + "-device ide-cd,drive=drive0 ", iso); + port = ahci_port_select(ahci); + + ahci_atapi_eject(ahci, port); + atapi_wait_tray(true); + + ahci_atapi_load(ahci, port); + atapi_wait_tray(false); + + /* Remove media */ + qmp_async("{'execute': 'blockdev-open-tray', " + "'arguments': {'device': 'drive0'}}"); + atapi_wait_tray(true); + rsp = qmp_receive(); + QDECREF(rsp); + + qmp_discard_response("{'execute': 'x-blockdev-remove-medium', " + "'arguments': {'device': 'drive0'}}"); + + /* Test the tray without a medium */ + ahci_atapi_load(ahci, port); + atapi_wait_tray(false); + + ahci_atapi_eject(ahci, port); + atapi_wait_tray(true); + + /* Re-insert media */ + qmp_discard_response("{'execute': 'blockdev-add', " + "'arguments': {'node-name': 'node0', " + "'driver': 'raw', " + "'file': { 'driver': 'file', " + "'filename': %s }}}", iso); + qmp_discard_response("{'execute': 'x-blockdev-insert-medium'," + "'arguments': { 'device': 'drive0', " + "'node-name': 'node0' }}"); + + /* Again, the event shows up first */ + qmp_async("{'execute': 'blockdev-close-tray', " + "'arguments': {'device': 'drive0'}}"); + atapi_wait_tray(false); + rsp = qmp_receive(); + QDECREF(rsp); + + /* Now, to convince ATAPI we understand the media has changed... */ + ahci_atapi_test_ready(ahci, port, false, SENSE_NOT_READY); + ahci_atapi_get_sense(ahci, port, &sense, &asc); + g_assert_cmpuint(sense, ==, SENSE_NOT_READY); + g_assert_cmpuint(asc, ==, ASC_MEDIUM_NOT_PRESENT); + + ahci_atapi_test_ready(ahci, port, false, SENSE_UNIT_ATTENTION); + ahci_atapi_get_sense(ahci, port, &sense, &asc); + g_assert_cmpuint(sense, ==, SENSE_UNIT_ATTENTION); + g_assert_cmpuint(asc, ==, ASC_MEDIUM_MAY_HAVE_CHANGED); + + ahci_atapi_test_ready(ahci, port, true, SENSE_NO_SENSE); + ahci_atapi_get_sense(ahci, port, &sense, &asc); + g_assert_cmpuint(sense, ==, SENSE_NO_SENSE); + + /* Final tray test. 
*/ + ahci_atapi_eject(ahci, port); + atapi_wait_tray(true); + + ahci_atapi_load(ahci, port); + atapi_wait_tray(false); + + /* Cleanup */ + g_free(tx); + ahci_shutdown(ahci); + remove_iso(fd, iso); } /******************************************************************************/ @@ -1821,6 +1940,9 @@ int main(int argc, char **argv) qtest_add_func("/ahci/cdrom/pio/single", test_cdrom_pio); qtest_add_func("/ahci/cdrom/pio/multi", test_cdrom_pio_multi); + qtest_add_func("/ahci/cdrom/pio/bcl", test_atapi_bcl); + qtest_add_func("/ahci/cdrom/eject", test_atapi_tray); + ret = g_test_run(); /* Cleanup */ diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c new file mode 100644 index 0000000000..caa1e8e689 --- /dev/null +++ b/tests/atomic_add-bench.c @@ -0,0 +1,163 @@ +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/host-utils.h" +#include "qemu/processor.h" + +struct thread_info { + uint64_t r; +} QEMU_ALIGNED(64); + +struct count { + unsigned long val; +} QEMU_ALIGNED(64); + +static QemuThread *threads; +static struct thread_info *th_info; +static unsigned int n_threads = 1; +static unsigned int n_ready_threads; +static struct count *counts; +static unsigned int duration = 1; +static unsigned int range = 1024; +static bool test_start; +static bool test_stop; + +static const char commands_string[] = + " -n = number of threads\n" + " -d = duration in seconds\n" + " -r = range (will be rounded up to pow2)"; + +static void usage_complete(char *argv[]) +{ + fprintf(stderr, "Usage: %s [options]\n", argv[0]); + fprintf(stderr, "options:\n%s\n", commands_string); +} + +/* + * From: https://en.wikipedia.org/wiki/Xorshift + * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only + * guaranteed to be >= INT_MAX). + */ +static uint64_t xorshift64star(uint64_t x) +{ + x ^= x >> 12; /* a */ + x ^= x << 25; /* b */ + x ^= x >> 27; /* c */ + return x * UINT64_C(2685821657736338717); +} + +static void *thread_func(void *arg) +{ + struct thread_info *info = arg; + + atomic_inc(&n_ready_threads); + while (!atomic_read(&test_start)) { + cpu_relax(); + } + + while (!atomic_read(&test_stop)) { + unsigned int index; + + info->r = xorshift64star(info->r); + index = info->r & (range - 1); + atomic_inc(&counts[index].val); + } + return NULL; +} + +static void run_test(void) +{ + unsigned int remaining; + unsigned int i; + + while (atomic_read(&n_ready_threads) != n_threads) { + cpu_relax(); + } + atomic_set(&test_start, true); + do { + remaining = sleep(duration); + } while (remaining); + atomic_set(&test_stop, true); + + for (i = 0; i < n_threads; i++) { + qemu_thread_join(&threads[i]); + } +} + +static void create_threads(void) +{ + unsigned int i; + + threads = g_new(QemuThread, n_threads); + th_info = g_new(struct thread_info, n_threads); + counts = qemu_memalign(64, sizeof(*counts) * range); + memset(counts, 0, sizeof(*counts) * range); + + for (i = 0; i < n_threads; i++) { + struct thread_info *info = &th_info[i]; + + info->r = (i + 1) ^ time(NULL); + qemu_thread_create(&threads[i], NULL, thread_func, info, + QEMU_THREAD_JOINABLE); + } +} + +static void pr_params(void) +{ + printf("Parameters:\n"); + printf(" # of threads: %u\n", n_threads); + printf(" duration: %u\n", duration); + printf(" ops' range: %u\n", range); +} + +static void pr_stats(void) +{ + unsigned long long val = 0; + unsigned int i; + double tx; + + for (i = 0; i < range; i++) { + val += counts[i].val; + } + tx = val / duration / 1e6; + + printf("Results:\n"); + printf("Duration: %u s\n", duration); 
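The atomic_add-bench microbenchmark added above leans on two details: each counts[] slot is padded to 64 bytes so that increments on different indexes never share a cache line, and xorshift64* gives every thread a cheap private random stream instead of contending on rand(). Below is a minimal standalone sketch of the same measurement idea, using plain C11 atomics and POSIX threads rather than QEMU's qemu/thread.h and atomic helpers; the thread count, range and one-second window are hard-coded purely for illustration and are not part of the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define N_THREADS 4
#define RANGE     1024                /* must remain a power of two */

struct counter {
    atomic_ulong val;
} __attribute__((aligned(64)));       /* one counter per cache line */

static struct counter counts[RANGE];
static atomic_bool stop;

/* xorshift64*: cheap per-thread PRNG, same recurrence as in the benchmark */
static uint64_t xorshift64star(uint64_t x)
{
    x ^= x >> 12;
    x ^= x << 25;
    x ^= x >> 27;
    return x * UINT64_C(2685821657736338717);
}

static void *worker(void *arg)
{
    uint64_t r = (uintptr_t)arg + 1;  /* non-zero seed per thread */

    while (!atomic_load_explicit(&stop, memory_order_relaxed)) {
        r = xorshift64star(r);
        atomic_fetch_add_explicit(&counts[r & (RANGE - 1)].val, 1,
                                  memory_order_relaxed);
    }
    return NULL;
}

int main(void)
{
    pthread_t th[N_THREADS];
    unsigned long long total = 0;
    int i;

    for (i = 0; i < N_THREADS; i++) {
        pthread_create(&th[i], NULL, worker, (void *)(uintptr_t)i);
    }
    sleep(1);                         /* one-second measurement window */
    atomic_store(&stop, true);
    for (i = 0; i < N_THREADS; i++) {
        pthread_join(th[i], NULL);
    }
    for (i = 0; i < RANGE; i++) {
        total += counts[i].val;
    }
    printf("%.2f Mops/s\n", total / 1e6);
    return 0;
}

Built with something like "cc -O2 -pthread", this reproduces the shape of the benchmark: relaxed fetch-and-add on randomly selected, cache-line-aligned slots, summed only after the timing window has closed.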
+ printf(" Throughput: %.2f Mops/s\n", tx); + printf(" Throughput/thread: %.2f Mops/s/thread\n", tx / n_threads); +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + + for (;;) { + c = getopt(argc, argv, "hd:n:r:"); + if (c < 0) { + break; + } + switch (c) { + case 'h': + usage_complete(argv); + exit(0); + case 'd': + duration = atoi(optarg); + break; + case 'n': + n_threads = atoi(optarg); + break; + case 'r': + range = pow2ceil(atoi(optarg)); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + parse_args(argc, argv); + pr_params(); + create_threads(); + run_test(); + pr_stats(); + return 0; +} diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index de4019e57d..812f830539 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -112,7 +112,7 @@ typedef struct { g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ } while (0) -static const char *disk = "tests/acpi-test-disk.raw"; +static char disk[] = "tests/acpi-test-disk-XXXXXX"; static const char *data_dir = "tests/acpi-test-data"; #ifdef CONFIG_IASL static const char *iasl = stringify(CONFIG_IASL); @@ -711,9 +711,12 @@ static void test_acpi_one(const char *params, test_data *data) { char *args; - args = g_strdup_printf("-net none -display none %s " + /* Disable kernel irqchip to be able to override apic irq0. */ + args = g_strdup_printf("-machine %s,accel=%s,kernel-irqchip=off " + "-net none -display none %s " "-drive id=hd0,if=none,file=%s,format=raw " "-device ide-hd,drive=hd0 ", + data->machine, "kvm:tcg", params ? params : "", disk); qtest_start(args); @@ -758,7 +761,7 @@ static void test_acpi_piix4_tcg(void) data.machine = MACHINE_PC; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine accel=tcg", &data); + test_acpi_one(NULL, &data); free_test_data(&data); } @@ -771,7 +774,7 @@ static void test_acpi_piix4_tcg_bridge(void) data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine accel=tcg -device pci-bridge,chassis_nr=1", &data); + test_acpi_one("-device pci-bridge,chassis_nr=1", &data); free_test_data(&data); } @@ -783,7 +786,7 @@ static void test_acpi_q35_tcg(void) data.machine = MACHINE_Q35; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine q35,accel=tcg", &data); + test_acpi_one(NULL, &data); free_test_data(&data); } @@ -796,7 +799,7 @@ static void test_acpi_q35_tcg_bridge(void) data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); - test_acpi_one("-machine q35,accel=tcg -device pci-bridge,chassis_nr=1", + test_acpi_one("-device pci-bridge,chassis_nr=1", &data); free_test_data(&data); } @@ -808,8 +811,8 @@ static void test_acpi_piix4_tcg_cphp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".cphp"; - test_acpi_one("-machine accel=tcg" - " -smp 2,cores=3,sockets=2,maxcpus=6", + test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6" + " -numa node -numa node", &data); free_test_data(&data); } @@ -821,8 +824,8 @@ static void test_acpi_q35_tcg_cphp(void) memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".cphp"; - test_acpi_one("-machine q35,accel=tcg" - " -smp 2,cores=3,sockets=2,maxcpus=6", + 
test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6" + " -numa node -numa node", &data); free_test_data(&data); } @@ -840,7 +843,7 @@ static void test_acpi_q35_tcg_ipmi(void) data.variant = ".ipmibt"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); - test_acpi_one("-machine q35,accel=tcg -device ipmi-bmc-sim,id=bmc0" + test_acpi_one("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", &data); free_test_data(&data); @@ -858,7 +861,7 @@ static void test_acpi_piix4_tcg_ipmi(void) data.variant = ".ipmikcs"; data.required_struct_types = ipmi_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(ipmi_required_struct_types); - test_acpi_one("-machine accel=tcg -device ipmi-bmc-sim,id=bmc0" + test_acpi_one("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-kcs,irq=0,bmc=bmc0", &data); free_test_data(&data); @@ -876,14 +879,14 @@ int main(int argc, char *argv[]) g_test_init(&argc, &argv, NULL); if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { - qtest_add_func("acpi/piix4/tcg", test_acpi_piix4_tcg); - qtest_add_func("acpi/piix4/tcg/bridge", test_acpi_piix4_tcg_bridge); - qtest_add_func("acpi/q35/tcg", test_acpi_q35_tcg); - qtest_add_func("acpi/q35/tcg/bridge", test_acpi_q35_tcg_bridge); - qtest_add_func("acpi/piix4/tcg/ipmi", test_acpi_piix4_tcg_ipmi); - qtest_add_func("acpi/q35/tcg/ipmi", test_acpi_q35_tcg_ipmi); - qtest_add_func("acpi/piix4/tcg/cpuhp", test_acpi_piix4_tcg_cphp); - qtest_add_func("acpi/q35/tcg/cpuhp", test_acpi_q35_tcg_cphp); + qtest_add_func("acpi/piix4", test_acpi_piix4_tcg); + qtest_add_func("acpi/piix4/bridge", test_acpi_piix4_tcg_bridge); + qtest_add_func("acpi/q35", test_acpi_q35_tcg); + qtest_add_func("acpi/q35/bridge", test_acpi_q35_tcg_bridge); + qtest_add_func("acpi/piix4/ipmi", test_acpi_piix4_tcg_ipmi); + qtest_add_func("acpi/q35/ipmi", test_acpi_q35_tcg_ipmi); + qtest_add_func("acpi/piix4/cpuhp", test_acpi_piix4_tcg_cphp); + qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp); } ret = g_test_run(); boot_sector_cleanup(disk); diff --git a/tests/boot-sector.c b/tests/boot-sector.c index 3ffe2987ff..e3880f4455 100644 --- a/tests/boot-sector.c +++ b/tests/boot-sector.c @@ -69,16 +69,32 @@ static uint8_t boot_sector[0x7e000] = { }; /* Create boot disk file. */ -int boot_sector_init(const char *fname) +int boot_sector_init(char *fname) { - FILE *f = fopen(fname, "w"); + int fd, ret; + size_t len = sizeof boot_sector; - if (!f) { + fd = mkstemp(fname); + if (fd < 0) { fprintf(stderr, "Couldn't open \"%s\": %s", fname, strerror(errno)); return 1; } - fwrite(boot_sector, 1, sizeof boot_sector, f); - fclose(f); + + /* For Open Firmware based system, we can use a Forth script instead */ + if (strcmp(qtest_get_arch(), "ppc64") == 0) { + len = sprintf((char *)boot_sector, "\\ Bootscript\n%x %x c! %x %x c!\n", + LOW(SIGNATURE), BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET, + HIGH(SIGNATURE), BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET + 1); + } + + ret = write(fd, boot_sector, len); + close(fd); + + if (ret != len) { + fprintf(stderr, "Could not write \"%s\"", fname); + return 1; + } + return 0; } @@ -90,9 +106,9 @@ void boot_sector_test(void) uint16_t signature; int i; - /* Wait at most 1 minute */ + /* Wait at most 90 seconds */ #define TEST_DELAY (1 * G_USEC_PER_SEC / 10) -#define TEST_CYCLES MAX((60 * G_USEC_PER_SEC / TEST_DELAY), 1) +#define TEST_CYCLES MAX((90 * G_USEC_PER_SEC / TEST_DELAY), 1) /* Poll until code has run and modified memory. 
Once it has we know BIOS * initialization is done. TODO: check that IP reached the halt diff --git a/tests/boot-sector.h b/tests/boot-sector.h index f64b477aa3..35d61c7e2b 100644 --- a/tests/boot-sector.h +++ b/tests/boot-sector.h @@ -14,8 +14,8 @@ #ifndef TEST_BOOT_SECTOR_H #define TEST_BOOT_SECTOR_H -/* Create boot disk file. */ -int boot_sector_init(const char *fname); +/* Create boot disk file. fname must be a suitable string for mkstemp() */ +int boot_sector_init(char *fname); /* Loop until signature in memory is OK. */ void boot_sector_test(void); diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c new file mode 100644 index 0000000000..44c82e5110 --- /dev/null +++ b/tests/boot-serial-test.c @@ -0,0 +1,111 @@ +/* + * Test serial output of some machines. + * + * Copyright 2016 Thomas Huth, Red Hat Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 + * or later. See the COPYING file in the top-level directory. + * + * This test is used to check that the serial output of the firmware + * (that we provide for some machines) contains an expected string. + * Thus we check that the firmware still boots at least to a certain + * point and so we know that the machine is not completely broken. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" + +typedef struct testdef { + const char *arch; /* Target architecture */ + const char *machine; /* Name of the machine */ + const char *extra; /* Additional parameters */ + const char *expect; /* Expected string in the serial output */ +} testdef_t; + +static testdef_t tests[] = { + { "alpha", "clipper", "", "PCI:" }, + { "ppc", "ppce500", "", "U-Boot" }, + { "ppc", "prep", "", "Open Hack'Ware BIOS" }, + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "prep", "", "Open Hack'Ware BIOS" }, + { "ppc64", "pseries", "", "Open Firmware" }, + { "ppc64", "powernv", "-cpu POWER9", "SkiBoot" }, + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, + { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "s390x", "s390-ccw-virtio", + "-nodefaults -device sclpconsole,chardev=serial0", "virtio device" }, + { NULL } +}; + +static void check_guest_output(const testdef_t *test, int fd) +{ + bool output_ok = false; + int i, nbr, pos = 0; + char ch; + + /* Poll serial output... Wait at most 60 seconds */ + for (i = 0; i < 6000; ++i) { + while ((nbr = read(fd, &ch, 1)) == 1) { + if (ch == test->expect[pos]) { + pos += 1; + if (test->expect[pos] == '\0') { + /* We've reached the end of the expected string! 
*/ + output_ok = true; + goto done; + } + } else { + pos = 0; + } + } + g_assert(nbr >= 0); + g_usleep(10000); + } + +done: + g_assert(output_ok); +} + +static void test_machine(const void *data) +{ + const testdef_t *test = data; + char *args; + char tmpname[] = "/tmp/qtest-boot-serial-XXXXXX"; + int fd; + + fd = mkstemp(tmpname); + g_assert(fd != -1); + + args = g_strdup_printf("-M %s,accel=tcg -chardev file,id=serial0,path=%s" + " -serial chardev:serial0 %s", test->machine, + tmpname, test->extra); + + qtest_start(args); + unlink(tmpname); + + check_guest_output(test, fd); + qtest_quit(global_qtest); + + g_free(args); + close(fd); +} + +int main(int argc, char *argv[]) +{ + const char *arch = qtest_get_arch(); + int i; + + g_test_init(&argc, &argv, NULL); + + for (i = 0; tests[i].arch != NULL; i++) { + if (strcmp(arch, tests[i].arch) == 0) { + char *name = g_strdup_printf("boot-serial/%s", tests[i].machine); + qtest_add_data_func(name, &tests[i], test_machine); + g_free(name); + } + } + + return g_test_run(); +} diff --git a/tests/check-block.sh b/tests/check-block.sh index a37797a494..c3de3789c4 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh @@ -1,5 +1,10 @@ #!/bin/sh +FORMAT_LIST="raw qcow2 qed vmdk vpc" +if [ "$#" -ne 0 ]; then + FORMAT_LIST="$@" +fi + export QEMU_PROG="$(pwd)/x86_64-softmmu/qemu-system-x86_64" export QEMU_IMG_PROG="$(pwd)/qemu-img" export QEMU_IO_PROG="$(pwd)/qemu-io" @@ -12,10 +17,8 @@ fi cd tests/qemu-iotests ret=0 -./check -T -nocache -raw || ret=1 -./check -T -nocache -qcow2 || ret=1 -./check -T -nocache -qed|| ret=1 -./check -T -nocache -vmdk|| ret=1 -./check -T -nocache -vpc || ret=1 +for FMT in $FORMAT_LIST ; do + ./check -T -nocache -$FMT || ret=1 +done exit $ret diff --git a/tests/check-qdict.c b/tests/check-qdict.c index 42da1e65a5..07b1c798d8 100644 --- a/tests/check-qdict.c +++ b/tests/check-qdict.c @@ -14,6 +14,7 @@ #include "qapi/qmp/qint.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" +#include "qapi/error.h" #include "qemu-common.h" /* @@ -412,7 +413,6 @@ static void qdict_array_split_test(void) QDECREF(test_dict); - /* * Test the split of * @@ -518,7 +518,6 @@ static void qdict_join_test(void) dict1 = qdict_new(); dict2 = qdict_new(); - /* Test everything once without overwrite and once with */ do { @@ -528,7 +527,6 @@ static void qdict_join_test(void) g_assert(qdict_size(dict1) == 0); g_assert(qdict_size(dict2) == 0); - /* First iteration: Test movement */ /* Second iteration: Test empty source and non-empty destination */ qdict_put(dict2, "foo", qint_from_int(42)); @@ -542,7 +540,6 @@ static void qdict_join_test(void) g_assert(qdict_get_int(dict1, "foo") == 42); } - /* Test non-empty source and destination without conflict */ qdict_put(dict2, "bar", qint_from_int(23)); @@ -554,7 +551,6 @@ static void qdict_join_test(void) g_assert(qdict_get_int(dict1, "foo") == 42); g_assert(qdict_get_int(dict1, "bar") == 23); - /* Test conflict */ qdict_put(dict2, "foo", qint_from_int(84)); @@ -570,7 +566,6 @@ static void qdict_join_test(void) g_assert(qdict_get_int(dict2, "foo") == 84); } - /* Check the references */ g_assert(qdict_get(dict1, "foo")->refcnt == 1); g_assert(qdict_get(dict1, "bar")->refcnt == 1); @@ -579,7 +574,6 @@ static void qdict_join_test(void) g_assert(qdict_get(dict2, "foo")->refcnt == 1); } - /* Clean up */ qdict_del(dict1, "foo"); qdict_del(dict1, "bar"); @@ -590,11 +584,152 @@ static void qdict_join_test(void) } while (overwrite ^= true); - QDECREF(dict1); QDECREF(dict2); } +static void 
qdict_crumple_test_recursive(void) +{ + QDict *src, *dst, *rule, *vnc, *acl, *listen; + QObject *child, *res; + QList *rules; + + src = qdict_new(); + qdict_put(src, "vnc.listen.addr", qstring_from_str("127.0.0.1")); + qdict_put(src, "vnc.listen.port", qstring_from_str("5901")); + qdict_put(src, "vnc.acl.rules.0.match", qstring_from_str("fred")); + qdict_put(src, "vnc.acl.rules.0.policy", qstring_from_str("allow")); + qdict_put(src, "vnc.acl.rules.1.match", qstring_from_str("bob")); + qdict_put(src, "vnc.acl.rules.1.policy", qstring_from_str("deny")); + qdict_put(src, "vnc.acl.default", qstring_from_str("deny")); + qdict_put(src, "vnc.acl..name", qstring_from_str("acl0")); + qdict_put(src, "vnc.acl.rule..name", qstring_from_str("acl0")); + + res = qdict_crumple(src, &error_abort); + + g_assert_cmpint(qobject_type(res), ==, QTYPE_QDICT); + + dst = qobject_to_qdict(res); + + g_assert_cmpint(qdict_size(dst), ==, 1); + + child = qdict_get(dst, "vnc"); + g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); + vnc = qobject_to_qdict(child); + + child = qdict_get(vnc, "listen"); + g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); + listen = qobject_to_qdict(child); + g_assert_cmpstr("127.0.0.1", ==, qdict_get_str(listen, "addr")); + g_assert_cmpstr("5901", ==, qdict_get_str(listen, "port")); + + child = qdict_get(vnc, "acl"); + g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); + acl = qobject_to_qdict(child); + + child = qdict_get(acl, "rules"); + g_assert_cmpint(qobject_type(child), ==, QTYPE_QLIST); + rules = qobject_to_qlist(child); + g_assert_cmpint(qlist_size(rules), ==, 2); + + rule = qobject_to_qdict(qlist_pop(rules)); + g_assert_cmpint(qdict_size(rule), ==, 2); + g_assert_cmpstr("fred", ==, qdict_get_str(rule, "match")); + g_assert_cmpstr("allow", ==, qdict_get_str(rule, "policy")); + QDECREF(rule); + + rule = qobject_to_qdict(qlist_pop(rules)); + g_assert_cmpint(qdict_size(rule), ==, 2); + g_assert_cmpstr("bob", ==, qdict_get_str(rule, "match")); + g_assert_cmpstr("deny", ==, qdict_get_str(rule, "policy")); + QDECREF(rule); + + /* With recursive crumpling, we should see all names unescaped */ + g_assert_cmpstr("acl0", ==, qdict_get_str(vnc, "acl.name")); + child = qdict_get(vnc, "acl"); + g_assert_cmpint(qobject_type(child), ==, QTYPE_QDICT); + acl = qdict_get_qdict(vnc, "acl"); + g_assert_cmpstr("acl0", ==, qdict_get_str(acl, "rule.name")); + + QDECREF(src); + QDECREF(dst); +} + +static void qdict_crumple_test_empty(void) +{ + QDict *src, *dst; + + src = qdict_new(); + + dst = (QDict *)qdict_crumple(src, &error_abort); + + g_assert_cmpint(qdict_size(dst), ==, 0); + + QDECREF(src); + QDECREF(dst); +} + +static void qdict_crumple_test_bad_inputs(void) +{ + QDict *src; + Error *error = NULL; + + src = qdict_new(); + /* rule.0 can't be both a string and a dict */ + qdict_put(src, "rule.0", qstring_from_str("fred")); + qdict_put(src, "rule.0.policy", qstring_from_str("allow")); + + g_assert(qdict_crumple(src, &error) == NULL); + g_assert(error != NULL); + error_free(error); + error = NULL; + QDECREF(src); + + src = qdict_new(); + /* rule can't be both a list and a dict */ + qdict_put(src, "rule.0", qstring_from_str("fred")); + qdict_put(src, "rule.a", qstring_from_str("allow")); + + g_assert(qdict_crumple(src, &error) == NULL); + g_assert(error != NULL); + error_free(error); + error = NULL; + QDECREF(src); + + src = qdict_new(); + /* The input should be flat, ie no dicts or lists */ + qdict_put(src, "rule.a", qdict_new()); + qdict_put(src, "rule.b", qstring_from_str("allow")); + + 
g_assert(qdict_crumple(src, &error) == NULL); + g_assert(error != NULL); + error_free(error); + error = NULL; + QDECREF(src); + + src = qdict_new(); + /* List indexes must not have gaps */ + qdict_put(src, "rule.0", qstring_from_str("deny")); + qdict_put(src, "rule.3", qstring_from_str("allow")); + + g_assert(qdict_crumple(src, &error) == NULL); + g_assert(error != NULL); + error_free(error); + error = NULL; + QDECREF(src); + + src = qdict_new(); + /* List indexes must be in %zu format */ + qdict_put(src, "rule.0", qstring_from_str("deny")); + qdict_put(src, "rule.+1", qstring_from_str("allow")); + + g_assert(qdict_crumple(src, &error) == NULL); + g_assert(error != NULL); + error_free(error); + error = NULL; + QDECREF(src); +} + /* * Errors test-cases */ @@ -742,6 +877,13 @@ int main(int argc, char **argv) g_test_add_func("/errors/put_exists", qdict_put_exists_test); g_test_add_func("/errors/get_not_exists", qdict_get_not_exists_test); + g_test_add_func("/public/crumple/recursive", + qdict_crumple_test_recursive); + g_test_add_func("/public/crumple/empty", + qdict_crumple_test_empty); + g_test_add_func("/public/crumple/bad_inputs", + qdict_crumple_test_bad_inputs); + /* The Big one */ if (g_test_slow()) { g_test_add_func("/stress/test", qdict_stress_test); diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 85955744eb..0b21a22e10 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -964,7 +964,7 @@ static void vararg_number(void) QInt *qint; QFloat *qfloat; int value = 0x2342; - int64_t value64 = 0x2342342343LL; + long long value_ll = 0x2342342343LL; double valuef = 2.323423423; obj = qobject_from_jsonf("%d", value); @@ -976,12 +976,12 @@ static void vararg_number(void) QDECREF(qint); - obj = qobject_from_jsonf("%" PRId64, value64); + obj = qobject_from_jsonf("%lld", value_ll); g_assert(obj != NULL); g_assert(qobject_type(obj) == QTYPE_QINT); qint = qobject_to_qint(obj); - g_assert(qint_get_int(qint) == value64); + g_assert(qint_get_int(qint) == value_ll); QDECREF(qint); diff --git a/tests/check-qnull.c b/tests/check-qnull.c index dc906b116e..b50bb8a81a 100644 --- a/tests/check-qnull.c +++ b/tests/check-qnull.c @@ -10,8 +10,8 @@ #include "qapi/qmp/qobject.h" #include "qemu-common.h" -#include "qapi/qmp-input-visitor.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/error.h" /* @@ -47,12 +47,12 @@ static void qnull_visit_test(void) g_assert(qnull_.refcnt == 1); obj = qnull(); - v = qmp_input_visitor_new(obj, true); + v = qobject_input_visitor_new(obj, true); qobject_decref(obj); visit_type_null(v, NULL, &error_abort); visit_free(v); - v = qmp_output_visitor_new(&obj); + v = qobject_output_visitor_new(&obj); visit_type_null(v, NULL, &error_abort); visit_complete(v, &obj); g_assert(obj == &qnull_); diff --git a/tests/check-qom-interface.c b/tests/check-qom-interface.c index 719ddcf2e0..f87c9aaa8a 100644 --- a/tests/check-qom-interface.c +++ b/tests/check-qom-interface.c @@ -76,6 +76,7 @@ static void test_interface_impl(const char *type) g_assert(iobj); g_assert(ioc->test == PATTERN); + object_unref(obj); } static void interface_direct_test(void) diff --git a/tests/check-qom-proplist.c b/tests/check-qom-proplist.c index 42defe7128..a16cefca73 100644 --- a/tests/check-qom-proplist.c +++ b/tests/check-qom-proplist.c @@ -230,6 +230,13 @@ struct DummyBackendClass { }; +static void dummy_dev_finalize(Object *obj) +{ + DummyDev *dev = DUMMY_DEV(obj); + + object_unref(OBJECT(dev->bus)); +} + 
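The check-qom-proplist change above pairs the reference each instance takes with an object_unref() in a new instance_finalize hook, so the dev->bus and bus->backend references no longer leak when the test tears its objects down. As a sketch of that init/finalize pairing in isolation, here is a hypothetical demo-holder type; none of these names appear in the patch.

#include "qemu/osdep.h"
#include "qemu/module.h"
#include "qom/object.h"

#define TYPE_DEMO_HOLDER "demo-holder"
#define DEMO_HOLDER(obj) OBJECT_CHECK(DemoHolder, (obj), TYPE_DEMO_HOLDER)

typedef struct DemoHolder {
    Object parent_obj;
    Object *child;                       /* reference owned by this instance */
} DemoHolder;

typedef struct DemoHolderClass {
    ObjectClass parent_class;
} DemoHolderClass;

static void demo_holder_init(Object *obj)
{
    DemoHolder *self = DEMO_HOLDER(obj);

    self->child = object_new(TYPE_OBJECT);   /* takes a reference */
}

static void demo_holder_finalize(Object *obj)
{
    DemoHolder *self = DEMO_HOLDER(obj);

    object_unref(self->child);               /* drop it again */
}

static const TypeInfo demo_holder_info = {
    .name = TYPE_DEMO_HOLDER,
    .parent = TYPE_OBJECT,
    .instance_size = sizeof(DemoHolder),
    .instance_init = demo_holder_init,
    .instance_finalize = demo_holder_finalize,
    .class_size = sizeof(DemoHolderClass),
};

static void demo_register_types(void)
{
    type_register_static(&demo_holder_info);
}

type_init(demo_register_types)

Keeping the unref in .instance_finalize rather than in an ad-hoc cleanup helper ties the reference's lifetime to the owning instance, which is why the patch adds the hook to both dummy_dev_info and dummy_bus_info.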
static void dummy_dev_init(Object *obj) { DummyDev *dev = DUMMY_DEV(obj); @@ -257,6 +264,13 @@ static void dummy_dev_class_init(ObjectClass *klass, void *opaque) } +static void dummy_bus_finalize(Object *obj) +{ + DummyBus *bus = DUMMY_BUS(obj); + + object_unref(OBJECT(bus->backend)); +} + static void dummy_bus_init(Object *obj) { } @@ -283,6 +297,7 @@ static const TypeInfo dummy_dev_info = { .parent = TYPE_OBJECT, .instance_size = sizeof(DummyDev), .instance_init = dummy_dev_init, + .instance_finalize = dummy_dev_finalize, .class_size = sizeof(DummyDevClass), .class_init = dummy_dev_class_init, }; @@ -292,6 +307,7 @@ static const TypeInfo dummy_bus_info = { .parent = TYPE_OBJECT, .instance_size = sizeof(DummyBus), .instance_init = dummy_bus_init, + .instance_finalize = dummy_bus_finalize, .class_size = sizeof(DummyBusClass), .class_init = dummy_bus_class_init, }; diff --git a/tests/crypto-tls-x509-helpers.h b/tests/crypto-tls-x509-helpers.h index 356b49cd5a..921341c649 100644 --- a/tests/crypto-tls-x509-helpers.h +++ b/tests/crypto-tls-x509-helpers.h @@ -21,12 +21,8 @@ #include #include -#include -#include - #if !(defined WIN32) && \ defined(CONFIG_TASN1) && \ - defined(LIBGNUTLS_VERSION_NUMBER) && \ (LIBGNUTLS_VERSION_NUMBER >= 0x020600) # define QCRYPTO_HAVE_TLS_TEST_SUPPORT #endif diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index 4f4707dae0..3f15d5aea8 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -25,7 +25,7 @@ make-archive-maybe = $(if $(wildcard $1/*), \ else \ git archive -1 $$(git stash create) --format=tar.gz; \ fi) > $2, \ - " ARCHIVE $(notdir $2)")) + "ARCHIVE","$(notdir $2)")) CUR_TIME := $(shell date +%Y-%m-%d-%H.%M.%S.$$$$) DOCKER_SRC_COPY := docker-src.$(CUR_TIME) @@ -36,7 +36,7 @@ $(DOCKER_SRC_COPY): $(call make-archive-maybe, $(SRC_PATH)/dtc, $@/dtc.tgz) $(call make-archive-maybe, $(SRC_PATH)/pixman, $@/pixman.tgz) $(call quiet-command, cp $(SRC_PATH)/tests/docker/run $@/run, \ - " COPY RUNNER") + "COPY","RUNNER") docker-qemu-src: $(DOCKER_SRC_COPY) @@ -44,11 +44,14 @@ docker-image: ${DOCKER_TARGETS} # General rule for building docker images docker-image-%: $(DOCKER_FILES_DIR)/%.docker + @if test "$@" = docker-image-debian-bootstrap -a -z "$(EXECUTABLE)"; then \ + echo WARNING: EXECUTABLE is not set, debootstrap may fail. 2>&1 ; \ + fi $(call quiet-command,\ $(SRC_PATH)/tests/docker/docker.py build qemu:$* $< \ $(if $V,,--quiet) $(if $(NOCACHE),--no-cache) \ $(if $(EXECUTABLE),--include-executable=$(EXECUTABLE)),\ - " BUILD $*") + "BUILD","$*") # Expand all the pre-requistes for each docker image and test combination $(foreach i,$(DOCKER_IMAGES), \ @@ -75,6 +78,7 @@ docker: @echo ' "IMAGE" is one of the listed container name."' @echo ' docker-image: Build all images.' @echo ' docker-image-IMAGE: Build image "IMAGE".' + @echo ' docker-run: For manually running a "TEST" with "IMAGE"' @echo @echo 'Available container images:' @echo ' $(DOCKER_IMAGES)' @@ -98,31 +102,45 @@ docker: @echo ' NOCACHE=1 Ignore cache when build images.' @echo ' EXECUTABLE= Include executable in image.' -docker-run-%: CMD = $(shell echo '$@' | sed -e 's/docker-run-\([^@]*\)@\(.*\)/\1/') -docker-run-%: IMAGE = $(shell echo '$@' | sed -e 's/docker-run-\([^@]*\)@\(.*\)/\2/') -docker-run-%: docker-qemu-src +# This rule if for directly running against an arbitrary docker target. +# It is called by the expanded docker targets (e.g. make +# docker-test-foo@bar) which will do additional verification. 
+# +# For example: make docker-run TEST="test-quick" IMAGE="debian:arm64" EXECUTABLE=./aarch64-linux-user/qemu-aarch64 +# +docker-run: docker-qemu-src @mkdir -p "$(DOCKER_CCACHE_DIR)" - @if test -z "$(IMAGE)" || test -z "$(CMD)"; \ - then echo "Invalid target"; exit 1; \ + @if test -z "$(IMAGE)" || test -z "$(TEST)"; \ + then echo "Invalid target $(IMAGE)/$(TEST)"; exit 1; \ fi - $(if $(filter $(TESTS),$(CMD)),$(if $(filter $(IMAGES),$(IMAGE)), \ - $(call quiet-command,\ - if $(SRC_PATH)/tests/docker/docker.py images | \ - awk '$$1=="qemu" && $$2=="$(IMAGE)"{found=1} END{exit(!found)}'; then \ - $(SRC_PATH)/tests/docker/docker.py run $(if $V,,--rm) \ - -t \ - $(if $(DEBUG),-i,--net=none) \ - -e TARGET_LIST=$(TARGET_LIST) \ - -e EXTRA_CONFIGURE_OPTS=$(EXTRA_CONFIGURE_OPTS) \ - -e V=$V -e J=$J -e DEBUG=$(DEBUG)\ - -e CCACHE_DIR=/var/tmp/ccache \ - -v $$(realpath $(DOCKER_SRC_COPY)):/var/tmp/qemu:z$(COMMA)ro \ - -v $(DOCKER_CCACHE_DIR):/var/tmp/ccache:z \ - qemu:$(IMAGE) \ - /var/tmp/qemu/run \ - $(CMD); \ - fi \ - , " RUN $(CMD) in $(IMAGE)"))) + $(if $(EXECUTABLE), \ + $(call quiet-command, \ + $(SRC_PATH)/tests/docker/docker.py update \ + $(IMAGE) $(EXECUTABLE), \ + " COPYING $(EXECUTABLE) to $(IMAGE)")) + $(call quiet-command, \ + $(SRC_PATH)/tests/docker/docker.py run \ + -t \ + $(if $V,,--rm) \ + $(if $(DEBUG),-i,--net=none) \ + -e TARGET_LIST=$(TARGET_LIST) \ + -e EXTRA_CONFIGURE_OPTS="$(EXTRA_CONFIGURE_OPTS)" \ + -e V=$V -e J=$J -e DEBUG=$(DEBUG) \ + -e SHOW_ENV=$(SHOW_ENV) \ + -e CCACHE_DIR=/var/tmp/ccache \ + -v $$(readlink -e $(DOCKER_SRC_COPY)):/var/tmp/qemu:z$(COMMA)ro \ + -v $(DOCKER_CCACHE_DIR):/var/tmp/ccache:z \ + $(IMAGE) \ + /var/tmp/qemu/run \ + $(TEST), " RUN $(TEST) in ${IMAGE}") + +# Run targets: +# +# Of the form docker-TEST-FOO@IMAGE-BAR which will then be expanded into a call to "make docker-run" +docker-run-%: CMD = $(shell echo '$@' | sed -e 's/docker-run-\([^@]*\)@\(.*\)/\1/') +docker-run-%: IMAGE = $(shell echo '$@' | sed -e 's/docker-run-\([^@]*\)@\(.*\)/\2/') +docker-run-%: + @$(MAKE) docker-run TEST=$(CMD) IMAGE=qemu:$(IMAGE) docker-clean: $(call quiet-command, $(SRC_PATH)/tests/docker/docker.py clean) diff --git a/tests/docker/common.rc b/tests/docker/common.rc index 0c6d8d5ece..21657e87c6 100755 --- a/tests/docker/common.rc +++ b/tests/docker/common.rc @@ -11,6 +11,9 @@ # or (at your option) any later version. See the COPYING file in # the top-level directory. 
+BUILD_DIR=/var/tmp/qemu-build +mkdir $BUILD_DIR + requires() { for c in $@; do @@ -23,11 +26,13 @@ requires() build_qemu() { - $QEMU_SRC/configure \ - --enable-werror \ - ${TARGET_LIST:+"--target-list=${TARGET_LIST}"} \ - --prefix="$PWD/install" \ - $EXTRA_CONFIGURE_OPTS \ - "$@" + config_opts="--enable-werror \ + ${TARGET_LIST:+--target-list=${TARGET_LIST}} \ + --prefix=$PWD/install \ + $EXTRA_CONFIGURE_OPTS \ + $@" + echo "Configure options:" + echo $config_opts + $QEMU_SRC/configure $config_opts make $MAKEFLAGS } diff --git a/tests/docker/docker.py b/tests/docker/docker.py index 222a1053fe..37d83199e7 100755 --- a/tests/docker/docker.py +++ b/tests/docker/docker.py @@ -21,10 +21,15 @@ import argparse import tempfile import re +import signal from tarfile import TarFile, TarInfo from StringIO import StringIO from shutil import copy, rmtree + +DEVNULL = open(os.devnull, 'wb') + + def _text_checksum(text): """Calculate a digest string unique to the text content""" return hashlib.sha1(text).hexdigest() @@ -33,10 +38,12 @@ def _guess_docker_command(): """ Guess a working docker command or raise exception if not found""" commands = [["docker"], ["sudo", "-n", "docker"]] for cmd in commands: - if subprocess.call(cmd + ["images"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) == 0: - return cmd + try: + if subprocess.call(cmd + ["images"], + stdout=DEVNULL, stderr=DEVNULL) == 0: + return cmd + except OSError: + pass commands_txt = "\n".join([" " + " ".join(x) for x in commands]) raise Exception("Cannot find working docker command. Tried:\n%s" % \ commands_txt) @@ -95,10 +102,12 @@ def __init__(self): self._command = _guess_docker_command() self._instances = [] atexit.register(self._kill_instances) + signal.signal(signal.SIGTERM, self._kill_instances) + signal.signal(signal.SIGHUP, self._kill_instances) def _do(self, cmd, quiet=True, infile=None, **kwargs): if quiet: - kwargs["stdout"] = subprocess.PIPE + kwargs["stdout"] = DEVNULL if infile: kwargs["stdin"] = infile return subprocess.call(self._command + cmd, **kwargs) @@ -127,7 +136,7 @@ def clean(self): self._do_kill_instances(False, False) return 0 - def _kill_instances(self): + def _kill_instances(self, *args, **kwargs): return self._do_kill_instances(True) def _output(self, cmd, **kwargs): @@ -236,8 +245,9 @@ def run(self, args, argv): # Is there a .pre file to run in the build context? 
docker_pre = os.path.splitext(args.dockerfile)[0]+".pre" if os.path.exists(docker_pre): + stdout = DEVNULL if args.quiet else None rc = subprocess.call(os.path.realpath(docker_pre), - cwd=docker_dir) + cwd=docker_dir, stdout=stdout) if rc == 3: print "Skip" return 0 diff --git a/tests/docker/dockerfiles/centos6.docker b/tests/docker/dockerfiles/centos6.docker index 8f4fe46379..34e0d3b91e 100644 --- a/tests/docker/dockerfiles/centos6.docker +++ b/tests/docker/dockerfiles/centos6.docker @@ -1,6 +1,8 @@ FROM centos:6 -RUN yum install -y \ +RUN yum install -y epel-release +ENV PACKAGES libfdt-devel ccache \ tar git make gcc g++ \ zlib-devel glib2-devel SDL-devel pixman-devel \ epel-release -RUN yum install -y libfdt-devel ccache +RUN yum install -y $PACKAGES +RUN rpm -q $PACKAGES | sort > /packages.txt diff --git a/tests/docker/dockerfiles/debian-bootstrap.pre b/tests/docker/dockerfiles/debian-bootstrap.pre index 5d9c8d5ebc..7c76dce663 100755 --- a/tests/docker/dockerfiles/debian-bootstrap.pre +++ b/tests/docker/dockerfiles/debian-bootstrap.pre @@ -3,6 +3,8 @@ # Simple wrapper for debootstrap, run in the docker build context # FAKEROOT=`which fakeroot 2> /dev/null` +# debootstrap < 1.0.67 generates empty sources.list, see Debian#732255 +MIN_DEBOOTSTRAP_VERSION=1.0.67 exit_and_skip() { @@ -13,8 +15,21 @@ exit_and_skip() # fakeroot is needed to run the bootstrap stage # if [ -z $FAKEROOT ]; then - echo "Please install fakeroot to enable bootstraping" + echo "Please install fakeroot to enable bootstraping" >&2 exit_and_skip + +fi + +if [ -z "${DEB_ARCH}" ]; then + echo "Please set DEB_ARCH to choose an architecture (e.g. armhf)" >&2 + exit_and_skip + +fi + +if [ -z "${DEB_TYPE}" ]; then + echo "Please set DEB_TYPE to a Debian archive name (e.g. testing)" >&2 + exit_and_skip + fi # We check in order for @@ -27,18 +42,27 @@ fi # if [ -z $DEBOOTSTRAP_DIR ]; then + NEED_DEBOOTSTRAP=false DEBOOTSTRAP=`which debootstrap 2> /dev/null` if [ -z $DEBOOTSTRAP ]; then echo "No debootstrap installed, attempting to install from SCM" + NEED_DEBOOTSTRAP=true + elif ! (echo "${MIN_DEBOOTSTRAP_VERSION}" ; "${DEBOOTSTRAP}" --version \ + | cut -d ' ' -f 2) | sort -t . -n -k 1,1 -k 2,2 -k 3,3 -c &>/dev/null; then + echo "debootstrap too old, attempting to install from SCM" + NEED_DEBOOTSTRAP=true + fi + if $NEED_DEBOOTSTRAP; then DEBOOTSTRAP_SOURCE=https://anonscm.debian.org/git/d-i/debootstrap.git git clone ${DEBOOTSTRAP_SOURCE} ./debootstrap.git export DEBOOTSTRAP_DIR=./debootstrap.git DEBOOTSTRAP=./debootstrap.git/debootstrap + (cd "${DEBOOTSTRAP_DIR}" && "${FAKEROOT}" make ) fi else DEBOOTSTRAP=${DEBOOTSTRAP_DIR}/debootstrap if [ ! -f $DEBOOTSTRAP ]; then - echo "Couldn't find script at ${DEBOOTSTRAP}" + echo "Couldn't find script at ${DEBOOTSTRAP}" >&2 exit_and_skip fi fi @@ -48,7 +72,7 @@ fi # BINFMT_DIR=/proc/sys/fs/binfmt_misc if [ ! -e $BINFMT_DIR ]; then - echo "binfmt_misc needs enabling for a QEMU bootstrap to work" + echo "binfmt_misc needs enabling for a QEMU bootstrap to work" >&2 exit_and_skip else # DEB_ARCH and QEMU arch names are not totally aligned @@ -76,7 +100,7 @@ else ;; esac if [ ! 
-e "${BINFMT_DIR}/$QEMU" ]; then - echo "No binfmt_misc rule to run $QEMU, can't bootstrap" + echo "No binfmt_misc rule to run $QEMU, can't bootstrap" >&2 exit_and_skip fi fi diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker index 1d26a8e98a..478163b8d8 100644 --- a/tests/docker/dockerfiles/fedora.docker +++ b/tests/docker/dockerfiles/fedora.docker @@ -1,7 +1,17 @@ -FROM fedora:23 -RUN dnf install -y \ +FROM fedora:latest +ENV PACKAGES \ ccache git tar PyYAML sparse flex bison \ glib2-devel pixman-devel zlib-devel SDL-devel libfdt-devel \ gcc gcc-c++ clang make perl which bc findutils \ - mingw{32,64}-{pixman,glib2,gmp,SDL,pkg-config,gtk2,gtk3,gnutls,nettle,libtasn1,libjpeg-turbo,libpng,curl,libssh2,bzip2} + mingw32-pixman mingw32-glib2 mingw32-gmp mingw32-SDL mingw32-pkg-config \ + mingw32-gtk2 mingw32-gtk3 mingw32-gnutls mingw32-nettle mingw32-libtasn1 \ + mingw32-libjpeg-turbo mingw32-libpng mingw32-curl mingw32-libssh2 \ + mingw32-bzip2 \ + mingw64-pixman mingw64-glib2 mingw64-gmp mingw64-SDL mingw64-pkg-config \ + mingw64-gtk2 mingw64-gtk3 mingw64-gnutls mingw64-nettle mingw64-libtasn1 \ + mingw64-libjpeg-turbo mingw64-libpng mingw64-curl mingw64-libssh2 \ + mingw64-bzip2 + +RUN dnf install -y $PACKAGES +RUN rpm -q $PACKAGES | sort > /packages.txt ENV FEATURES mingw clang pyyaml diff --git a/tests/docker/dockerfiles/min-glib.docker b/tests/docker/dockerfiles/min-glib.docker new file mode 100644 index 0000000000..9f542d5e9c --- /dev/null +++ b/tests/docker/dockerfiles/min-glib.docker @@ -0,0 +1,8 @@ +FROM centos:6 +RUN yum install -y \ + tar git make gcc g++ \ + zlib-devel SDL-devel pixman-devel \ + epel-release +RUN yum install -y libfdt-devel ccache +RUN yum downgrade -y http://vault.centos.org/6.0/os/x86_64/Packages/glib2-2.22.5-5.el6.x86_64.rpm +RUN yum install -y http://vault.centos.org/6.0/os/x86_64/Packages/glib2-devel-2.22.5-5.el6.x86_64.rpm diff --git a/tests/docker/dockerfiles/travis.docker b/tests/docker/dockerfiles/travis.docker new file mode 100644 index 0000000000..e4983ae2d3 --- /dev/null +++ b/tests/docker/dockerfiles/travis.docker @@ -0,0 +1,6 @@ +FROM quay.io/travisci/travis-ruby +RUN apt-get update +RUN apt-get -y build-dep qemu +RUN apt-get -y build-dep device-tree-compiler +RUN apt-get -y install python2.7 dh-autoreconf +ENV FEATURES pyyaml diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker index a8b88c318c..a360a050a2 100644 --- a/tests/docker/dockerfiles/ubuntu.docker +++ b/tests/docker/dockerfiles/ubuntu.docker @@ -2,10 +2,12 @@ FROM ubuntu:14.04 RUN echo "deb http://archive.ubuntu.com/ubuntu/ trusty universe multiverse" >> \ /etc/apt/sources.list RUN apt-get update -RUN apt-get -y install flex bison \ +ENV PACKAGES flex bison \ libusb-1.0-0-dev libiscsi-dev librados-dev libncurses5-dev \ libseccomp-dev libgnutls-dev libssh2-1-dev libspice-server-dev \ libspice-protocol-dev libnss3-dev libfdt-dev \ libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev \ git make ccache python-yaml gcc clang sparse +RUN apt-get -y install $PACKAGES +RUN dpkg -l $PACKAGES | sort > /packages.txt ENV FEATURES clang pyyaml diff --git a/tests/docker/run b/tests/docker/run index d85d49afc1..c1e4513bce 100755 --- a/tests/docker/run +++ b/tests/docker/run @@ -40,20 +40,34 @@ for p in dtc pixman; do fi done +if test -n "$SHOW_ENV"; then + if test -f /packages.txt; then + echo "Packages installed:" + cat /packages.txt + echo + fi + echo "Environment variables:" + env + echo +fi + export 
QEMU_SRC="$TEST_DIR/src" cd "$QEMU_SRC/tests/docker" CMD="$QEMU_SRC/tests/docker/$@" -if test -n "$DEBUG"; then - echo "* Prepared to run command:" - echo " $CMD" - echo "* Hit Ctrl-D to continue, or type 'exit 1' to abort" - echo - $SHELL +if test -z "$DEBUG"; then + exec $CMD fi +# DEBUG workflow +echo "* Prepared to run command:" +echo " $CMD" +echo "* Hit Ctrl-D to continue, or type 'exit 1' to abort" +echo +$SHELL + if "$CMD"; then exit 0 elif test -n "$DEBUG"; then diff --git a/tests/docker/test-build b/tests/docker/test-build new file mode 100755 index 0000000000..031a7d9d30 --- /dev/null +++ b/tests/docker/test-build @@ -0,0 +1,20 @@ +#!/bin/bash -e +# +# Quick compile test without the make check step of test-quick. +# +# Copyright (c) 2016 Red Hat Inc. +# +# Authors: +# Fam Zheng +# +# This work is licensed under the terms of the GNU GPL, version 2 +# or (at your option) any later version. See the COPYING file in +# the top-level directory. + +. common.rc + +cd "$BUILD_DIR" + +DEF_TARGET_LIST="x86_64-softmmu,aarch64-softmmu" +TARGET_LIST=${TARGET_LIST:-$DEF_TARGET_LIST} \ +build_qemu diff --git a/tests/docker/test-clang b/tests/docker/test-clang index 60e4e976b3..16485e6b7e 100755 --- a/tests/docker/test-clang +++ b/tests/docker/test-clang @@ -15,6 +15,8 @@ requires clang +cd "$BUILD_DIR" + OPTS="--enable-debug --cxx=clang++ --cc=clang --host-cc=clang" # -fsanitize=undefined is broken on Fedora 23, skip it for now # See also: https://bugzilla.redhat.com/show_bug.cgi?id=1263834 diff --git a/tests/docker/test-full b/tests/docker/test-full index fd9b798947..05f0d491d1 100755 --- a/tests/docker/test-full +++ b/tests/docker/test-full @@ -13,5 +13,7 @@ . common.rc +cd "$BUILD_DIR" + build_qemu make check $MAKEFLAGS diff --git a/tests/docker/test-mingw b/tests/docker/test-mingw index c03757add8..2adadcb58d 100755 --- a/tests/docker/test-mingw +++ b/tests/docker/test-mingw @@ -15,8 +15,11 @@ requires mingw dtc +cd "$BUILD_DIR" +DEF_TARGET_LIST="x86_64-softmmu,aarch64-softmmu" + for prefix in x86_64-w64-mingw32- i686-w64-mingw32-; do - TARGET_LIST=x86_64-softmmu,aarch64-softmmu \ + TARGET_LIST=${TARGET_LIST:-$DEF_TARGET_LIST} \ build_qemu --cross-prefix=$prefix \ --enable-trace-backends=simple \ --enable-debug \ diff --git a/tests/docker/test-quick b/tests/docker/test-quick index 07cdc59a10..c465dc06d8 100755 --- a/tests/docker/test-quick +++ b/tests/docker/test-quick @@ -13,7 +13,9 @@ . 
common.rc -DEF_TARGET_LIST="$(echo {x86_64,aarch64}-softmmu)" +cd "$BUILD_DIR" + +DEF_TARGET_LIST="x86_64-softmmu,aarch64-softmmu" TARGET_LIST=${TARGET_LIST:-$DEF_TARGET_LIST} \ build_qemu make check $MAKEFLAGS diff --git a/tests/e1000e-test.c b/tests/e1000e-test.c index d497b0857c..8c42ca919f 100644 --- a/tests/e1000e-test.c +++ b/tests/e1000e-test.c @@ -87,7 +87,7 @@ typedef struct e1000e_device { QPCIDevice *pci_dev; - void *mac_regs; + QPCIBar mac_regs; uint64_t tx_ring; uint64_t rx_ring; @@ -119,12 +119,12 @@ static QPCIDevice *e1000e_device_find(QPCIBus *bus) static void e1000e_macreg_write(e1000e_device *d, uint32_t reg, uint32_t val) { - qpci_io_writel(d->pci_dev, d->mac_regs + reg, val); + qpci_io_writel(d->pci_dev, d->mac_regs, reg, val); } static uint32_t e1000e_macreg_read(e1000e_device *d, uint32_t reg) { - return qpci_io_readl(d->pci_dev, d->mac_regs + reg); + return qpci_io_readl(d->pci_dev, d->mac_regs, reg); } static void e1000e_device_init(QPCIBus *bus, e1000e_device *d) @@ -138,7 +138,6 @@ static void e1000e_device_init(QPCIBus *bus, e1000e_device *d) /* Map BAR0 (mac registers) */ d->mac_regs = qpci_iomap(d->pci_dev, 0, NULL); - g_assert_nonnull(d->mac_regs); /* Reset the device */ val = e1000e_macreg_read(d, E1000E_CTRL); @@ -390,7 +389,7 @@ static void data_test_init(e1000e_device *d) qtest_start(cmdline); g_free(cmdline); - test_bus = qpci_init_pc(); + test_bus = qpci_init_pc(NULL); g_assert_nonnull(test_bus); test_alloc = pc_alloc_init(); diff --git a/tests/endianness-test.c b/tests/endianness-test.c index b7a120e0a4..cf8d41b7b4 100644 --- a/tests/endianness-test.c +++ b/tests/endianness-test.c @@ -38,7 +38,8 @@ static const TestCase test_cases[] = { { "ppc", "prep", 0x80000000, .bswap = true }, { "ppc", "bamboo", 0xe8000000, .bswap = true, .superio = "i82378" }, { "ppc64", "mac99", 0xf2000000, .bswap = true, .superio = "i82378" }, - { "ppc64", "pseries", 0x10080000000ULL, + { "ppc64", "pseries", (1ULL << 45), .bswap = true, .superio = "i82378" }, + { "ppc64", "pseries-2.7", 0x10080000000ULL, .bswap = true, .superio = "i82378" }, { "sh4", "r2d", 0xfe240000, .superio = "i82378" }, { "sh4eb", "r2d", 0xfe240000, .bswap = true, .superio = "i82378" }, diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c index 12ee3929da..6176e81ab2 100644 --- a/tests/hd-geo-test.c +++ b/tests/hd-geo-test.c @@ -416,7 +416,9 @@ int main(int argc, char **argv) ret = g_test_run(); for (i = 0; i < backend_last; i++) { - unlink(img_file_name[i]); + if (img_file_name[i]) { + unlink(img_file_name[i]); + } } return ret; diff --git a/tests/i440fx-test.c b/tests/i440fx-test.c index 3542ad114e..da2d5a53f0 100644 --- a/tests/i440fx-test.c +++ b/tests/i440fx-test.c @@ -38,7 +38,7 @@ static QPCIBus *test_start_get_bus(const TestData *s) cmdline = g_strdup_printf("-smp %d", s->num_cpus); qtest_start(cmdline); g_free(cmdline); - return qpci_init_pc(); + return qpci_init_pc(NULL); } static void test_i440fx_defaults(gconstpointer opaque) diff --git a/tests/ide-test.c b/tests/ide-test.c index 1e51af2a94..fb541f88b5 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -137,13 +137,13 @@ static void ide_test_quit(void) qtest_end(); } -static QPCIDevice *get_pci_device(uint16_t *bmdma_base) +static QPCIDevice *get_pci_device(QPCIBar *bmdma_bar, QPCIBar *ide_bar) { QPCIDevice *dev; uint16_t vendor_id, device_id; if (!pcibus) { - pcibus = qpci_init_pc(); + pcibus = qpci_init_pc(NULL); } /* Find PCI device and verify it's the right one */ @@ -156,7 +156,9 @@ static QPCIDevice *get_pci_device(uint16_t 
*bmdma_base) g_assert(device_id == PCI_DEVICE_ID_INTEL_82371SB_1); /* Map bmdma BAR */ - *bmdma_base = (uint16_t)(uintptr_t) qpci_iomap(dev, 4, NULL); + *bmdma_bar = qpci_iomap(dev, 4, NULL); + + *ide_bar = qpci_legacy_iomap(dev, IDE_BASE); qpci_device_enable(dev); @@ -179,17 +181,18 @@ typedef struct PrdtEntry { static int send_dma_request(int cmd, uint64_t sector, int nb_sectors, PrdtEntry *prdt, int prdt_entries, - void(*post_exec)(uint64_t sector, int nb_sectors)) + void(*post_exec)(QPCIDevice *dev, QPCIBar ide_bar, + uint64_t sector, int nb_sectors)) { QPCIDevice *dev; - uint16_t bmdma_base; + QPCIBar bmdma_bar, ide_bar; uintptr_t guest_prdt; size_t len; bool from_dev; uint8_t status; int flags; - dev = get_pci_device(&bmdma_base); + dev = get_pci_device(&bmdma_bar, &ide_bar); flags = cmd & ~0xff; cmd &= 0xff; @@ -214,59 +217,60 @@ static int send_dma_request(int cmd, uint64_t sector, int nb_sectors, } /* Select device 0 */ - outb(IDE_BASE + reg_device, 0 | LBA); + qpci_io_writeb(dev, ide_bar, reg_device, 0 | LBA); /* Stop any running transfer, clear any pending interrupt */ - outb(bmdma_base + bmreg_cmd, 0); - outb(bmdma_base + bmreg_status, BM_STS_INTR); + qpci_io_writeb(dev, bmdma_bar, bmreg_cmd, 0); + qpci_io_writeb(dev, bmdma_bar, bmreg_status, BM_STS_INTR); /* Setup PRDT */ len = sizeof(*prdt) * prdt_entries; guest_prdt = guest_alloc(guest_malloc, len); memwrite(guest_prdt, prdt, len); - outl(bmdma_base + bmreg_prdt, guest_prdt); + qpci_io_writel(dev, bmdma_bar, bmreg_prdt, guest_prdt); /* ATA DMA command */ if (cmd == CMD_PACKET) { /* Enables ATAPI DMA; otherwise PIO is attempted */ - outb(IDE_BASE + reg_feature, 0x01); + qpci_io_writeb(dev, ide_bar, reg_feature, 0x01); } else { - outb(IDE_BASE + reg_nsectors, nb_sectors); - outb(IDE_BASE + reg_lba_low, sector & 0xff); - outb(IDE_BASE + reg_lba_middle, (sector >> 8) & 0xff); - outb(IDE_BASE + reg_lba_high, (sector >> 16) & 0xff); + qpci_io_writeb(dev, ide_bar, reg_nsectors, nb_sectors); + qpci_io_writeb(dev, ide_bar, reg_lba_low, sector & 0xff); + qpci_io_writeb(dev, ide_bar, reg_lba_middle, (sector >> 8) & 0xff); + qpci_io_writeb(dev, ide_bar, reg_lba_high, (sector >> 16) & 0xff); } - outb(IDE_BASE + reg_command, cmd); + qpci_io_writeb(dev, ide_bar, reg_command, cmd); if (post_exec) { - post_exec(sector, nb_sectors); + post_exec(dev, ide_bar, sector, nb_sectors); } /* Start DMA transfer */ - outb(bmdma_base + bmreg_cmd, BM_CMD_START | (from_dev ? BM_CMD_WRITE : 0)); + qpci_io_writeb(dev, bmdma_bar, bmreg_cmd, + BM_CMD_START | (from_dev ? 
BM_CMD_WRITE : 0)); if (flags & CMDF_ABORT) { - outb(bmdma_base + bmreg_cmd, 0); + qpci_io_writeb(dev, bmdma_bar, bmreg_cmd, 0); } /* Wait for the DMA transfer to complete */ do { - status = inb(bmdma_base + bmreg_status); + status = qpci_io_readb(dev, bmdma_bar, bmreg_status); } while ((status & (BM_STS_ACTIVE | BM_STS_INTR)) == BM_STS_ACTIVE); g_assert_cmpint(get_irq(IDE_PRIMARY_IRQ), ==, !!(status & BM_STS_INTR)); /* Check IDE status code */ - assert_bit_set(inb(IDE_BASE + reg_status), DRDY); - assert_bit_clear(inb(IDE_BASE + reg_status), BSY | DRQ); + assert_bit_set(qpci_io_readb(dev, ide_bar, reg_status), DRDY); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), BSY | DRQ); /* Reading the status register clears the IRQ */ g_assert(!get_irq(IDE_PRIMARY_IRQ)); /* Stop DMA transfer if still active */ if (status & BM_STS_ACTIVE) { - outb(bmdma_base + bmreg_cmd, 0); + qpci_io_writeb(dev, bmdma_bar, bmreg_cmd, 0); } free_pci_device(dev); @@ -276,6 +280,8 @@ static int send_dma_request(int cmd, uint64_t sector, int nb_sectors, static void test_bmdma_simple_rw(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t status; uint8_t *buf; uint8_t *cmpbuf; @@ -289,6 +295,8 @@ static void test_bmdma_simple_rw(void) }, }; + dev = get_pci_device(&bmdma_bar, &ide_bar); + buf = g_malloc(len); cmpbuf = g_malloc(len); @@ -299,7 +307,7 @@ static void test_bmdma_simple_rw(void) status = send_dma_request(CMD_WRITE_DMA, 0, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); /* Write 0xaa pattern to sector 1 */ memset(buf, 0xaa, len); @@ -308,14 +316,14 @@ static void test_bmdma_simple_rw(void) status = send_dma_request(CMD_WRITE_DMA, 1, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); /* Read and verify 0x55 pattern in sector 0 */ memset(cmpbuf, 0x55, len); status = send_dma_request(CMD_READ_DMA, 0, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); memread(guest_buf, buf, len); g_assert(memcmp(buf, cmpbuf, len) == 0); @@ -325,7 +333,7 @@ static void test_bmdma_simple_rw(void) status = send_dma_request(CMD_READ_DMA, 1, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); memread(guest_buf, buf, len); g_assert(memcmp(buf, cmpbuf, len) == 0); @@ -337,6 +345,8 @@ static void test_bmdma_simple_rw(void) static void test_bmdma_short_prdt(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t status; PrdtEntry prdt[] = { @@ -346,21 +356,25 @@ static void test_bmdma_short_prdt(void) }, }; + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* Normal request */ status = send_dma_request(CMD_READ_DMA, 0, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); /* Abort the request before it completes */ status = send_dma_request(CMD_READ_DMA | CMDF_ABORT, 0, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); - assert_bit_clear(inb(IDE_BASE 
+ reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); } static void test_bmdma_one_sector_short_prdt(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t status; /* Read 2 sectors but only give 1 sector in PRDT */ @@ -371,21 +385,25 @@ static void test_bmdma_one_sector_short_prdt(void) }, }; + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* Normal request */ status = send_dma_request(CMD_READ_DMA, 0, 2, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); /* Abort the request before it completes */ status = send_dma_request(CMD_READ_DMA | CMDF_ABORT, 0, 2, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, 0); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); } static void test_bmdma_long_prdt(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t status; PrdtEntry prdt[] = { @@ -395,23 +413,29 @@ static void test_bmdma_long_prdt(void) }, }; + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* Normal request */ status = send_dma_request(CMD_READ_DMA, 0, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); /* Abort the request before it completes */ status = send_dma_request(CMD_READ_DMA | CMDF_ABORT, 0, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); } static void test_bmdma_no_busmaster(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t status; + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* No PRDT_EOT, each entry addr 0/size 64k, and in theory qemu shouldn't be * able to access it anyway because the Bus Master bit in the PCI command * register isn't set. This is complete nonsense, but it used to be pretty @@ -424,7 +448,7 @@ static void test_bmdma_no_busmaster(void) /* Not entirely clear what the expected result is, but this is what we get * in practice. At least we want to be aware of any changes. 
*/ g_assert_cmphex(status, ==, BM_STS_ACTIVE | BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); } static void test_bmdma_setup(void) @@ -454,6 +478,8 @@ static void string_cpu_to_be16(uint16_t *s, size_t bytes) static void test_identify(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t data; uint16_t buf[256]; int i; @@ -464,23 +490,25 @@ static void test_identify(void) "-global ide-hd.ver=%s", tmp_path, "testdisk", "version"); + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* IDENTIFY command on device 0*/ - outb(IDE_BASE + reg_device, 0); - outb(IDE_BASE + reg_command, CMD_IDENTIFY); + qpci_io_writeb(dev, ide_bar, reg_device, 0); + qpci_io_writeb(dev, ide_bar, reg_command, CMD_IDENTIFY); /* Read in the IDENTIFY buffer and check registers */ - data = inb(IDE_BASE + reg_device); + data = qpci_io_readb(dev, ide_bar, reg_device); g_assert_cmpint(data & DEV, ==, 0); for (i = 0; i < 256; i++) { - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); assert_bit_set(data, DRDY | DRQ); assert_bit_clear(data, BSY | DF | ERR); - ((uint16_t*) buf)[i] = inw(IDE_BASE + reg_data); + buf[i] = qpci_io_readw(dev, ide_bar, reg_data); } - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); assert_bit_set(data, DRDY); assert_bit_clear(data, BSY | DF | ERR | DRQ); @@ -505,11 +533,15 @@ static void test_identify(void) */ static void make_dirty(uint8_t device) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t status; size_t len = 512; uintptr_t guest_buf; void* buf; + dev = get_pci_device(&bmdma_bar, &ide_bar); + guest_buf = guest_alloc(guest_malloc, len); buf = g_malloc(len); g_assert(guest_buf); @@ -527,19 +559,23 @@ static void make_dirty(uint8_t device) status = send_dma_request(CMD_WRITE_DMA, 1, 1, prdt, ARRAY_SIZE(prdt), NULL); g_assert_cmphex(status, ==, BM_STS_INTR); - assert_bit_clear(inb(IDE_BASE + reg_status), DF | ERR); + assert_bit_clear(qpci_io_readb(dev, ide_bar, reg_status), DF | ERR); g_free(buf); } static void test_flush(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t data; ide_test_start( "-drive file=blkdebug::%s,if=ide,cache=writeback,format=raw", tmp_path); + dev = get_pci_device(&bmdma_bar, &ide_bar); + qtest_irq_intercept_in(global_qtest, "ioapic"); /* Dirty media so that CMD_FLUSH_CACHE will actually go to disk */ @@ -549,11 +585,11 @@ static void test_flush(void) g_free(hmp("qemu-io ide0-hd0 \"break flush_to_os A\"")); /* FLUSH CACHE command on device 0*/ - outb(IDE_BASE + reg_device, 0); - outb(IDE_BASE + reg_command, CMD_FLUSH_CACHE); + qpci_io_writeb(dev, ide_bar, reg_device, 0); + qpci_io_writeb(dev, ide_bar, reg_command, CMD_FLUSH_CACHE); /* Check status while request is in flight*/ - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); assert_bit_set(data, BSY | DRDY); assert_bit_clear(data, DF | ERR | DRQ); @@ -561,11 +597,11 @@ static void test_flush(void) g_free(hmp("qemu-io ide0-hd0 \"resume A\"")); /* Check registers */ - data = inb(IDE_BASE + reg_device); + data = qpci_io_readb(dev, ide_bar, reg_device); g_assert_cmpint(data & DEV, ==, 0); do { - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); } while (data & BSY); assert_bit_set(data, DRDY); @@ -576,28 +612,31 @@ static void test_flush(void) static void test_retry_flush(const char *machine) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t data; 
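The ide-test conversion above replaces raw inb()/outb() on the fixed IDE_BASE and BMDMA ports with libqos accessors that take the QPCIDevice and a QPCIBar token obtained from qpci_iomap() or qpci_legacy_iomap(). A condensed sketch of that access pattern follows; it assumes a qtest session has already been started with a PIIX3-based machine, and the devfn and register offsets are illustrative rather than taken from the patch.

#include "qemu/osdep.h"
#include "libqtest.h"
#include "libqos/pci-pc.h"

static void probe_ide_status(void)
{
    QPCIBus *bus = qpci_init_pc(NULL);
    QPCIDevice *dev = qpci_device_find(bus, QPCI_DEVFN(1, 1)); /* 82371SB IDE */
    QPCIBar bmdma_bar, ide_bar;
    uint8_t status;

    g_assert(dev != NULL);

    bmdma_bar = qpci_iomap(dev, 4, NULL);         /* bus-master DMA registers */
    ide_bar = qpci_legacy_iomap(dev, 0x1f0);      /* legacy command block */
    qpci_device_enable(dev);

    /* All accesses go through the BAR token instead of raw inb()/outb() */
    status = qpci_io_readb(dev, bmdma_bar, 2);    /* BMDMA status register */
    g_assert((status & 0x01) == 0);               /* no transfer active */

    status = qpci_io_readb(dev, ide_bar, 7);      /* ATA status register */
    g_assert((status & 0x80) == 0);               /* BSY should be clear */

    g_free(dev);
    qpci_free_pc(bus);
}

The same QPCIBar-based calls are what the e1000e and ivshmem test updates elsewhere in this patch switch to, so the tests no longer depend on x86-only port I/O helpers.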
const char *s; prepare_blkdebug_script(debug_path, "flush_to_disk"); ide_test_start( - "-vnc none " "-drive file=blkdebug:%s:%s,if=ide,cache=writeback,format=raw," "rerror=stop,werror=stop", debug_path, tmp_path); + dev = get_pci_device(&bmdma_bar, &ide_bar); + qtest_irq_intercept_in(global_qtest, "ioapic"); /* Dirty media so that CMD_FLUSH_CACHE will actually go to disk */ make_dirty(0); /* FLUSH CACHE command on device 0*/ - outb(IDE_BASE + reg_device, 0); - outb(IDE_BASE + reg_command, CMD_FLUSH_CACHE); + qpci_io_writeb(dev, ide_bar, reg_device, 0); + qpci_io_writeb(dev, ide_bar, reg_command, CMD_FLUSH_CACHE); /* Check status while request is in flight*/ - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); assert_bit_set(data, BSY | DRDY); assert_bit_clear(data, DF | ERR | DRQ); @@ -608,11 +647,11 @@ static void test_retry_flush(const char *machine) qmp_discard_response(s); /* Check registers */ - data = inb(IDE_BASE + reg_device); + data = qpci_io_readb(dev, ide_bar, reg_device); g_assert_cmpint(data & DEV, ==, 0); do { - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); } while (data & BSY); assert_bit_set(data, DRDY); @@ -623,11 +662,16 @@ static void test_retry_flush(const char *machine) static void test_flush_nodev(void) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; + ide_test_start(""); + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* FLUSH CACHE command on device 0*/ - outb(IDE_BASE + reg_device, 0); - outb(IDE_BASE + reg_command, CMD_FLUSH_CACHE); + qpci_io_writeb(dev, ide_bar, reg_device, 0); + qpci_io_writeb(dev, ide_bar, reg_command, CMD_FLUSH_CACHE); /* Just testing that qemu doesn't crash... */ @@ -654,7 +698,8 @@ typedef struct Read10CDB { uint16_t padding; } __attribute__((__packed__)) Read10CDB; -static void send_scsi_cdb_read10(uint64_t lba, int nblocks) +static void send_scsi_cdb_read10(QPCIDevice *dev, QPCIBar ide_bar, + uint64_t lba, int nblocks) { Read10CDB pkt = { .padding = 0 }; int i; @@ -670,7 +715,8 @@ static void send_scsi_cdb_read10(uint64_t lba, int nblocks) /* Send Packet */ for (i = 0; i < sizeof(Read10CDB)/2; i++) { - outw(IDE_BASE + reg_data, cpu_to_le16(((uint16_t *)&pkt)[i])); + qpci_io_writew(dev, ide_bar, reg_data, + le16_to_cpu(((uint16_t *)&pkt)[i])); } } @@ -683,13 +729,17 @@ static void nsleep(int64_t nsecs) static uint8_t ide_wait_clear(uint8_t flag) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; uint8_t data; time_t st; + dev = get_pci_device(&bmdma_bar, &ide_bar); + /* Wait with a 5 second timeout */ time(&st); while (true) { - data = inb(IDE_BASE + reg_status); + data = qpci_io_readb(dev, ide_bar, reg_status); if (!(data & flag)) { return data; } @@ -723,6 +773,8 @@ static void ide_wait_intr(int irq) static void cdrom_pio_impl(int nblocks) { + QPCIDevice *dev; + QPCIBar bmdma_bar, ide_bar; FILE *fh; int patt_blocks = MAX(16, nblocks); size_t patt_len = ATAPI_BLOCK_SIZE * patt_blocks; @@ -741,13 +793,14 @@ static void cdrom_pio_impl(int nblocks) ide_test_start("-drive if=none,file=%s,media=cdrom,format=raw,id=sr0,index=0 " "-device ide-cd,drive=sr0,bus=ide.0", tmp_path); + dev = get_pci_device(&bmdma_bar, &ide_bar); qtest_irq_intercept_in(global_qtest, "ioapic"); /* PACKET command on device 0 */ - outb(IDE_BASE + reg_device, 0); - outb(IDE_BASE + reg_lba_middle, BYTE_COUNT_LIMIT & 0xFF); - outb(IDE_BASE + reg_lba_high, (BYTE_COUNT_LIMIT >> 8 & 0xFF)); - outb(IDE_BASE + reg_command, CMD_PACKET); + qpci_io_writeb(dev, ide_bar, reg_device, 0); + qpci_io_writeb(dev, 
ide_bar, reg_lba_middle, BYTE_COUNT_LIMIT & 0xFF); + qpci_io_writeb(dev, ide_bar, reg_lba_high, (BYTE_COUNT_LIMIT >> 8 & 0xFF)); + qpci_io_writeb(dev, ide_bar, reg_command, CMD_PACKET); /* HP0: Check_Status_A State */ nsleep(400); data = ide_wait_clear(BSY); @@ -756,7 +809,7 @@ static void cdrom_pio_impl(int nblocks) assert_bit_clear(data, ERR | DF | BSY); /* SCSI CDB (READ10) -- read n*2048 bytes from block 0 */ - send_scsi_cdb_read10(0, nblocks); + send_scsi_cdb_read10(dev, ide_bar, 0, nblocks); /* Read data back: occurs in bursts of 'BYTE_COUNT_LIMIT' bytes. * If BYTE_COUNT_LIMIT is odd, we transfer BYTE_COUNT_LIMIT - 1 bytes. @@ -780,7 +833,8 @@ static void cdrom_pio_impl(int nblocks) /* HP4: Transfer_Data */ for (j = 0; j < MIN((limit / 2), rem); j++) { - rx[offset + j] = le16_to_cpu(inw(IDE_BASE + reg_data)); + rx[offset + j] = cpu_to_le16(qpci_io_readw(dev, ide_bar, + reg_data)); } } diff --git a/tests/ipmi-bt-test.c b/tests/ipmi-bt-test.c index be9005eb85..e84dd6889b 100644 --- a/tests/ipmi-bt-test.c +++ b/tests/ipmi-bt-test.c @@ -309,7 +309,7 @@ static void test_connect(void) uint8_t msg[100]; unsigned int msglen; static uint8_t exp1[] = { 0xff, 0x01, 0xa1 }; /* A protocol version */ - static uint8_t exp2[] = { 0x08, 0x1f, 0xa1 }; /* A capabilities cmd */ + static uint8_t exp2[] = { 0x08, 0x3f, 0xa1 }; /* A capabilities cmd */ FD_ZERO(&readfds); FD_SET(emu_lfd, &readfds); @@ -415,7 +415,7 @@ int main(int argc, char **argv) /* Run the tests */ g_test_init(&argc, &argv, NULL); - cmdline = g_strdup_printf("-vnc none" + cmdline = g_strdup_printf( " -chardev socket,id=ipmi0,host=localhost,port=%d,reconnect=10" " -device ipmi-bmc-extern,chardev=ipmi0,id=bmc0" " -device isa-ipmi-bt,bmc=bmc0", emu_port); diff --git a/tests/ipmi-kcs-test.c b/tests/ipmi-kcs-test.c index 3750389651..9cf0b34a33 100644 --- a/tests/ipmi-kcs-test.c +++ b/tests/ipmi-kcs-test.c @@ -276,7 +276,7 @@ int main(int argc, char **argv) /* Run the tests */ g_test_init(&argc, &argv, NULL); - cmdline = g_strdup_printf("-vnc none -device ipmi-bmc-sim,id=bmc0" + cmdline = g_strdup_printf("-device ipmi-bmc-sim,id=bmc0" " -device isa-ipmi-kcs,bmc=bmc0"); qtest_start(cmdline); qtest_irq_intercept_in(global_qtest, "ioapic"); diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c index 0957ee7555..04a5c5dc7d 100644 --- a/tests/ivshmem-test.c +++ b/tests/ivshmem-test.c @@ -41,7 +41,7 @@ static QPCIDevice *get_device(QPCIBus *pcibus) typedef struct _IVState { QTestState *qtest; - void *reg_base, *mem_base; + QPCIBar reg_bar, mem_bar; QPCIBus *pcibus; QPCIDevice *dev; } IVState; @@ -75,7 +75,7 @@ static inline unsigned in_reg(IVState *s, enum Reg reg) unsigned res; global_qtest = s->qtest; - res = qpci_io_readl(s->dev, s->reg_base + reg); + res = qpci_io_readl(s->dev, s->reg_bar, reg); g_test_message("*%s -> %x\n", name, res); global_qtest = qtest; @@ -89,7 +89,26 @@ static inline void out_reg(IVState *s, enum Reg reg, unsigned v) global_qtest = s->qtest; g_test_message("%x -> *%s\n", v, name); - qpci_io_writel(s->dev, s->reg_base + reg, v); + qpci_io_writel(s->dev, s->reg_bar, reg, v); + global_qtest = qtest; +} + +static inline void read_mem(IVState *s, uint64_t off, void *buf, size_t len) +{ + QTestState *qtest = global_qtest; + + global_qtest = s->qtest; + qpci_memread(s->dev, s->mem_bar, off, buf, len); + global_qtest = qtest; +} + +static inline void write_mem(IVState *s, uint64_t off, + const void *buf, size_t len) +{ + QTestState *qtest = global_qtest; + + global_qtest = s->qtest; + qpci_memwrite(s->dev, s->mem_bar, off, 
buf, len); global_qtest = qtest; } @@ -105,19 +124,17 @@ static void setup_vm_cmd(IVState *s, const char *cmd, bool msix) uint64_t barsize; s->qtest = qtest_start(cmd); - s->pcibus = qpci_init_pc(); + s->pcibus = qpci_init_pc(NULL); s->dev = get_device(s->pcibus); - s->reg_base = qpci_iomap(s->dev, 0, &barsize); - g_assert_nonnull(s->reg_base); + s->reg_bar = qpci_iomap(s->dev, 0, &barsize); g_assert_cmpuint(barsize, ==, 256); if (msix) { qpci_msix_enable(s->dev); } - s->mem_base = qpci_iomap(s->dev, 2, &barsize); - g_assert_nonnull(s->mem_base); + s->mem_bar = qpci_iomap(s->dev, 2, &barsize); g_assert_cmpuint(barsize, ==, TMPSHMSIZE); qpci_device_enable(s->dev); @@ -169,7 +186,7 @@ static void test_ivshmem_single(void) for (i = 0; i < G_N_ELEMENTS(data); i++) { data[i] = i; } - qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data)); + write_mem(s, 0, data, sizeof(data)); /* verify write */ for (i = 0; i < G_N_ELEMENTS(data); i++) { @@ -178,7 +195,7 @@ static void test_ivshmem_single(void) /* read it back and verify read */ memset(data, 0, sizeof(data)); - qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data)); + read_mem(s, 0, data, sizeof(data)); for (i = 0; i < G_N_ELEMENTS(data); i++) { g_assert_cmpuint(data[i], ==, i); } @@ -201,29 +218,29 @@ static void test_ivshmem_pair(void) /* host write, guest 1 & 2 read */ memset(tmpshmem, 0x42, TMPSHMSIZE); - qtest_memread(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE); + read_mem(s1, 0, data, TMPSHMSIZE); for (i = 0; i < TMPSHMSIZE; i++) { g_assert_cmpuint(data[i], ==, 0x42); } - qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE); + read_mem(s2, 0, data, TMPSHMSIZE); for (i = 0; i < TMPSHMSIZE; i++) { g_assert_cmpuint(data[i], ==, 0x42); } /* guest 1 write, guest 2 read */ memset(data, 0x43, TMPSHMSIZE); - qtest_memwrite(s1->qtest, (uintptr_t)s1->mem_base, data, TMPSHMSIZE); + write_mem(s1, 0, data, TMPSHMSIZE); memset(data, 0, TMPSHMSIZE); - qtest_memread(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE); + read_mem(s2, 0, data, TMPSHMSIZE); for (i = 0; i < TMPSHMSIZE; i++) { g_assert_cmpuint(data[i], ==, 0x43); } /* guest 2 write, guest 1 read */ memset(data, 0x44, TMPSHMSIZE); - qtest_memwrite(s2->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE); + write_mem(s2, 0, data, TMPSHMSIZE); memset(data, 0, TMPSHMSIZE); - qtest_memread(s1->qtest, (uintptr_t)s2->mem_base, data, TMPSHMSIZE); + read_mem(s1, 0, data, TMPSHMSIZE); for (i = 0; i < TMPSHMSIZE; i++) { g_assert_cmpuint(data[i], ==, 0x44); } diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c index f3be5500e1..1ca7f456b5 100644 --- a/tests/libqos/ahci.c +++ b/tests/libqos/ahci.c @@ -86,6 +86,7 @@ struct AHCICommand { uint8_t name; uint8_t port; uint8_t slot; + uint8_t errors; uint32_t interrupts; uint64_t xbytes; uint32_t prd_size; @@ -128,7 +129,7 @@ QPCIDevice *get_ahci_device(uint32_t *fingerprint) uint32_t ahci_fingerprint; QPCIBus *pcibus; - pcibus = qpci_init_pc(); + pcibus = qpci_init_pc(NULL); /* Find the AHCI PCI device and verify it's the right one. 
*/ ahci = qpci_device_find(pcibus, QPCI_DEVFN(0x1F, 0x02)); @@ -210,8 +211,7 @@ void ahci_pci_enable(AHCIQState *ahci) void start_ahci_device(AHCIQState *ahci) { /* Map AHCI's ABAR (BAR5) */ - ahci->hba_base = qpci_iomap(ahci->dev, 5, &ahci->barsize); - g_assert(ahci->hba_base); + ahci->hba_bar = qpci_iomap(ahci->dev, 5, &ahci->barsize); /* turns on pci.cmd.iose, pci.cmd.mse and pci.cmd.bme */ qpci_device_enable(ahci->dev); @@ -351,6 +351,7 @@ void ahci_hba_enable(AHCIQState *ahci) reg = ahci_rreg(ahci, AHCI_GHC); ASSERT_BIT_SET(reg, AHCI_GHC_IE); + ahci->enabled = true; /* TODO: The device should now be idling and waiting for commands. * In the future, a small test-case to inspect the Register D2H FIS * and clear the initial interrupts might be good. */ @@ -402,12 +403,14 @@ void ahci_port_clear(AHCIQState *ahci, uint8_t port) /** * Check a port for errors. */ -void ahci_port_check_error(AHCIQState *ahci, uint8_t port) +void ahci_port_check_error(AHCIQState *ahci, uint8_t port, + uint32_t imask, uint8_t emask) { uint32_t reg; /* The upper 9 bits of the IS register all indicate errors. */ reg = ahci_px_rreg(ahci, port, AHCI_PX_IS); + reg &= ~imask; reg >>= 23; g_assert_cmphex(reg, ==, 0); @@ -417,8 +420,13 @@ void ahci_port_check_error(AHCIQState *ahci, uint8_t port) /* The TFD also has two error sections. */ reg = ahci_px_rreg(ahci, port, AHCI_PX_TFD); - ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR); - ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR); + if (!emask) { + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_STS_ERR); + } else { + ASSERT_BIT_SET(reg, AHCI_PX_TFD_STS_ERR); + } + ASSERT_BIT_CLEAR(reg, AHCI_PX_TFD_ERR & (~emask << 8)); + ASSERT_BIT_SET(reg, AHCI_PX_TFD_ERR & (emask << 8)); } void ahci_port_check_interrupts(AHCIQState *ahci, uint8_t port, @@ -633,7 +641,8 @@ void ahci_exec(AHCIQState *ahci, uint8_t port, /* Command creation */ if (opts->atapi) { - cmd = ahci_atapi_command_create(op); + uint16_t bcl = opts->set_bcl ? opts->bcl : ATAPI_SECTOR_SIZE; + cmd = ahci_atapi_command_create(op, bcl); if (opts->atapi_dma) { ahci_command_enable_atapi_dma(cmd); } @@ -864,19 +873,82 @@ AHCICommand *ahci_command_create(uint8_t command_name) return cmd; } -AHCICommand *ahci_atapi_command_create(uint8_t scsi_cmd) +AHCICommand *ahci_atapi_command_create(uint8_t scsi_cmd, uint16_t bcl) { AHCICommand *cmd = ahci_command_create(CMD_PACKET); cmd->atapi_cmd = g_malloc0(16); cmd->atapi_cmd[0] = scsi_cmd; - /* ATAPI needs a PIO transfer chunk size set inside of the LBA registers. - * The block/sector size is a natural default. 
*/ - cmd->fis.lba_lo[1] = ATAPI_SECTOR_SIZE >> 8 & 0xFF; - cmd->fis.lba_lo[2] = ATAPI_SECTOR_SIZE & 0xFF; - + stw_le_p(&cmd->fis.lba_lo[1], bcl); return cmd; } +void ahci_atapi_test_ready(AHCIQState *ahci, uint8_t port, + bool ready, uint8_t expected_sense) +{ + AHCICommand *cmd = ahci_atapi_command_create(CMD_ATAPI_TEST_UNIT_READY, 0); + ahci_command_set_size(cmd, 0); + if (!ready) { + cmd->interrupts |= AHCI_PX_IS_TFES; + cmd->errors |= expected_sense << 4; + } + ahci_command_commit(ahci, cmd, port); + ahci_command_issue(ahci, cmd); + ahci_command_verify(ahci, cmd); + ahci_command_free(cmd); +} + +static int copy_buffer(AHCIQState *ahci, AHCICommand *cmd, + const AHCIOpts *opts) +{ + unsigned char *rx = opts->opaque; + bufread(opts->buffer, rx, opts->size); + return 0; +} + +void ahci_atapi_get_sense(AHCIQState *ahci, uint8_t port, + uint8_t *sense, uint8_t *asc) +{ + unsigned char *rx; + AHCIOpts opts = { + .size = 18, + .atapi = true, + .post_cb = copy_buffer, + }; + rx = g_malloc(18); + opts.opaque = rx; + + ahci_exec(ahci, port, CMD_ATAPI_REQUEST_SENSE, &opts); + + *sense = rx[2]; + *asc = rx[12]; + + g_free(rx); +} + +void ahci_atapi_eject(AHCIQState *ahci, uint8_t port) +{ + AHCICommand *cmd = ahci_atapi_command_create(CMD_ATAPI_START_STOP_UNIT, 0); + ahci_command_set_size(cmd, 0); + + cmd->atapi_cmd[4] = 0x02; /* loej = true */ + ahci_command_commit(ahci, cmd, port); + ahci_command_issue(ahci, cmd); + ahci_command_verify(ahci, cmd); + ahci_command_free(cmd); +} + +void ahci_atapi_load(AHCIQState *ahci, uint8_t port) +{ + AHCICommand *cmd = ahci_atapi_command_create(CMD_ATAPI_START_STOP_UNIT, 0); + ahci_command_set_size(cmd, 0); + + cmd->atapi_cmd[4] = 0x03; /* loej,start = true */ + ahci_command_commit(ahci, cmd, port); + ahci_command_issue(ahci, cmd); + ahci_command_verify(ahci, cmd); + ahci_command_free(cmd); +} + void ahci_command_free(AHCICommand *cmd) { g_free(cmd->atapi_cmd); @@ -901,12 +973,22 @@ static void ahci_atapi_command_set_offset(AHCICommand *cmd, uint64_t lba) switch (cbd[0]) { case CMD_ATAPI_READ_10: + case CMD_ATAPI_READ_CD: g_assert_cmpuint(lba, <=, UINT32_MAX); stl_be_p(&cbd[2], lba); break; + case CMD_ATAPI_REQUEST_SENSE: + case CMD_ATAPI_TEST_UNIT_READY: + case CMD_ATAPI_START_STOP_UNIT: + g_assert_cmpuint(lba, ==, 0x00); + break; default: /* SCSI doesn't have uniform packet formats, * so you have to add support for it manually. Sorry! */ + fprintf(stderr, "The Libqos AHCI driver does not support the " + "set_offset operation for ATAPI command 0x%02x, " + "please add support.\n", + cbd[0]); g_assert_not_reached(); } } @@ -951,6 +1033,7 @@ static void ahci_atapi_set_size(AHCICommand *cmd, uint64_t xbytes) { unsigned char *cbd = cmd->atapi_cmd; uint64_t nsectors = xbytes / 2048; + uint32_t tmp; g_assert(cbd); switch (cbd[0]) { @@ -958,9 +1041,28 @@ static void ahci_atapi_set_size(AHCICommand *cmd, uint64_t xbytes) g_assert_cmpuint(nsectors, <=, UINT16_MAX); stw_be_p(&cbd[7], nsectors); break; + case CMD_ATAPI_READ_CD: + /* 24bit BE store */ + g_assert_cmpuint(nsectors, <, 1ULL << 24); + tmp = nsectors; + cbd[6] = (tmp & 0xFF0000) >> 16; + cbd[7] = (tmp & 0xFF00) >> 8; + cbd[8] = (tmp & 0xFF); + break; + case CMD_ATAPI_REQUEST_SENSE: + g_assert_cmpuint(xbytes, <=, UINT8_MAX); + cbd[4] = (uint8_t)xbytes; + break; + case CMD_ATAPI_TEST_UNIT_READY: + case CMD_ATAPI_START_STOP_UNIT: + g_assert_cmpuint(xbytes, ==, 0); + break; default: /* SCSI doesn't have uniform packet formats, * so you have to add support for it manually. Sorry! 
*/ + fprintf(stderr, "The Libqos AHCI driver does not support the set_size " + "operation for ATAPI command 0x%02x, please add support.\n", + cbd[0]); g_assert_not_reached(); } } @@ -1105,7 +1207,7 @@ void ahci_command_verify(AHCIQState *ahci, AHCICommand *cmd) uint8_t slot = cmd->slot; uint8_t port = cmd->port; - ahci_port_check_error(ahci, port); + ahci_port_check_error(ahci, port, cmd->interrupts, cmd->errors); ahci_port_check_interrupts(ahci, port, cmd->interrupts); ahci_port_check_nonbusy(ahci, port, slot); ahci_port_check_cmd_sanity(ahci, cmd); diff --git a/tests/libqos/ahci.h b/tests/libqos/ahci.h index c69fb5ae90..5f9627bb0f 100644 --- a/tests/libqos/ahci.h +++ b/tests/libqos/ahci.h @@ -287,7 +287,22 @@ enum { /* ATAPI Commands */ enum { - CMD_ATAPI_READ_10 = 0x28, + CMD_ATAPI_TEST_UNIT_READY = 0x00, + CMD_ATAPI_REQUEST_SENSE = 0x03, + CMD_ATAPI_START_STOP_UNIT = 0x1b, + CMD_ATAPI_READ_10 = 0x28, + CMD_ATAPI_READ_CD = 0xbe, +}; + +enum { + SENSE_NO_SENSE = 0x00, + SENSE_NOT_READY = 0x02, + SENSE_UNIT_ATTENTION = 0x06, +}; + +enum { + ASC_MEDIUM_MAY_HAVE_CHANGED = 0x28, + ASC_MEDIUM_NOT_PRESENT = 0x3a, }; /* AHCI Command Header Flags & Masks*/ @@ -321,12 +336,13 @@ typedef struct AHCIPortQState { typedef struct AHCIQState { QOSState *parent; QPCIDevice *dev; - void *hba_base; + QPCIBar hba_bar; uint64_t barsize; uint32_t fingerprint; uint32_t cap; uint32_t cap2; AHCIPortQState port[32]; + bool enabled; } AHCIQState; /** @@ -461,12 +477,14 @@ typedef struct AHCICommand AHCICommand; /* Options to ahci_exec */ typedef struct AHCIOpts { - size_t size; - unsigned prd_size; - uint64_t lba; - uint64_t buffer; - bool atapi; - bool atapi_dma; + size_t size; /* Size of transfer */ + unsigned prd_size; /* Size per-each PRD */ + bool set_bcl; /* Override the default BCL of ATAPI_SECTOR_SIZE */ + unsigned bcl; /* Byte Count Limit, for ATAPI PIO */ + uint64_t lba; /* Starting LBA offset */ + uint64_t buffer; /* Pointer to source or destination guest buffer */ + bool atapi; /* ATAPI command? */ + bool atapi_dma; /* Use DMA for ATAPI? 
*/ bool error; int (*pre_cb)(AHCIQState*, AHCICommand*, const struct AHCIOpts *); int (*mid_cb)(AHCIQState*, AHCICommand*, const struct AHCIOpts *); @@ -488,12 +506,12 @@ typedef struct AHCIOpts { static inline uint32_t ahci_mread(AHCIQState *ahci, size_t offset) { - return qpci_io_readl(ahci->dev, ahci->hba_base + offset); + return qpci_io_readl(ahci->dev, ahci->hba_bar, offset); } static inline void ahci_mwrite(AHCIQState *ahci, size_t offset, uint32_t value) { - qpci_io_writel(ahci->dev, ahci->hba_base + offset, value); + qpci_io_writel(ahci->dev, ahci->hba_bar, offset, value); } static inline uint32_t ahci_rreg(AHCIQState *ahci, uint32_t reg_num) @@ -572,7 +590,8 @@ void ahci_set_command_header(AHCIQState *ahci, uint8_t port, void ahci_destroy_command(AHCIQState *ahci, uint8_t port, uint8_t slot); /* AHCI sanity check routines */ -void ahci_port_check_error(AHCIQState *ahci, uint8_t port); +void ahci_port_check_error(AHCIQState *ahci, uint8_t port, + uint32_t imask, uint8_t emask); void ahci_port_check_interrupts(AHCIQState *ahci, uint8_t port, uint32_t intr_mask); void ahci_port_check_nonbusy(AHCIQState *ahci, uint8_t port, uint8_t slot); @@ -595,10 +614,16 @@ void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd, void *buffer, size_t bufsize, uint64_t sector); void ahci_exec(AHCIQState *ahci, uint8_t port, uint8_t op, const AHCIOpts *opts); +void ahci_atapi_test_ready(AHCIQState *ahci, uint8_t port, bool ready, + uint8_t expected_sense); +void ahci_atapi_get_sense(AHCIQState *ahci, uint8_t port, + uint8_t *sense, uint8_t *asc); +void ahci_atapi_eject(AHCIQState *ahci, uint8_t port); +void ahci_atapi_load(AHCIQState *ahci, uint8_t port); /* Command: Fine-grained lifecycle */ AHCICommand *ahci_command_create(uint8_t command_name); -AHCICommand *ahci_atapi_command_create(uint8_t scsi_cmd); +AHCICommand *ahci_atapi_command_create(uint8_t scsi_cmd, uint16_t bcl); void ahci_command_commit(AHCIQState *ahci, AHCICommand *cmd, uint8_t port); void ahci_command_issue(AHCIQState *ahci, AHCICommand *cmd); void ahci_command_issue_async(AHCIQState *ahci, AHCICommand *cmd); diff --git a/tests/libqos/libqos-pc.c b/tests/libqos/libqos-pc.c index 72b5e3ba09..b554758802 100644 --- a/tests/libqos/libqos-pc.c +++ b/tests/libqos/libqos-pc.c @@ -1,10 +1,14 @@ #include "qemu/osdep.h" #include "libqos/libqos-pc.h" #include "libqos/malloc-pc.h" +#include "libqos/pci-pc.h" static QOSOps qos_ops = { .init_allocator = pc_alloc_init_flags, - .uninit_allocator = pc_alloc_uninit + .uninit_allocator = pc_alloc_uninit, + .qpci_init = qpci_init_pc, + .qpci_free = qpci_free_pc, + .shutdown = qtest_pc_shutdown, }; QOSState *qtest_pc_vboot(const char *cmdline_fmt, va_list ap) @@ -21,10 +25,12 @@ QOSState *qtest_pc_boot(const char *cmdline_fmt, ...) 
qs = qtest_vboot(&qos_ops, cmdline_fmt, ap); va_end(ap); + qtest_irq_intercept_in(global_qtest, "ioapic"); + return qs; } void qtest_pc_shutdown(QOSState *qs) { - return qtest_shutdown(qs); + return qtest_common_shutdown(qs); } diff --git a/tests/libqos/libqos-spapr.c b/tests/libqos/libqos-spapr.c new file mode 100644 index 0000000000..a37791e5d0 --- /dev/null +++ b/tests/libqos/libqos-spapr.c @@ -0,0 +1,34 @@ +#include "qemu/osdep.h" +#include "libqos/libqos-spapr.h" +#include "libqos/malloc-spapr.h" +#include "libqos/pci-spapr.h" + +static QOSOps qos_ops = { + .init_allocator = spapr_alloc_init_flags, + .uninit_allocator = spapr_alloc_uninit, + .qpci_init = qpci_init_spapr, + .qpci_free = qpci_free_spapr, + .shutdown = qtest_spapr_shutdown, +}; + +QOSState *qtest_spapr_vboot(const char *cmdline_fmt, va_list ap) +{ + return qtest_vboot(&qos_ops, cmdline_fmt, ap); +} + +QOSState *qtest_spapr_boot(const char *cmdline_fmt, ...) +{ + QOSState *qs; + va_list ap; + + va_start(ap, cmdline_fmt); + qs = qtest_vboot(&qos_ops, cmdline_fmt, ap); + va_end(ap); + + return qs; +} + +void qtest_spapr_shutdown(QOSState *qs) +{ + return qtest_common_shutdown(qs); +} diff --git a/tests/libqos/libqos-spapr.h b/tests/libqos/libqos-spapr.h new file mode 100644 index 0000000000..dcb5c43ad3 --- /dev/null +++ b/tests/libqos/libqos-spapr.h @@ -0,0 +1,10 @@ +#ifndef LIBQOS_SPAPR_H +#define LIBQOS_SPAPR_H + +#include "libqos/libqos.h" + +QOSState *qtest_spapr_vboot(const char *cmdline_fmt, va_list ap); +QOSState *qtest_spapr_boot(const char *cmdline_fmt, ...); +void qtest_spapr_shutdown(QOSState *qs); + +#endif diff --git a/tests/libqos/libqos.c b/tests/libqos/libqos.c index c7ba441d0b..6226546c28 100644 --- a/tests/libqos/libqos.c +++ b/tests/libqos/libqos.c @@ -10,6 +10,8 @@ /** * Launch QEMU with the given command line, * and then set up interrupts and our guest malloc interface. + * Never returns NULL: + * Terminates the application in case an error is encountered. */ QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap) { @@ -20,9 +22,13 @@ QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap) cmdline = g_strdup_vprintf(cmdline_fmt, ap); qs->qts = qtest_start(cmdline); qs->ops = ops; - qtest_irq_intercept_in(global_qtest, "ioapic"); - if (ops && ops->init_allocator) { - qs->alloc = ops->init_allocator(ALLOC_NO_FLAGS); + if (ops) { + if (ops->init_allocator) { + qs->alloc = ops->init_allocator(ALLOC_NO_FLAGS); + } + if (ops->qpci_init && qs->alloc) { + qs->pcibus = ops->qpci_init(qs->alloc); + } } g_free(cmdline); @@ -48,16 +54,31 @@ QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...) /** * Tear down the QEMU instance. 
*/ -void qtest_shutdown(QOSState *qs) +void qtest_common_shutdown(QOSState *qs) { - if (qs->alloc && qs->ops && qs->ops->uninit_allocator) { - qs->ops->uninit_allocator(qs->alloc); - qs->alloc = NULL; + if (qs->ops) { + if (qs->pcibus && qs->ops->qpci_free) { + qs->ops->qpci_free(qs->pcibus); + qs->pcibus = NULL; + } + if (qs->alloc && qs->ops->uninit_allocator) { + qs->ops->uninit_allocator(qs->alloc); + qs->alloc = NULL; + } } qtest_quit(qs->qts); g_free(qs); } +void qtest_shutdown(QOSState *qs) +{ + if (qs->ops && qs->ops->shutdown) { + qs->ops->shutdown(qs); + } else { + qtest_common_shutdown(qs); + } +} + void set_context(QOSState *s) { global_qtest = s->qts; diff --git a/tests/libqos/libqos.h b/tests/libqos/libqos.h index 604980d125..231969766f 100644 --- a/tests/libqos/libqos.h +++ b/tests/libqos/libqos.h @@ -5,19 +5,26 @@ #include "libqos/pci.h" #include "libqos/malloc-pc.h" +typedef struct QOSState QOSState; + typedef struct QOSOps { QGuestAllocator *(*init_allocator)(QAllocOpts); void (*uninit_allocator)(QGuestAllocator *); + QPCIBus *(*qpci_init)(QGuestAllocator *alloc); + void (*qpci_free)(QPCIBus *bus); + void (*shutdown)(QOSState *); } QOSOps; -typedef struct QOSState { +struct QOSState { QTestState *qts; QGuestAllocator *alloc; + QPCIBus *pcibus; QOSOps *ops; -} QOSState; +}; QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap); QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...); +void qtest_common_shutdown(QOSState *qs); void qtest_shutdown(QOSState *qs); bool have_qemu_img(void); void mkimg(const char *file, const char *fmt, unsigned size_mb); diff --git a/tests/libqos/malloc-spapr.c b/tests/libqos/malloc-spapr.c new file mode 100644 index 0000000000..006404af33 --- /dev/null +++ b/tests/libqos/malloc-spapr.c @@ -0,0 +1,38 @@ +/* + * libqos malloc support for SPAPR + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqos/malloc-spapr.h" + +#include "qemu-common.h" + +#define PAGE_SIZE 4096 + +/* Memory must be a multiple of 256 MB, + * so we have at least 256MB + */ +#define SPAPR_MIN_SIZE 0x10000000 + +void spapr_alloc_uninit(QGuestAllocator *allocator) +{ + alloc_uninit(allocator); +} + +QGuestAllocator *spapr_alloc_init_flags(QAllocOpts flags) +{ + QGuestAllocator *s; + + s = alloc_init_flags(flags, 1 << 20, SPAPR_MIN_SIZE); + alloc_set_page_size(s, PAGE_SIZE); + + return s; +} + +QGuestAllocator *spapr_alloc_init(void) +{ + return spapr_alloc_init_flags(ALLOC_NO_FLAGS); +} diff --git a/tests/libqos/malloc-spapr.h b/tests/libqos/malloc-spapr.h new file mode 100644 index 0000000000..64d0e770d1 --- /dev/null +++ b/tests/libqos/malloc-spapr.h @@ -0,0 +1,17 @@ +/* + * libqos malloc support for SPAPR + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef LIBQOS_MALLOC_SPAPR_H +#define LIBQOS_MALLOC_SPAPR_H + +#include "libqos/malloc.h" + +QGuestAllocator *spapr_alloc_init(void); +QGuestAllocator *spapr_alloc_init_flags(QAllocOpts flags); +void spapr_alloc_uninit(QGuestAllocator *allocator); + +#endif diff --git a/tests/libqos/pci-pc.c b/tests/libqos/pci-pc.c index 1ae2d3780f..ded1c54c06 100644 --- a/tests/libqos/pci-pc.c +++ b/tests/libqos/pci-pc.c @@ -17,7 +17,6 @@ #include "hw/pci/pci_regs.h" #include "qemu-common.h" -#include "qemu/host-utils.h" #define ACPI_PCIHP_ADDR 0xae00 @@ -26,89 +25,58 @@ typedef struct QPCIBusPC { QPCIBus bus; - - uint32_t pci_hole_start; - uint32_t pci_hole_size; - uint32_t pci_hole_alloc; - - uint16_t pci_iohole_start; - uint16_t pci_iohole_size; - uint16_t pci_iohole_alloc; } QPCIBusPC; -static uint8_t qpci_pc_io_readb(QPCIBus *bus, void *addr) +static uint8_t qpci_pc_pio_readb(QPCIBus *bus, uint32_t addr) { - uintptr_t port = (uintptr_t)addr; - uint8_t value; - - if (port < 0x10000) { - value = inb(port); - } else { - value = readb(port); - } - - return value; + return inb(addr); } -static uint16_t qpci_pc_io_readw(QPCIBus *bus, void *addr) +static void qpci_pc_pio_writeb(QPCIBus *bus, uint32_t addr, uint8_t val) { - uintptr_t port = (uintptr_t)addr; - uint16_t value; - - if (port < 0x10000) { - value = inw(port); - } else { - value = readw(port); - } - - return value; + outb(addr, val); } -static uint32_t qpci_pc_io_readl(QPCIBus *bus, void *addr) +static uint16_t qpci_pc_pio_readw(QPCIBus *bus, uint32_t addr) { - uintptr_t port = (uintptr_t)addr; - uint32_t value; - - if (port < 0x10000) { - value = inl(port); - } else { - value = readl(port); - } + return inw(addr); +} - return value; +static void qpci_pc_pio_writew(QPCIBus *bus, uint32_t addr, uint16_t val) +{ + outw(addr, val); } -static void qpci_pc_io_writeb(QPCIBus *bus, void *addr, uint8_t value) +static uint32_t qpci_pc_pio_readl(QPCIBus *bus, uint32_t addr) { - uintptr_t port = (uintptr_t)addr; + return inl(addr); +} - if (port < 0x10000) { - outb(port, value); - } else { - writeb(port, value); - } +static void qpci_pc_pio_writel(QPCIBus *bus, uint32_t addr, uint32_t val) +{ + outl(addr, val); } -static void qpci_pc_io_writew(QPCIBus *bus, void *addr, uint16_t value) +static uint64_t qpci_pc_pio_readq(QPCIBus *bus, uint32_t addr) { - uintptr_t port = (uintptr_t)addr; + return (uint64_t)inl(addr) + ((uint64_t)inl(addr + 4) << 32); +} - if (port < 0x10000) { - outw(port, value); - } else { - writew(port, value); - } +static void qpci_pc_pio_writeq(QPCIBus *bus, uint32_t addr, uint64_t val) +{ + outl(addr, val & 0xffffffff); + outl(addr + 4, val >> 32); } -static void qpci_pc_io_writel(QPCIBus *bus, void *addr, uint32_t value) +static void qpci_pc_memread(QPCIBus *bus, uint32_t addr, void *buf, size_t len) { - uintptr_t port = (uintptr_t)addr; + memread(addr, buf, len); +} - if (port < 0x10000) { - outl(port, value); - } else { - writel(port, value); - } +static void qpci_pc_memwrite(QPCIBus *bus, uint32_t addr, + const void *buf, size_t len) +{ + memwrite(addr, buf, len); } static uint8_t qpci_pc_config_readb(QPCIBus *bus, int devfn, uint8_t offset) @@ -147,84 +115,24 @@ static void qpci_pc_config_writel(QPCIBus *bus, int devfn, uint8_t offset, uint3 outl(0xcfc, value); } -static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, int barno, uint64_t *sizeptr) -{ - QPCIBusPC *s = container_of(bus, QPCIBusPC, bus); - static const int bar_reg_map[] = { - PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1, PCI_BASE_ADDRESS_2, - PCI_BASE_ADDRESS_3, 
PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5, - }; - int bar_reg; - uint32_t addr; - uint64_t size; - uint32_t io_type; - - g_assert(barno >= 0 && barno <= 5); - bar_reg = bar_reg_map[barno]; - - qpci_config_writel(dev, bar_reg, 0xFFFFFFFF); - addr = qpci_config_readl(dev, bar_reg); - - io_type = addr & PCI_BASE_ADDRESS_SPACE; - if (io_type == PCI_BASE_ADDRESS_SPACE_IO) { - addr &= PCI_BASE_ADDRESS_IO_MASK; - } else { - addr &= PCI_BASE_ADDRESS_MEM_MASK; - } - - size = (1ULL << ctzl(addr)); - if (size == 0) { - return NULL; - } - if (sizeptr) { - *sizeptr = size; - } - - if (io_type == PCI_BASE_ADDRESS_SPACE_IO) { - uint16_t loc; - - g_assert(QEMU_ALIGN_UP(s->pci_iohole_alloc, size) + size - <= s->pci_iohole_size); - s->pci_iohole_alloc = QEMU_ALIGN_UP(s->pci_iohole_alloc, size); - loc = s->pci_iohole_start + s->pci_iohole_alloc; - s->pci_iohole_alloc += size; - - qpci_config_writel(dev, bar_reg, loc | PCI_BASE_ADDRESS_SPACE_IO); - - return (void *)(intptr_t)loc; - } else { - uint64_t loc; - - g_assert(QEMU_ALIGN_UP(s->pci_hole_alloc, size) + size - <= s->pci_hole_size); - s->pci_hole_alloc = QEMU_ALIGN_UP(s->pci_hole_alloc, size); - loc = s->pci_hole_start + s->pci_hole_alloc; - s->pci_hole_alloc += size; - - qpci_config_writel(dev, bar_reg, loc); - - return (void *)(intptr_t)loc; - } -} - -static void qpci_pc_iounmap(QPCIBus *bus, void *data) -{ - /* FIXME */ -} - -QPCIBus *qpci_init_pc(void) +QPCIBus *qpci_init_pc(QGuestAllocator *alloc) { QPCIBusPC *ret; ret = g_malloc(sizeof(*ret)); - ret->bus.io_readb = qpci_pc_io_readb; - ret->bus.io_readw = qpci_pc_io_readw; - ret->bus.io_readl = qpci_pc_io_readl; + ret->bus.pio_readb = qpci_pc_pio_readb; + ret->bus.pio_readw = qpci_pc_pio_readw; + ret->bus.pio_readl = qpci_pc_pio_readl; + ret->bus.pio_readq = qpci_pc_pio_readq; - ret->bus.io_writeb = qpci_pc_io_writeb; - ret->bus.io_writew = qpci_pc_io_writew; - ret->bus.io_writel = qpci_pc_io_writel; + ret->bus.pio_writeb = qpci_pc_pio_writeb; + ret->bus.pio_writew = qpci_pc_pio_writew; + ret->bus.pio_writel = qpci_pc_pio_writel; + ret->bus.pio_writeq = qpci_pc_pio_writeq; + + ret->bus.memread = qpci_pc_memread; + ret->bus.memwrite = qpci_pc_memwrite; ret->bus.config_readb = qpci_pc_config_readb; ret->bus.config_readw = qpci_pc_config_readw; @@ -234,16 +142,9 @@ QPCIBus *qpci_init_pc(void) ret->bus.config_writew = qpci_pc_config_writew; ret->bus.config_writel = qpci_pc_config_writel; - ret->bus.iomap = qpci_pc_iomap; - ret->bus.iounmap = qpci_pc_iounmap; - - ret->pci_hole_start = 0xE0000000; - ret->pci_hole_size = 0x20000000; - ret->pci_hole_alloc = 0; - - ret->pci_iohole_start = 0xc000; - ret->pci_iohole_size = 0x4000; - ret->pci_iohole_alloc = 0; + ret->bus.pio_alloc_ptr = 0xc000; + ret->bus.mmio_alloc_ptr = 0xE0000000; + ret->bus.mmio_limit = 0x100000000ULL; return &ret->bus; } @@ -255,28 +156,6 @@ void qpci_free_pc(QPCIBus *bus) g_free(s); } -void qpci_plug_device_test(const char *driver, const char *id, - uint8_t slot, const char *opts) -{ - QDict *response; - char *cmd; - - cmd = g_strdup_printf("{'execute': 'device_add'," - " 'arguments': {" - " 'driver': '%s'," - " 'addr': '%d'," - " %s%s" - " 'id': '%s'" - "}}", driver, slot, - opts ? opts : "", opts ? 
"," : "", - id); - response = qmp(cmd); - g_free(cmd); - g_assert(response); - g_assert(!qdict_haskey(response, "error")); - QDECREF(response); -} - void qpci_unplug_acpi_device_test(const char *id, uint8_t slot) { QDict *response; diff --git a/tests/libqos/pci-pc.h b/tests/libqos/pci-pc.h index 26211790cd..9479b51642 100644 --- a/tests/libqos/pci-pc.h +++ b/tests/libqos/pci-pc.h @@ -14,8 +14,9 @@ #define LIBQOS_PCI_PC_H #include "libqos/pci.h" +#include "libqos/malloc.h" -QPCIBus *qpci_init_pc(void); +QPCIBus *qpci_init_pc(QGuestAllocator *alloc); void qpci_free_pc(QPCIBus *bus); #endif diff --git a/tests/libqos/pci-spapr.c b/tests/libqos/pci-spapr.c new file mode 100644 index 0000000000..1e5d015bd4 --- /dev/null +++ b/tests/libqos/pci-spapr.c @@ -0,0 +1,212 @@ +/* + * libqos PCI bindings for SPAPR + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "libqos/pci-spapr.h" +#include "libqos/rtas.h" + +#include "hw/pci/pci_regs.h" + +#include "qemu-common.h" +#include "qemu/host-utils.h" + + +/* From include/hw/pci-host/spapr.h */ + +typedef struct QPCIWindow { + uint64_t pci_base; /* window address in PCI space */ + uint64_t size; /* window size */ +} QPCIWindow; + +typedef struct QPCIBusSPAPR { + QPCIBus bus; + QGuestAllocator *alloc; + + uint64_t buid; + + uint64_t pio_cpu_base; + QPCIWindow pio; + + uint64_t mmio32_cpu_base; + QPCIWindow mmio32; +} QPCIBusSPAPR; + +/* + * PCI devices are always little-endian + * SPAPR by default is big-endian + * so PCI accessors need to swap data endianness + */ + +static uint8_t qpci_spapr_pio_readb(QPCIBus *bus, uint32_t addr) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + return readb(s->pio_cpu_base + addr); +} + +static void qpci_spapr_pio_writeb(QPCIBus *bus, uint32_t addr, uint8_t val) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + writeb(s->pio_cpu_base + addr, val); +} + +static uint16_t qpci_spapr_pio_readw(QPCIBus *bus, uint32_t addr) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + return bswap16(readw(s->pio_cpu_base + addr)); +} + +static void qpci_spapr_pio_writew(QPCIBus *bus, uint32_t addr, uint16_t val) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + writew(s->pio_cpu_base + addr, bswap16(val)); +} + +static uint32_t qpci_spapr_pio_readl(QPCIBus *bus, uint32_t addr) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + return bswap32(readl(s->pio_cpu_base + addr)); +} + +static void qpci_spapr_pio_writel(QPCIBus *bus, uint32_t addr, uint32_t val) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + writel(s->pio_cpu_base + addr, bswap32(val)); +} + +static uint64_t qpci_spapr_pio_readq(QPCIBus *bus, uint32_t addr) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + return bswap64(readq(s->pio_cpu_base + addr)); +} + +static void qpci_spapr_pio_writeq(QPCIBus *bus, uint32_t addr, uint64_t val) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + writeq(s->pio_cpu_base + addr, bswap64(val)); +} + +static void qpci_spapr_memread(QPCIBus *bus, uint32_t addr, + void *buf, size_t len) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + memread(s->mmio32_cpu_base + addr, buf, len); +} + +static void qpci_spapr_memwrite(QPCIBus *bus, uint32_t addr, + const void *buf, size_t len) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + memwrite(s->mmio32_cpu_base + addr, buf, len); +} 
+ +static uint8_t qpci_spapr_config_readb(QPCIBus *bus, int devfn, uint8_t offset) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + uint32_t config_addr = (devfn << 8) | offset; + return qrtas_ibm_read_pci_config(s->alloc, s->buid, config_addr, 1); +} + +static uint16_t qpci_spapr_config_readw(QPCIBus *bus, int devfn, uint8_t offset) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + uint32_t config_addr = (devfn << 8) | offset; + return qrtas_ibm_read_pci_config(s->alloc, s->buid, config_addr, 2); +} + +static uint32_t qpci_spapr_config_readl(QPCIBus *bus, int devfn, uint8_t offset) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + uint32_t config_addr = (devfn << 8) | offset; + return qrtas_ibm_read_pci_config(s->alloc, s->buid, config_addr, 4); +} + +static void qpci_spapr_config_writeb(QPCIBus *bus, int devfn, uint8_t offset, + uint8_t value) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + uint32_t config_addr = (devfn << 8) | offset; + qrtas_ibm_write_pci_config(s->alloc, s->buid, config_addr, 1, value); +} + +static void qpci_spapr_config_writew(QPCIBus *bus, int devfn, uint8_t offset, + uint16_t value) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + uint32_t config_addr = (devfn << 8) | offset; + qrtas_ibm_write_pci_config(s->alloc, s->buid, config_addr, 2, value); +} + +static void qpci_spapr_config_writel(QPCIBus *bus, int devfn, uint8_t offset, + uint32_t value) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + uint32_t config_addr = (devfn << 8) | offset; + qrtas_ibm_write_pci_config(s->alloc, s->buid, config_addr, 4, value); +} + +#define SPAPR_PCI_BASE (1ULL << 45) + +#define SPAPR_PCI_MMIO32_WIN_SIZE 0x80000000 /* 2 GiB */ +#define SPAPR_PCI_IO_WIN_SIZE 0x10000 + +QPCIBus *qpci_init_spapr(QGuestAllocator *alloc) +{ + QPCIBusSPAPR *ret; + + ret = g_malloc(sizeof(*ret)); + + ret->alloc = alloc; + + ret->bus.pio_readb = qpci_spapr_pio_readb; + ret->bus.pio_readw = qpci_spapr_pio_readw; + ret->bus.pio_readl = qpci_spapr_pio_readl; + ret->bus.pio_readq = qpci_spapr_pio_readq; + + ret->bus.pio_writeb = qpci_spapr_pio_writeb; + ret->bus.pio_writew = qpci_spapr_pio_writew; + ret->bus.pio_writel = qpci_spapr_pio_writel; + ret->bus.pio_writeq = qpci_spapr_pio_writeq; + + ret->bus.memread = qpci_spapr_memread; + ret->bus.memwrite = qpci_spapr_memwrite; + + ret->bus.config_readb = qpci_spapr_config_readb; + ret->bus.config_readw = qpci_spapr_config_readw; + ret->bus.config_readl = qpci_spapr_config_readl; + + ret->bus.config_writeb = qpci_spapr_config_writeb; + ret->bus.config_writew = qpci_spapr_config_writew; + ret->bus.config_writel = qpci_spapr_config_writel; + + /* FIXME: We assume the default location of the PHB for now. 
+ * Ideally we'd parse the device tree deposited in the guest to + * get the window locations */ + ret->buid = 0x800000020000000ULL; + + ret->pio_cpu_base = SPAPR_PCI_BASE; + ret->pio.pci_base = 0; + ret->pio.size = SPAPR_PCI_IO_WIN_SIZE; + + /* 32-bit portion of the MMIO window is at PCI address 2..4 GiB */ + ret->mmio32_cpu_base = SPAPR_PCI_BASE + SPAPR_PCI_MMIO32_WIN_SIZE; + ret->mmio32.pci_base = 0x80000000; /* 2 GiB */ + ret->mmio32.size = SPAPR_PCI_MMIO32_WIN_SIZE; + + ret->bus.pio_alloc_ptr = 0xc000; + ret->bus.mmio_alloc_ptr = ret->mmio32.pci_base; + ret->bus.mmio_limit = ret->mmio32.pci_base + ret->mmio32.size; + + return &ret->bus; +} + +void qpci_free_spapr(QPCIBus *bus) +{ + QPCIBusSPAPR *s = container_of(bus, QPCIBusSPAPR, bus); + + g_free(s); +} diff --git a/tests/libqos/pci-spapr.h b/tests/libqos/pci-spapr.h new file mode 100644 index 0000000000..4192126d86 --- /dev/null +++ b/tests/libqos/pci-spapr.h @@ -0,0 +1,17 @@ +/* + * libqos PCI bindings for SPAPR + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef LIBQOS_PCI_SPAPR_H +#define LIBQOS_PCI_SPAPR_H + +#include "libqos/malloc.h" +#include "libqos/pci.h" + +QPCIBus *qpci_init_spapr(QGuestAllocator *alloc); +void qpci_free_spapr(QPCIBus *bus); + +#endif diff --git a/tests/libqos/pci.c b/tests/libqos/pci.c index ed78d91cea..2dcdeade2a 100644 --- a/tests/libqos/pci.c +++ b/tests/libqos/pci.c @@ -14,6 +14,7 @@ #include "libqos/pci.h" #include "hw/pci/pci_regs.h" +#include "qemu/host-utils.h" void qpci_device_foreach(QPCIBus *bus, int vendor_id, int device_id, void (*func)(QPCIDevice *dev, int devfn, void *data), @@ -103,7 +104,6 @@ void qpci_msix_enable(QPCIDevice *dev) uint32_t table; uint8_t bir_table; uint8_t bir_pba; - void *offset; addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX); g_assert_cmphex(addr, !=, 0); @@ -113,18 +113,16 @@ void qpci_msix_enable(QPCIDevice *dev) table = qpci_config_readl(dev, addr + PCI_MSIX_TABLE); bir_table = table & PCI_MSIX_FLAGS_BIRMASK; - offset = qpci_iomap(dev, bir_table, NULL); - dev->msix_table = offset + (table & ~PCI_MSIX_FLAGS_BIRMASK); + dev->msix_table_bar = qpci_iomap(dev, bir_table, NULL); + dev->msix_table_off = table & ~PCI_MSIX_FLAGS_BIRMASK; table = qpci_config_readl(dev, addr + PCI_MSIX_PBA); bir_pba = table & PCI_MSIX_FLAGS_BIRMASK; if (bir_pba != bir_table) { - offset = qpci_iomap(dev, bir_pba, NULL); + dev->msix_pba_bar = qpci_iomap(dev, bir_pba, NULL); } - dev->msix_pba = offset + (table & ~PCI_MSIX_FLAGS_BIRMASK); + dev->msix_pba_off = table & ~PCI_MSIX_FLAGS_BIRMASK; - g_assert(dev->msix_table != NULL); - g_assert(dev->msix_pba != NULL); dev->msix_enabled = true; } @@ -140,22 +138,23 @@ void qpci_msix_disable(QPCIDevice *dev) qpci_config_writew(dev, addr + PCI_MSIX_FLAGS, val & ~PCI_MSIX_FLAGS_ENABLE); - qpci_iounmap(dev, dev->msix_table); - qpci_iounmap(dev, dev->msix_pba); + qpci_iounmap(dev, dev->msix_table_bar); + qpci_iounmap(dev, dev->msix_pba_bar); dev->msix_enabled = 0; - dev->msix_table = NULL; - dev->msix_pba = NULL; + dev->msix_table_off = 0; + dev->msix_pba_off = 0; } bool qpci_msix_pending(QPCIDevice *dev, uint16_t entry) { uint32_t pba_entry; uint8_t bit_n = entry % 32; - void *addr = dev->msix_pba + (entry / 32) * PCI_MSIX_ENTRY_SIZE / 4; + uint64_t off = (entry / 32) * PCI_MSIX_ENTRY_SIZE / 4; g_assert(dev->msix_enabled); - pba_entry = qpci_io_readl(dev, addr); - qpci_io_writel(dev, addr, pba_entry & ~(1 << bit_n)); + pba_entry = qpci_io_readl(dev, 
dev->msix_pba_bar, dev->msix_pba_off + off); + qpci_io_writel(dev, dev->msix_pba_bar, dev->msix_pba_off + off, + pba_entry & ~(1 << bit_n)); return (pba_entry & (1 << bit_n)) != 0; } @@ -163,7 +162,7 @@ bool qpci_msix_masked(QPCIDevice *dev, uint16_t entry) { uint8_t addr; uint16_t val; - void *vector_addr = dev->msix_table + (entry * PCI_MSIX_ENTRY_SIZE); + uint64_t vector_off = dev->msix_table_off + entry * PCI_MSIX_ENTRY_SIZE; g_assert(dev->msix_enabled); addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX); @@ -173,8 +172,9 @@ bool qpci_msix_masked(QPCIDevice *dev, uint16_t entry) if (val & PCI_MSIX_FLAGS_MASKALL) { return true; } else { - return (qpci_io_readl(dev, vector_addr + PCI_MSIX_ENTRY_VECTOR_CTRL) - & PCI_MSIX_ENTRY_CTRL_MASKBIT) != 0; + return (qpci_io_readl(dev, dev->msix_table_bar, + vector_off + PCI_MSIX_ENTRY_VECTOR_CTRL) + & PCI_MSIX_ENTRY_CTRL_MASKBIT) != 0; } } @@ -221,46 +221,194 @@ void qpci_config_writel(QPCIDevice *dev, uint8_t offset, uint32_t value) dev->bus->config_writel(dev->bus, dev->devfn, offset, value); } +uint8_t qpci_io_readb(QPCIDevice *dev, QPCIBar token, uint64_t off) +{ + if (token.addr < QPCI_PIO_LIMIT) { + return dev->bus->pio_readb(dev->bus, token.addr + off); + } else { + uint8_t val; + dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + return val; + } +} -uint8_t qpci_io_readb(QPCIDevice *dev, void *data) +uint16_t qpci_io_readw(QPCIDevice *dev, QPCIBar token, uint64_t off) { - return dev->bus->io_readb(dev->bus, data); + if (token.addr < QPCI_PIO_LIMIT) { + return dev->bus->pio_readw(dev->bus, token.addr + off); + } else { + uint16_t val; + dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + return le16_to_cpu(val); + } } -uint16_t qpci_io_readw(QPCIDevice *dev, void *data) +uint32_t qpci_io_readl(QPCIDevice *dev, QPCIBar token, uint64_t off) { - return dev->bus->io_readw(dev->bus, data); + if (token.addr < QPCI_PIO_LIMIT) { + return dev->bus->pio_readl(dev->bus, token.addr + off); + } else { + uint32_t val; + dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + return le32_to_cpu(val); + } } -uint32_t qpci_io_readl(QPCIDevice *dev, void *data) +uint64_t qpci_io_readq(QPCIDevice *dev, QPCIBar token, uint64_t off) { - return dev->bus->io_readl(dev->bus, data); + if (token.addr < QPCI_PIO_LIMIT) { + return dev->bus->pio_readq(dev->bus, token.addr + off); + } else { + uint64_t val; + dev->bus->memread(dev->bus, token.addr + off, &val, sizeof(val)); + return le64_to_cpu(val); + } } +void qpci_io_writeb(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint8_t value) +{ + if (token.addr < QPCI_PIO_LIMIT) { + dev->bus->pio_writeb(dev->bus, token.addr + off, value); + } else { + dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + } +} -void qpci_io_writeb(QPCIDevice *dev, void *data, uint8_t value) +void qpci_io_writew(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint16_t value) { - dev->bus->io_writeb(dev->bus, data, value); + if (token.addr < QPCI_PIO_LIMIT) { + dev->bus->pio_writew(dev->bus, token.addr + off, value); + } else { + value = cpu_to_le16(value); + dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + } } -void qpci_io_writew(QPCIDevice *dev, void *data, uint16_t value) +void qpci_io_writel(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint32_t value) { - dev->bus->io_writew(dev->bus, data, value); + if (token.addr < QPCI_PIO_LIMIT) { + dev->bus->pio_writel(dev->bus, token.addr + off, value); + } else { + value = cpu_to_le32(value); + 
dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + } } -void qpci_io_writel(QPCIDevice *dev, void *data, uint32_t value) +void qpci_io_writeq(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint64_t value) { - dev->bus->io_writel(dev->bus, data, value); + if (token.addr < QPCI_PIO_LIMIT) { + dev->bus->pio_writeq(dev->bus, token.addr + off, value); + } else { + value = cpu_to_le64(value); + dev->bus->memwrite(dev->bus, token.addr + off, &value, sizeof(value)); + } } -void *qpci_iomap(QPCIDevice *dev, int barno, uint64_t *sizeptr) +void qpci_memread(QPCIDevice *dev, QPCIBar token, uint64_t off, + void *buf, size_t len) { - return dev->bus->iomap(dev->bus, dev, barno, sizeptr); + g_assert(token.addr >= QPCI_PIO_LIMIT); + dev->bus->memread(dev->bus, token.addr + off, buf, len); } -void qpci_iounmap(QPCIDevice *dev, void *data) +void qpci_memwrite(QPCIDevice *dev, QPCIBar token, uint64_t off, + const void *buf, size_t len) { - dev->bus->iounmap(dev->bus, data); + g_assert(token.addr >= QPCI_PIO_LIMIT); + dev->bus->memwrite(dev->bus, token.addr + off, buf, len); } +QPCIBar qpci_iomap(QPCIDevice *dev, int barno, uint64_t *sizeptr) +{ + QPCIBus *bus = dev->bus; + static const int bar_reg_map[] = { + PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1, PCI_BASE_ADDRESS_2, + PCI_BASE_ADDRESS_3, PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5, + }; + QPCIBar bar; + int bar_reg; + uint32_t addr, size; + uint32_t io_type; + uint64_t loc; + + g_assert(barno >= 0 && barno <= 5); + bar_reg = bar_reg_map[barno]; + + qpci_config_writel(dev, bar_reg, 0xFFFFFFFF); + addr = qpci_config_readl(dev, bar_reg); + + io_type = addr & PCI_BASE_ADDRESS_SPACE; + if (io_type == PCI_BASE_ADDRESS_SPACE_IO) { + addr &= PCI_BASE_ADDRESS_IO_MASK; + } else { + addr &= PCI_BASE_ADDRESS_MEM_MASK; + } + g_assert(addr); /* Must have *some* size bits */ + + size = 1U << ctz32(addr); + if (sizeptr) { + *sizeptr = size; + } + + if (io_type == PCI_BASE_ADDRESS_SPACE_IO) { + loc = QEMU_ALIGN_UP(bus->pio_alloc_ptr, size); + + g_assert(loc >= bus->pio_alloc_ptr); + g_assert(loc + size <= QPCI_PIO_LIMIT); /* Keep PIO below 64kiB */ + + bus->pio_alloc_ptr = loc + size; + + qpci_config_writel(dev, bar_reg, loc | PCI_BASE_ADDRESS_SPACE_IO); + } else { + loc = QEMU_ALIGN_UP(bus->mmio_alloc_ptr, size); + + /* Check for space */ + g_assert(loc >= bus->mmio_alloc_ptr); + g_assert(loc + size <= bus->mmio_limit); + + bus->mmio_alloc_ptr = loc + size; + + qpci_config_writel(dev, bar_reg, loc); + } + + bar.addr = loc; + return bar; +} + +void qpci_iounmap(QPCIDevice *dev, QPCIBar bar) +{ + /* FIXME */ +} + +QPCIBar qpci_legacy_iomap(QPCIDevice *dev, uint16_t addr) +{ + QPCIBar bar = { .addr = addr }; + return bar; +} + +void qpci_plug_device_test(const char *driver, const char *id, + uint8_t slot, const char *opts) +{ + QDict *response; + char *cmd; + + cmd = g_strdup_printf("{'execute': 'device_add'," + " 'arguments': {" + " 'driver': '%s'," + " 'addr': '%d'," + " %s%s" + " 'id': '%s'" + "}}", driver, slot, + opts ? opts : "", opts ? 
"," : "", + id); + response = qmp(cmd); + g_free(cmd); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); +} diff --git a/tests/libqos/pci.h b/tests/libqos/pci.h index c06add8dbf..ed480614ff 100644 --- a/tests/libqos/pci.h +++ b/tests/libqos/pci.h @@ -15,20 +15,27 @@ #include "libqtest.h" +#define QPCI_PIO_LIMIT 0x10000 + #define QPCI_DEVFN(dev, fn) (((dev) << 3) | (fn)) typedef struct QPCIDevice QPCIDevice; typedef struct QPCIBus QPCIBus; +typedef struct QPCIBar QPCIBar; -struct QPCIBus -{ - uint8_t (*io_readb)(QPCIBus *bus, void *addr); - uint16_t (*io_readw)(QPCIBus *bus, void *addr); - uint32_t (*io_readl)(QPCIBus *bus, void *addr); +struct QPCIBus { + uint8_t (*pio_readb)(QPCIBus *bus, uint32_t addr); + uint16_t (*pio_readw)(QPCIBus *bus, uint32_t addr); + uint32_t (*pio_readl)(QPCIBus *bus, uint32_t addr); + uint64_t (*pio_readq)(QPCIBus *bus, uint32_t addr); - void (*io_writeb)(QPCIBus *bus, void *addr, uint8_t value); - void (*io_writew)(QPCIBus *bus, void *addr, uint16_t value); - void (*io_writel)(QPCIBus *bus, void *addr, uint32_t value); + void (*pio_writeb)(QPCIBus *bus, uint32_t addr, uint8_t value); + void (*pio_writew)(QPCIBus *bus, uint32_t addr, uint16_t value); + void (*pio_writel)(QPCIBus *bus, uint32_t addr, uint32_t value); + void (*pio_writeq)(QPCIBus *bus, uint32_t addr, uint64_t value); + + void (*memread)(QPCIBus *bus, uint32_t addr, void *buf, size_t len); + void (*memwrite)(QPCIBus *bus, uint32_t addr, const void *buf, size_t len); uint8_t (*config_readb)(QPCIBus *bus, int devfn, uint8_t offset); uint16_t (*config_readw)(QPCIBus *bus, int devfn, uint8_t offset); @@ -41,8 +48,12 @@ struct QPCIBus void (*config_writel)(QPCIBus *bus, int devfn, uint8_t offset, uint32_t value); - void *(*iomap)(QPCIBus *bus, QPCIDevice *dev, int barno, uint64_t *sizeptr); - void (*iounmap)(QPCIBus *bus, void *data); + uint16_t pio_alloc_ptr; + uint64_t mmio_alloc_ptr, mmio_limit; +}; + +struct QPCIBar { + uint64_t addr; }; struct QPCIDevice @@ -50,8 +61,8 @@ struct QPCIDevice QPCIBus *bus; int devfn; bool msix_enabled; - void *msix_table; - void *msix_pba; + QPCIBar msix_table_bar, msix_pba_bar; + uint64_t msix_table_off, msix_pba_off; }; void qpci_device_foreach(QPCIBus *bus, int vendor_id, int device_id, @@ -75,16 +86,27 @@ void qpci_config_writeb(QPCIDevice *dev, uint8_t offset, uint8_t value); void qpci_config_writew(QPCIDevice *dev, uint8_t offset, uint16_t value); void qpci_config_writel(QPCIDevice *dev, uint8_t offset, uint32_t value); -uint8_t qpci_io_readb(QPCIDevice *dev, void *data); -uint16_t qpci_io_readw(QPCIDevice *dev, void *data); -uint32_t qpci_io_readl(QPCIDevice *dev, void *data); - -void qpci_io_writeb(QPCIDevice *dev, void *data, uint8_t value); -void qpci_io_writew(QPCIDevice *dev, void *data, uint16_t value); -void qpci_io_writel(QPCIDevice *dev, void *data, uint32_t value); - -void *qpci_iomap(QPCIDevice *dev, int barno, uint64_t *sizeptr); -void qpci_iounmap(QPCIDevice *dev, void *data); +uint8_t qpci_io_readb(QPCIDevice *dev, QPCIBar token, uint64_t off); +uint16_t qpci_io_readw(QPCIDevice *dev, QPCIBar token, uint64_t off); +uint32_t qpci_io_readl(QPCIDevice *dev, QPCIBar token, uint64_t off); +uint64_t qpci_io_readq(QPCIDevice *dev, QPCIBar token, uint64_t off); + +void qpci_io_writeb(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint8_t value); +void qpci_io_writew(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint16_t value); +void qpci_io_writel(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint32_t value); 
+void qpci_io_writeq(QPCIDevice *dev, QPCIBar token, uint64_t off, + uint64_t value); + +void qpci_memread(QPCIDevice *bus, QPCIBar token, uint64_t off, + void *buf, size_t len); +void qpci_memwrite(QPCIDevice *bus, QPCIBar token, uint64_t off, + const void *buf, size_t len); +QPCIBar qpci_iomap(QPCIDevice *dev, int barno, uint64_t *sizeptr); +void qpci_iounmap(QPCIDevice *dev, QPCIBar addr); +QPCIBar qpci_legacy_iomap(QPCIDevice *dev, uint16_t addr); void qpci_plug_device_test(const char *driver, const char *id, uint8_t slot, const char *opts); diff --git a/tests/libqos/rtas.c b/tests/libqos/rtas.c new file mode 100644 index 0000000000..0269803ce0 --- /dev/null +++ b/tests/libqos/rtas.c @@ -0,0 +1,116 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "libqos/rtas.h" + +static void qrtas_copy_args(uint64_t target_args, uint32_t nargs, + uint32_t *args) +{ + int i; + + for (i = 0; i < nargs; i++) { + writel(target_args + i * sizeof(uint32_t), args[i]); + } +} + +static void qrtas_copy_ret(uint64_t target_ret, uint32_t nret, uint32_t *ret) +{ + int i; + + for (i = 0; i < nret; i++) { + ret[i] = readl(target_ret + i * sizeof(uint32_t)); + } +} + +static uint64_t qrtas_call(QGuestAllocator *alloc, const char *name, + uint32_t nargs, uint32_t *args, + uint32_t nret, uint32_t *ret) +{ + uint64_t res; + uint64_t target_args, target_ret; + + target_args = guest_alloc(alloc, nargs * sizeof(uint32_t)); + target_ret = guest_alloc(alloc, nret * sizeof(uint32_t)); + + qrtas_copy_args(target_args, nargs, args); + res = qtest_rtas_call(global_qtest, name, + nargs, target_args, nret, target_ret); + qrtas_copy_ret(target_ret, nret, ret); + + guest_free(alloc, target_ret); + guest_free(alloc, target_args); + + return res; +} + +int qrtas_get_time_of_day(QGuestAllocator *alloc, struct tm *tm, uint32_t *ns) +{ + int res; + uint32_t ret[8]; + + res = qrtas_call(alloc, "get-time-of-day", 0, NULL, 8, ret); + if (res != 0) { + return res; + } + + res = ret[0]; + memset(tm, 0, sizeof(*tm)); + tm->tm_year = ret[1] - 1900; + tm->tm_mon = ret[2] - 1; + tm->tm_mday = ret[3]; + tm->tm_hour = ret[4]; + tm->tm_min = ret[5]; + tm->tm_sec = ret[6]; + *ns = ret[7]; + + return res; +} + +uint32_t qrtas_ibm_read_pci_config(QGuestAllocator *alloc, uint64_t buid, + uint32_t addr, uint32_t size) +{ + int res; + uint32_t args[4], ret[2]; + + args[0] = addr; + args[1] = buid >> 32; + args[2] = buid & 0xffffffff; + args[3] = size; + res = qrtas_call(alloc, "ibm,read-pci-config", 4, args, 2, ret); + if (res != 0) { + return -1; + } + + if (ret[0] != 0) { + return -1; + } + + return ret[1]; +} + +int qrtas_ibm_write_pci_config(QGuestAllocator *alloc, uint64_t buid, + uint32_t addr, uint32_t size, uint32_t val) +{ + int res; + uint32_t args[5], ret[1]; + + args[0] = addr; + args[1] = buid >> 32; + args[2] = buid & 0xffffffff; + args[3] = size; + args[4] = val; + res = qrtas_call(alloc, "ibm,write-pci-config", 5, args, 1, ret); + if (res != 0) { + return -1; + } + + if (ret[0] != 0) { + return -1; + } + + return 0; +} diff --git a/tests/libqos/rtas.h b/tests/libqos/rtas.h new file mode 100644 index 0000000000..498eb19230 --- /dev/null +++ b/tests/libqos/rtas.h @@ -0,0 +1,15 @@ +/* + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef LIBQOS_RTAS_H +#define LIBQOS_RTAS_H +#include "libqos/malloc.h" + +int qrtas_get_time_of_day(QGuestAllocator *alloc, struct tm *tm, uint32_t *ns); +uint32_t qrtas_ibm_read_pci_config(QGuestAllocator *alloc, uint64_t buid, + uint32_t addr, uint32_t size); +int qrtas_ibm_write_pci_config(QGuestAllocator *alloc, uint64_t buid, + uint32_t addr, uint32_t size, uint32_t val); +#endif /* LIBQOS_RTAS_H */ diff --git a/tests/libqos/usb.c b/tests/libqos/usb.c index f794d92da5..72d7a961fe 100644 --- a/tests/libqos/usb.c +++ b/tests/libqos/usb.c @@ -21,14 +21,12 @@ void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, uint32_t devfn, int bar) hc->dev = qpci_device_find(pcibus, devfn); g_assert(hc->dev != NULL); qpci_device_enable(hc->dev); - hc->base = qpci_iomap(hc->dev, bar, NULL); - g_assert(hc->base != NULL); + hc->bar = qpci_iomap(hc->dev, bar, NULL); } void uhci_port_test(struct qhc *hc, int port, uint16_t expect) { - void *addr = hc->base + 0x10 + 2 * port; - uint16_t value = qpci_io_readw(hc->dev, addr); + uint16_t value = qpci_io_readw(hc->dev, hc->bar, 0x10 + 2 * port); uint16_t mask = ~(UHCI_PORT_WRITE_CLEAR | UHCI_PORT_RSVD1); g_assert((value & mask) == (expect & mask)); diff --git a/tests/libqos/usb.h b/tests/libqos/usb.h index 8fe56872b7..423dcfd82f 100644 --- a/tests/libqos/usb.h +++ b/tests/libqos/usb.h @@ -5,7 +5,7 @@ struct qhc { QPCIDevice *dev; - void *base; + QPCIBar bar; }; void qusb_pci_init_one(QPCIBus *pcibus, struct qhc *hc, diff --git a/tests/libqos/virtio-mmio.c b/tests/libqos/virtio-mmio.c index 0cab38f296..7aa8383338 100644 --- a/tests/libqos/virtio-mmio.c +++ b/tests/libqos/virtio-mmio.c @@ -15,28 +15,28 @@ #include "libqos/malloc-generic.h" #include "standard-headers/linux/virtio_ring.h" -static uint8_t qvirtio_mmio_config_readb(QVirtioDevice *d, uint64_t addr) +static uint8_t qvirtio_mmio_config_readb(QVirtioDevice *d, uint64_t off) { QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; - return readb(dev->addr + addr); + return readb(dev->addr + QVIRTIO_MMIO_DEVICE_SPECIFIC + off); } -static uint16_t qvirtio_mmio_config_readw(QVirtioDevice *d, uint64_t addr) +static uint16_t qvirtio_mmio_config_readw(QVirtioDevice *d, uint64_t off) { QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; - return readw(dev->addr + addr); + return readw(dev->addr + QVIRTIO_MMIO_DEVICE_SPECIFIC + off); } -static uint32_t qvirtio_mmio_config_readl(QVirtioDevice *d, uint64_t addr) +static uint32_t qvirtio_mmio_config_readl(QVirtioDevice *d, uint64_t off) { QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; - return readl(dev->addr + addr); + return readl(dev->addr + QVIRTIO_MMIO_DEVICE_SPECIFIC + off); } -static uint64_t qvirtio_mmio_config_readq(QVirtioDevice *d, uint64_t addr) +static uint64_t qvirtio_mmio_config_readq(QVirtioDevice *d, uint64_t off) { QVirtioMMIODevice *dev = (QVirtioMMIODevice *)d; - return readq(dev->addr + addr); + return readq(dev->addr + QVIRTIO_MMIO_DEVICE_SPECIFIC + off); } static uint32_t qvirtio_mmio_get_features(QVirtioDevice *d) @@ -199,6 +199,7 @@ QVirtioMMIODevice *qvirtio_mmio_init_device(uint64_t addr, uint32_t page_size) dev->addr = addr; dev->page_size = page_size; dev->vdev.device_type = readl(addr + QVIRTIO_MMIO_DEVICE_ID); + dev->vdev.bus = &qvirtio_mmio; writel(addr + QVIRTIO_MMIO_GUEST_PAGE_SIZE, page_size); diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c index 18b92b95dc..d4bf841f23 100644 --- a/tests/libqos/virtio-pci.c +++ b/tests/libqos/virtio-pci.c @@ -62,73 +62,87 @@ static void 
qvirtio_pci_assign_device(QVirtioDevice *d, void *data) *vpcidev = (QVirtioPCIDevice *)d; } -static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, uint64_t addr) +#define CONFIG_BASE(dev) (VIRTIO_PCI_CONFIG_OFF((dev)->pdev->msix_enabled)) + +static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, uint64_t off) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readb(dev->pdev, (void *)(uintptr_t)addr); + return qpci_io_readb(dev->pdev, dev->bar, CONFIG_BASE(dev) + off); } -static uint16_t qvirtio_pci_config_readw(QVirtioDevice *d, uint64_t addr) +/* PCI is always read in little-endian order + * but virtio ( < 1.0) is in guest order + * so with a big-endian guest the order has been reversed, + * reverse it again + * virtio-1.0 is always little-endian, like PCI, but this + * case will be managed inside qvirtio_is_big_endian() + */ + +static uint16_t qvirtio_pci_config_readw(QVirtioDevice *d, uint64_t off) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readw(dev->pdev, (void *)(uintptr_t)addr); + uint16_t value; + + value = qpci_io_readw(dev->pdev, dev->bar, CONFIG_BASE(dev) + off); + if (qvirtio_is_big_endian(d)) { + value = bswap16(value); + } + return value; } -static uint32_t qvirtio_pci_config_readl(QVirtioDevice *d, uint64_t addr) +static uint32_t qvirtio_pci_config_readl(QVirtioDevice *d, uint64_t off) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readl(dev->pdev, (void *)(uintptr_t)addr); + uint32_t value; + + value = qpci_io_readl(dev->pdev, dev->bar, CONFIG_BASE(dev) + off); + if (qvirtio_is_big_endian(d)) { + value = bswap32(value); + } + return value; } -static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, uint64_t addr) +static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, uint64_t off) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - int i; - uint64_t u64 = 0; + uint64_t val; - if (qtest_big_endian()) { - for (i = 0; i < 8; ++i) { - u64 |= (uint64_t)qpci_io_readb(dev->pdev, - (void *)(uintptr_t)addr + i) << (7 - i) * 8; - } - } else { - for (i = 0; i < 8; ++i) { - u64 |= (uint64_t)qpci_io_readb(dev->pdev, - (void *)(uintptr_t)addr + i) << i * 8; - } + val = qpci_io_readq(dev->pdev, dev->bar, CONFIG_BASE(dev) + off); + if (qvirtio_is_big_endian(d)) { + val = bswap64(val); } - return u64; + return val; } static uint32_t qvirtio_pci_get_features(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readl(dev->pdev, dev->addr + VIRTIO_PCI_HOST_FEATURES); + return qpci_io_readl(dev->pdev, dev->bar, VIRTIO_PCI_HOST_FEATURES); } static void qvirtio_pci_set_features(QVirtioDevice *d, uint32_t features) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writel(dev->pdev, dev->addr + VIRTIO_PCI_GUEST_FEATURES, features); + qpci_io_writel(dev->pdev, dev->bar, VIRTIO_PCI_GUEST_FEATURES, features); } static uint32_t qvirtio_pci_get_guest_features(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readl(dev->pdev, dev->addr + VIRTIO_PCI_GUEST_FEATURES); + return qpci_io_readl(dev->pdev, dev->bar, VIRTIO_PCI_GUEST_FEATURES); } static uint8_t qvirtio_pci_get_status(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readb(dev->pdev, dev->addr + VIRTIO_PCI_STATUS); + return qpci_io_readb(dev->pdev, dev->bar, VIRTIO_PCI_STATUS); } static void qvirtio_pci_set_status(QVirtioDevice *d, uint8_t status) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writeb(dev->pdev, dev->addr + VIRTIO_PCI_STATUS, status); + 
qpci_io_writeb(dev->pdev, dev->bar, VIRTIO_PCI_STATUS, status); } static bool qvirtio_pci_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) @@ -152,7 +166,7 @@ static bool qvirtio_pci_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq) } } } else { - return qpci_io_readb(dev->pdev, dev->addr + VIRTIO_PCI_ISR) & 1; + return qpci_io_readb(dev->pdev, dev->bar, VIRTIO_PCI_ISR) & 1; } } @@ -176,26 +190,26 @@ static bool qvirtio_pci_get_config_isr_status(QVirtioDevice *d) } } } else { - return qpci_io_readb(dev->pdev, dev->addr + VIRTIO_PCI_ISR) & 2; + return qpci_io_readb(dev->pdev, dev->bar, VIRTIO_PCI_ISR) & 2; } } static void qvirtio_pci_queue_select(QVirtioDevice *d, uint16_t index) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writeb(dev->pdev, dev->addr + VIRTIO_PCI_QUEUE_SEL, index); + qpci_io_writeb(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_SEL, index); } static uint16_t qvirtio_pci_get_queue_size(QVirtioDevice *d) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - return qpci_io_readw(dev->pdev, dev->addr + VIRTIO_PCI_QUEUE_NUM); + return qpci_io_readw(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_NUM); } static void qvirtio_pci_set_queue_address(QVirtioDevice *d, uint32_t pfn) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writel(dev->pdev, dev->addr + VIRTIO_PCI_QUEUE_PFN, pfn); + qpci_io_writel(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_PFN, pfn); } static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d, @@ -247,7 +261,7 @@ static void qvirtio_pci_virtqueue_cleanup(QVirtQueue *vq, static void qvirtio_pci_virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq) { QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d; - qpci_io_writew(dev->pdev, dev->addr + VIRTIO_PCI_QUEUE_NOTIFY, vq->index); + qpci_io_writew(dev->pdev, dev->bar, VIRTIO_PCI_QUEUE_NOTIFY, vq->index); } const QVirtioBus qvirtio_pci = { @@ -286,20 +300,20 @@ QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type) QVirtioPCIDevice *dev = NULL; qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev); + dev->vdev.bus = &qvirtio_pci; + return dev; } void qvirtio_pci_device_enable(QVirtioPCIDevice *d) { qpci_device_enable(d->pdev); - d->addr = qpci_iomap(d->pdev, 0, NULL); - g_assert(d->addr != NULL); + d->bar = qpci_iomap(d->pdev, 0, NULL); } void qvirtio_pci_device_disable(QVirtioPCIDevice *d) { - qpci_iounmap(d->pdev, d->addr); - d->addr = NULL; + qpci_iounmap(d->pdev, d->bar); } void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci, @@ -307,29 +321,33 @@ void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci, { uint16_t vector; uint32_t control; - void *addr; + uint64_t off; g_assert(d->pdev->msix_enabled); - addr = d->pdev->msix_table + (entry * 16); + off = d->pdev->msix_table_off + (entry * 16); g_assert_cmpint(entry, >=, 0); g_assert_cmpint(entry, <, qpci_msix_table_size(d->pdev)); vqpci->msix_entry = entry; vqpci->msix_addr = guest_alloc(alloc, 4); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_LOWER_ADDR, - vqpci->msix_addr & ~0UL); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_UPPER_ADDR, - (vqpci->msix_addr >> 32) & ~0UL); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_DATA, vqpci->msix_data); - - control = qpci_io_readl(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL, - control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_LOWER_ADDR, vqpci->msix_addr & ~0UL); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, 
+ off + PCI_MSIX_ENTRY_UPPER_ADDR, + (vqpci->msix_addr >> 32) & ~0UL); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_DATA, vqpci->msix_data); + + control = qpci_io_readl(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_VECTOR_CTRL); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_VECTOR_CTRL, + control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); qvirtio_pci_queue_select(&d->vdev, vqpci->vq.index); - qpci_io_writew(d->pdev, d->addr + VIRTIO_MSI_QUEUE_VECTOR, entry); - vector = qpci_io_readw(d->pdev, d->addr + VIRTIO_MSI_QUEUE_VECTOR); + qpci_io_writew(d->pdev, d->bar, VIRTIO_MSI_QUEUE_VECTOR, entry); + vector = qpci_io_readw(d->pdev, d->bar, VIRTIO_MSI_QUEUE_VECTOR); g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); } @@ -338,10 +356,10 @@ void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d, { uint16_t vector; uint32_t control; - void *addr; + uint64_t off; g_assert(d->pdev->msix_enabled); - addr = d->pdev->msix_table + (entry * 16); + off = d->pdev->msix_table_off + (entry * 16); g_assert_cmpint(entry, >=, 0); g_assert_cmpint(entry, <, qpci_msix_table_size(d->pdev)); @@ -350,17 +368,21 @@ void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d, d->config_msix_data = 0x12345678; d->config_msix_addr = guest_alloc(alloc, 4); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_LOWER_ADDR, - d->config_msix_addr & ~0UL); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_UPPER_ADDR, - (d->config_msix_addr >> 32) & ~0UL); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_DATA, d->config_msix_data); - - control = qpci_io_readl(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL); - qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL, - control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); - - qpci_io_writew(d->pdev, d->addr + VIRTIO_MSI_CONFIG_VECTOR, entry); - vector = qpci_io_readw(d->pdev, d->addr + VIRTIO_MSI_CONFIG_VECTOR); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_LOWER_ADDR, d->config_msix_addr & ~0UL); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_UPPER_ADDR, + (d->config_msix_addr >> 32) & ~0UL); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_DATA, d->config_msix_data); + + control = qpci_io_readl(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_VECTOR_CTRL); + qpci_io_writel(d->pdev, d->pdev->msix_table_bar, + off + PCI_MSIX_ENTRY_VECTOR_CTRL, + control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT); + + qpci_io_writew(d->pdev, d->bar, VIRTIO_MSI_CONFIG_VECTOR, entry); + vector = qpci_io_readw(d->pdev, d->bar, VIRTIO_MSI_CONFIG_VECTOR); g_assert_cmphex(vector, !=, VIRTIO_MSI_NO_VECTOR); } diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h index efcac2d3de..38c54c63ea 100644 --- a/tests/libqos/virtio-pci.h +++ b/tests/libqos/virtio-pci.h @@ -16,7 +16,7 @@ typedef struct QVirtioPCIDevice { QVirtioDevice vdev; QPCIDevice *pdev; - void *addr; + QPCIBar bar; uint16_t config_msix_entry; uint64_t config_msix_addr; uint32_t config_msix_data; diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c index d8c2970de7..ec30cb99b2 100644 --- a/tests/libqos/virtio.c +++ b/tests/libqos/virtio.c @@ -13,45 +13,40 @@ #include "standard-headers/linux/virtio_config.h" #include "standard-headers/linux/virtio_ring.h" -uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr) +uint8_t qvirtio_config_readb(QVirtioDevice *d, uint64_t addr) { - return bus->config_readb(d, addr); + return d->bus->config_readb(d, addr); } -uint16_t 
qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr) +uint16_t qvirtio_config_readw(QVirtioDevice *d, uint64_t addr) { - return bus->config_readw(d, addr); + return d->bus->config_readw(d, addr); } -uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr) +uint32_t qvirtio_config_readl(QVirtioDevice *d, uint64_t addr) { - return bus->config_readl(d, addr); + return d->bus->config_readl(d, addr); } -uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr) +uint64_t qvirtio_config_readq(QVirtioDevice *d, uint64_t addr) { - return bus->config_readq(d, addr); + return d->bus->config_readq(d, addr); } -uint32_t qvirtio_get_features(const QVirtioBus *bus, QVirtioDevice *d) +uint32_t qvirtio_get_features(QVirtioDevice *d) { - return bus->get_features(d); + return d->bus->get_features(d); } -void qvirtio_set_features(const QVirtioBus *bus, QVirtioDevice *d, - uint32_t features) +void qvirtio_set_features(QVirtioDevice *d, uint32_t features) { - bus->set_features(d, features); + d->bus->set_features(d, features); } -QVirtQueue *qvirtqueue_setup(const QVirtioBus *bus, QVirtioDevice *d, - QGuestAllocator *alloc, uint16_t index) +QVirtQueue *qvirtqueue_setup(QVirtioDevice *d, + QGuestAllocator *alloc, uint16_t index) { - return bus->virtqueue_setup(d, alloc, index); + return d->bus->virtqueue_setup(d, alloc, index); } void qvirtqueue_cleanup(const QVirtioBus *bus, QVirtQueue *vq, @@ -60,40 +55,40 @@ void qvirtqueue_cleanup(const QVirtioBus *bus, QVirtQueue *vq, return bus->virtqueue_cleanup(vq, alloc); } -void qvirtio_reset(const QVirtioBus *bus, QVirtioDevice *d) +void qvirtio_reset(QVirtioDevice *d) { - bus->set_status(d, 0); - g_assert_cmphex(bus->get_status(d), ==, 0); + d->bus->set_status(d, 0); + g_assert_cmphex(d->bus->get_status(d), ==, 0); } -void qvirtio_set_acknowledge(const QVirtioBus *bus, QVirtioDevice *d) +void qvirtio_set_acknowledge(QVirtioDevice *d) { - bus->set_status(d, bus->get_status(d) | VIRTIO_CONFIG_S_ACKNOWLEDGE); - g_assert_cmphex(bus->get_status(d), ==, VIRTIO_CONFIG_S_ACKNOWLEDGE); + d->bus->set_status(d, d->bus->get_status(d) | VIRTIO_CONFIG_S_ACKNOWLEDGE); + g_assert_cmphex(d->bus->get_status(d), ==, VIRTIO_CONFIG_S_ACKNOWLEDGE); } -void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d) +void qvirtio_set_driver(QVirtioDevice *d) { - bus->set_status(d, bus->get_status(d) | VIRTIO_CONFIG_S_DRIVER); - g_assert_cmphex(bus->get_status(d), ==, + d->bus->set_status(d, d->bus->get_status(d) | VIRTIO_CONFIG_S_DRIVER); + g_assert_cmphex(d->bus->get_status(d), ==, VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE); } -void qvirtio_set_driver_ok(const QVirtioBus *bus, QVirtioDevice *d) +void qvirtio_set_driver_ok(QVirtioDevice *d) { - bus->set_status(d, bus->get_status(d) | VIRTIO_CONFIG_S_DRIVER_OK); - g_assert_cmphex(bus->get_status(d), ==, VIRTIO_CONFIG_S_DRIVER_OK | + d->bus->set_status(d, d->bus->get_status(d) | VIRTIO_CONFIG_S_DRIVER_OK); + g_assert_cmphex(d->bus->get_status(d), ==, VIRTIO_CONFIG_S_DRIVER_OK | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_ACKNOWLEDGE); } -void qvirtio_wait_queue_isr(const QVirtioBus *bus, QVirtioDevice *d, +void qvirtio_wait_queue_isr(QVirtioDevice *d, QVirtQueue *vq, gint64 timeout_us) { gint64 start_time = g_get_monotonic_time(); for (;;) { clock_step(100); - if (bus->get_queue_isr_status(d, vq)) { + if (d->bus->get_queue_isr_status(d, vq)) { return; } g_assert(g_get_monotonic_time() - start_time <= timeout_us); @@ -105,8 +100,7 @@ void 
qvirtio_wait_queue_isr(const QVirtioBus *bus, QVirtioDevice *d, * The virtqueue interrupt must not be raised, making this useful for testing * event_index functionality. */ -uint8_t qvirtio_wait_status_byte_no_isr(const QVirtioBus *bus, - QVirtioDevice *d, +uint8_t qvirtio_wait_status_byte_no_isr(QVirtioDevice *d, QVirtQueue *vq, uint64_t addr, gint64 timeout_us) @@ -116,20 +110,19 @@ uint8_t qvirtio_wait_status_byte_no_isr(const QVirtioBus *bus, while ((val = readb(addr)) == 0xff) { clock_step(100); - g_assert(!bus->get_queue_isr_status(d, vq)); + g_assert(!d->bus->get_queue_isr_status(d, vq)); g_assert(g_get_monotonic_time() - start_time <= timeout_us); } return val; } -void qvirtio_wait_config_isr(const QVirtioBus *bus, QVirtioDevice *d, - gint64 timeout_us) +void qvirtio_wait_config_isr(QVirtioDevice *d, gint64 timeout_us) { gint64 start_time = g_get_monotonic_time(); for (;;) { clock_step(100); - if (bus->get_config_isr_status(d)) { + if (d->bus->get_config_isr_status(d)) { return; } g_assert(g_get_monotonic_time() - start_time <= timeout_us); @@ -147,7 +140,7 @@ void qvring_init(const QGuestAllocator *alloc, QVirtQueue *vq, uint64_t addr) for (i = 0; i < vq->size - 1; i++) { /* vq->desc[i].addr */ - writew(vq->desc + (16 * i), 0); + writeq(vq->desc + (16 * i), 0); /* vq->desc[i].next */ writew(vq->desc + (16 * i) + 14, i + 1); } @@ -253,20 +246,19 @@ uint32_t qvirtqueue_add_indirect(QVirtQueue *vq, QVRingIndirectDesc *indirect) return vq->free_head++; /* Return and increase, in this order */ } -void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq, - uint32_t free_head) +void qvirtqueue_kick(QVirtioDevice *d, QVirtQueue *vq, uint32_t free_head) { /* vq->avail->idx */ - uint16_t idx = readl(vq->avail + 2); + uint16_t idx = readw(vq->avail + 2); /* vq->used->flags */ uint16_t flags; /* vq->used->avail_event */ uint16_t avail_event; /* vq->avail->ring[idx % vq->size] */ - writel(vq->avail + 4 + (2 * (idx % vq->size)), free_head); + writew(vq->avail + 4 + (2 * (idx % vq->size)), free_head); /* vq->avail->idx */ - writel(vq->avail + 2, idx + 1); + writew(vq->avail + 2, idx + 1); /* Must read after idx is updated */ flags = readw(vq->avail); @@ -276,7 +268,7 @@ void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq, /* < 1 because we add elements to avail queue one by one */ if ((flags & VRING_USED_F_NO_NOTIFY) == 0 && (!vq->event || (uint16_t)(idx-avail_event) < 1)) { - bus->virtqueue_kick(d, vq); + d->bus->virtqueue_kick(d, vq); } } diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h index 0250842bf2..3397a080e9 100644 --- a/tests/libqos/virtio.h +++ b/tests/libqos/virtio.h @@ -15,7 +15,10 @@ #define QVIRTIO_F_BAD_FEATURE 0x40000000 +typedef struct QVirtioBus QVirtioBus; + typedef struct QVirtioDevice { + const QVirtioBus *bus; /* Device type */ uint16_t device_type; } QVirtioDevice; @@ -39,7 +42,7 @@ typedef struct QVRingIndirectDesc { uint16_t elem; } QVRingIndirectDesc; -typedef struct QVirtioBus { +struct QVirtioBus { uint8_t (*config_readb)(QVirtioDevice *d, uint64_t addr); uint16_t (*config_readw)(QVirtioDevice *d, uint64_t addr); uint32_t (*config_readl)(QVirtioDevice *d, uint64_t addr); @@ -84,7 +87,13 @@ typedef struct QVirtioBus { /* Notify changes in virtqueue */ void (*virtqueue_kick)(QVirtioDevice *d, QVirtQueue *vq); -} QVirtioBus; +}; + +static inline bool qvirtio_is_big_endian(QVirtioDevice *d) +{ + /* FIXME: virtio 1.0 is always little-endian */ + return qtest_big_endian(global_qtest); +} static inline uint32_t 
qvring_size(uint32_t num, uint32_t align) { @@ -93,34 +102,27 @@ static inline uint32_t qvring_size(uint32_t num, uint32_t align) + sizeof(uint16_t) * 3 + sizeof(struct vring_used_elem) * num; } -uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr); -uint16_t qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr); -uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr); -uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d, - uint64_t addr); -uint32_t qvirtio_get_features(const QVirtioBus *bus, QVirtioDevice *d); -void qvirtio_set_features(const QVirtioBus *bus, QVirtioDevice *d, - uint32_t features); - -void qvirtio_reset(const QVirtioBus *bus, QVirtioDevice *d); -void qvirtio_set_acknowledge(const QVirtioBus *bus, QVirtioDevice *d); -void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d); -void qvirtio_set_driver_ok(const QVirtioBus *bus, QVirtioDevice *d); - -void qvirtio_wait_queue_isr(const QVirtioBus *bus, QVirtioDevice *d, +uint8_t qvirtio_config_readb(QVirtioDevice *d, uint64_t addr); +uint16_t qvirtio_config_readw(QVirtioDevice *d, uint64_t addr); +uint32_t qvirtio_config_readl(QVirtioDevice *d, uint64_t addr); +uint64_t qvirtio_config_readq(QVirtioDevice *d, uint64_t addr); +uint32_t qvirtio_get_features(QVirtioDevice *d); +void qvirtio_set_features(QVirtioDevice *d, uint32_t features); + +void qvirtio_reset(QVirtioDevice *d); +void qvirtio_set_acknowledge(QVirtioDevice *d); +void qvirtio_set_driver(QVirtioDevice *d); +void qvirtio_set_driver_ok(QVirtioDevice *d); + +void qvirtio_wait_queue_isr(QVirtioDevice *d, QVirtQueue *vq, gint64 timeout_us); -uint8_t qvirtio_wait_status_byte_no_isr(const QVirtioBus *bus, - QVirtioDevice *d, +uint8_t qvirtio_wait_status_byte_no_isr(QVirtioDevice *d, QVirtQueue *vq, uint64_t addr, gint64 timeout_us); -void qvirtio_wait_config_isr(const QVirtioBus *bus, QVirtioDevice *d, - gint64 timeout_us); -QVirtQueue *qvirtqueue_setup(const QVirtioBus *bus, QVirtioDevice *d, - QGuestAllocator *alloc, uint16_t index); +void qvirtio_wait_config_isr(QVirtioDevice *d, gint64 timeout_us); +QVirtQueue *qvirtqueue_setup(QVirtioDevice *d, + QGuestAllocator *alloc, uint16_t index); void qvirtqueue_cleanup(const QVirtioBus *bus, QVirtQueue *vq, QGuestAllocator *alloc); @@ -132,8 +134,7 @@ void qvring_indirect_desc_add(QVRingIndirectDesc *indirect, uint64_t data, uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write, bool next); uint32_t qvirtqueue_add_indirect(QVirtQueue *vq, QVRingIndirectDesc *indirect); -void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq, - uint32_t free_head); +void qvirtqueue_kick(QVirtioDevice *d, QVirtQueue *vq, uint32_t free_head); void qvirtqueue_set_used_event(QVirtQueue *vq, uint16_t idx); #endif diff --git a/tests/libqtest.c b/tests/libqtest.c index eb00f1392b..6f6975248f 100644 --- a/tests/libqtest.c +++ b/tests/libqtest.c @@ -37,6 +37,7 @@ struct QTestState bool irq_level[MAX_IRQ]; GString *rx; pid_t qemu_pid; /* our child QEMU process */ + bool big_endian; }; static GHookList abrt_hooks; @@ -47,6 +48,8 @@ static struct sigaction sigact_old; g_assert_cmpint(ret, !=, -1); \ } while (0) +static int qtest_query_target_endianness(QTestState *s); + static int init_socket(const char *socket_path) { struct sockaddr_un addr; @@ -209,6 +212,10 @@ QTestState *qtest_init(const char *extra_args) kill(s->qemu_pid, SIGSTOP); } + /* ask endianness of the target */ + + s->big_endian = 
qtest_query_target_endianness(s); + return s; } @@ -342,6 +349,20 @@ static gchar **qtest_rsp(QTestState *s, int expected_args) return words; } +static int qtest_query_target_endianness(QTestState *s) +{ + gchar **args; + int big_endian; + + qtest_sendf(s, "endianness\n"); + args = qtest_rsp(s, 1); + g_assert(strcmp(args[1], "big") == 0 || strcmp(args[1], "little") == 0); + big_endian = strcmp(args[1], "big") == 0; + g_strfreev(args); + + return big_endian; +} + typedef struct { JSONMessageParser parser; QDict *response; @@ -512,7 +533,7 @@ void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...) QDECREF(response); } -void qtest_qmp_eventwait(QTestState *s, const char *event) +QDict *qtest_qmp_eventwait_ref(QTestState *s, const char *event) { QDict *response; @@ -520,13 +541,20 @@ void qtest_qmp_eventwait(QTestState *s, const char *event) response = qtest_qmp_receive(s); if ((qdict_haskey(response, "event")) && (strcmp(qdict_get_str(response, "event"), event) == 0)) { - QDECREF(response); - break; + return response; } QDECREF(response); } } +void qtest_qmp_eventwait(QTestState *s, const char *event) +{ + QDict *response; + + response = qtest_qmp_eventwait_ref(s, event); + QDECREF(response); +} + char *qtest_hmpv(QTestState *s, const char *fmt, va_list ap) { char *cmd; @@ -751,6 +779,16 @@ void qtest_memread(QTestState *s, uint64_t addr, void *data, size_t size) g_strfreev(args); } +uint64_t qtest_rtas_call(QTestState *s, const char *name, + uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t ret) +{ + qtest_sendf(s, "rtas %s %u 0x%"PRIx64" %u 0x%"PRIx64"\n", + name, nargs, args, nret, ret); + qtest_rsp(s, 0); + return 0; +} + void qtest_add_func(const char *str, void (*fn)(void)) { gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str); @@ -758,6 +796,25 @@ void qtest_add_func(const char *str, void (*fn)(void)) g_free(path); } +void qtest_add_data_func_full(const char *str, void *data, + void (*fn)(const void *), + GDestroyNotify data_free_func) +{ + gchar *path = g_strdup_printf("/%s/%s", qtest_get_arch(), str); +#if GLIB_CHECK_VERSION(2, 34, 0) + g_test_add_data_func_full(path, data, fn, data_free_func); +#elif GLIB_CHECK_VERSION(2, 26, 0) + /* back-compat casts, remove this once we can require new-enough glib */ + g_test_add_vtable(path, 0, data, NULL, + (GTestFixtureFunc)fn, (GTestFixtureFunc) data_free_func); +#else + /* back-compat casts, remove this once we can require new-enough glib */ + g_test_add_vtable(path, 0, data, NULL, + (void (*)(void)) fn, (void (*)(void)) data_free_func); +#endif + g_free(path); +} + void qtest_add_data_func(const char *str, const void *data, void (*fn)(const void *)) { @@ -857,50 +914,7 @@ char *hmp(const char *fmt, ...) 
return ret; } -bool qtest_big_endian(void) +bool qtest_big_endian(QTestState *s) { - const char *arch = qtest_get_arch(); - int i; - - static const struct { - const char *arch; - bool big_endian; - } endianness[] = { - { "aarch64", false }, - { "alpha", false }, - { "arm", false }, - { "cris", false }, - { "i386", false }, - { "lm32", true }, - { "m68k", true }, - { "microblaze", true }, - { "microblazeel", false }, - { "mips", true }, - { "mips64", true }, - { "mips64el", false }, - { "mipsel", false }, - { "moxie", true }, - { "or32", true }, - { "ppc", true }, - { "ppc64", true }, - { "ppcemb", true }, - { "s390x", true }, - { "sh4", false }, - { "sh4eb", true }, - { "sparc", true }, - { "sparc64", true }, - { "unicore32", false }, - { "x86_64", false }, - { "xtensa", false }, - { "xtensaeb", true }, - {}, - }; - - for (i = 0; endianness[i].arch; i++) { - if (strcmp(endianness[i].arch, arch) == 0) { - return endianness[i].big_endian; - } - } - - return false; + return s->big_endian; } diff --git a/tests/libqtest.h b/tests/libqtest.h index 37f37adbf7..90f182e1d8 100644 --- a/tests/libqtest.h +++ b/tests/libqtest.h @@ -113,6 +113,16 @@ QDict *qtest_qmp_receive(QTestState *s); */ void qtest_qmp_eventwait(QTestState *s, const char *event); +/** + * qtest_qmp_eventwait_ref: + * @s: #QTestState instance to operate on. + * @event: event to wait for. + * + * Continuously polls for QMP responses until it receives the desired event. + * Returns a copy of the event for further investigation. + */ +QDict *qtest_qmp_eventwait_ref(QTestState *s, const char *event); + /** * qtest_hmpv: * @s: #QTestState instance to operate on. @@ -317,6 +327,21 @@ uint64_t qtest_readq(QTestState *s, uint64_t addr); */ void qtest_memread(QTestState *s, uint64_t addr, void *data, size_t size); +/** + * qtest_rtas_call: + * @s: #QTestState instance to operate on. + * @name: name of the command to call. + * @nargs: Number of args. + * @args: Guest address to read args from. + * @nret: Number of return values. + * @ret: Guest address to write return values to. + * + * Call an RTAS function. + */ +uint64_t qtest_rtas_call(QTestState *s, const char *name, + uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t ret); + /** * qtest_bufread: + * @s: #QTestState instance to operate on. @@ -394,6 +419,14 @@ int64_t qtest_clock_step(QTestState *s, int64_t step); */ int64_t qtest_clock_set(QTestState *s, int64_t val); +/** + * qtest_big_endian: + * @s: QTestState instance to operate on. + * + * Returns: True if the architecture under test has a big endian configuration. + */ +bool qtest_big_endian(QTestState *s); + /** * qtest_get_arch: * @@ -425,6 +458,23 @@ void qtest_add_func(const char *str, void (*fn)(void)); void qtest_add_data_func(const char *str, const void *data, void (*fn)(const void *)); +/** + * qtest_add_data_func_full: + * @str: Test case path. + * @data: Test case data + * @fn: Test case function + * @data_free_func: GDestroyNotify for data + * + * Add a GTester testcase with the given name, data and function. + * The path is prefixed with the architecture under test, as + * returned by qtest_get_arch(). + * + * @data is passed to @data_free_func() on test completion.
+ */ +void qtest_add_data_func_full(const char *str, void *data, + void (*fn)(const void *), + GDestroyNotify data_free_func); + /** * qtest_add: * @testpath: Test case path @@ -518,6 +568,18 @@ static inline void qmp_eventwait(const char *event) { return qtest_qmp_eventwait(global_qtest, event); } +/** + * qmp_eventwait_ref: + * @event: event to wait for. + * + * Continuously polls for QMP responses until it receives the desired event. + * Returns a copy of the event for further investigation. + */ +static inline QDict *qmp_eventwait_ref(const char *event) +{ + return qtest_qmp_eventwait_ref(global_qtest, event); +} + /** * hmp: * @fmt...: HMP command to send to QEMU @@ -841,14 +903,6 @@ static inline int64_t clock_set(int64_t val) { return qtest_clock_set(global_qtest, val); } -/** - * qtest_big_endian: - * - * Returns: True if the architecture under test has a big endian configuration. - */ -bool qtest_big_endian(void); - - QDict *qmp_fd_receive(int fd); void qmp_fd_sendv(int fd, const char *fmt, va_list ap); void qmp_fd_send(int fd, const char *fmt, ...); diff --git a/tests/m25p80-test.c b/tests/m25p80-test.c new file mode 100644 index 0000000000..cb7ec81f1a --- /dev/null +++ b/tests/m25p80-test.c @@ -0,0 +1,252 @@ +/* + * QTest testcase for the M25P80 Flash (Using the Aspeed SPI + * Controller) + * + * Copyright (C) 2016 IBM Corp. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/bswap.h" +#include "libqtest.h" + +/* + * ASPEED SPI Controller registers + */ +#define R_CONF 0x00 +#define CONF_ENABLE_W0 (1 << 16) +#define R_CE_CTRL 0x04 +#define CRTL_EXTENDED0 0 /* 32 bit addressing for SPI */ +#define R_CTRL0 0x10 +#define CTRL_CE_STOP_ACTIVE (1 << 2) +#define CTRL_USERMODE 0x3 + +#define ASPEED_FMC_BASE 0x1E620000 +#define ASPEED_FLASH_BASE 0x20000000 + +/* + * Flash commands + */ +enum { + JEDEC_READ = 0x9f, + BULK_ERASE = 0xc7, + READ = 0x03, + PP = 0x02, + WREN = 0x6, + EN_4BYTE_ADDR = 0xB7, + ERASE_SECTOR = 0xd8, +}; + +#define FLASH_JEDEC 0x20ba19 /* n25q256a */ +#define FLASH_SIZE (32 * 1024 * 1024) + +#define PAGE_SIZE 256 + +/* + * Use an explicit bswap for the values read/written to the flash region + * as they are BE and the Aspeed CPU is LE.
+ */ +static inline uint32_t make_be32(uint32_t data) +{ + return bswap32(data); +} + +static void spi_conf(uint32_t value) +{ + uint32_t conf = readl(ASPEED_FMC_BASE + R_CONF); + + conf |= value; + writel(ASPEED_FMC_BASE + R_CONF, conf); +} + +static void spi_ctrl_start_user(void) +{ + uint32_t ctrl = readl(ASPEED_FMC_BASE + R_CTRL0); + + ctrl |= CTRL_USERMODE | CTRL_CE_STOP_ACTIVE; + writel(ASPEED_FMC_BASE + R_CTRL0, ctrl); + + ctrl &= ~CTRL_CE_STOP_ACTIVE; + writel(ASPEED_FMC_BASE + R_CTRL0, ctrl); +} + +static void spi_ctrl_stop_user(void) +{ + uint32_t ctrl = readl(ASPEED_FMC_BASE + R_CTRL0); + + ctrl |= CTRL_USERMODE | CTRL_CE_STOP_ACTIVE; + writel(ASPEED_FMC_BASE + R_CTRL0, ctrl); +} + +static void test_read_jedec(void) +{ + uint32_t jedec = 0x0; + + spi_conf(CONF_ENABLE_W0); + + spi_ctrl_start_user(); + writeb(ASPEED_FLASH_BASE, JEDEC_READ); + jedec |= readb(ASPEED_FLASH_BASE) << 16; + jedec |= readb(ASPEED_FLASH_BASE) << 8; + jedec |= readb(ASPEED_FLASH_BASE); + spi_ctrl_stop_user(); + + g_assert_cmphex(jedec, ==, FLASH_JEDEC); +} + +static void read_page(uint32_t addr, uint32_t *page) +{ + int i; + + spi_ctrl_start_user(); + + writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR); + writeb(ASPEED_FLASH_BASE, READ); + writel(ASPEED_FLASH_BASE, make_be32(addr)); + + /* Continuous read are supported */ + for (i = 0; i < PAGE_SIZE / 4; i++) { + page[i] = make_be32(readl(ASPEED_FLASH_BASE)); + } + spi_ctrl_stop_user(); +} + +static void test_erase_sector(void) +{ + uint32_t some_page_addr = 0x600 * PAGE_SIZE; + uint32_t page[PAGE_SIZE / 4]; + int i; + + spi_conf(CONF_ENABLE_W0); + + spi_ctrl_start_user(); + writeb(ASPEED_FLASH_BASE, WREN); + writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR); + writeb(ASPEED_FLASH_BASE, ERASE_SECTOR); + writel(ASPEED_FLASH_BASE, make_be32(some_page_addr)); + spi_ctrl_stop_user(); + + /* Previous page should be full of zeroes as backend is not + * initialized */ + read_page(some_page_addr - PAGE_SIZE, page); + for (i = 0; i < PAGE_SIZE / 4; i++) { + g_assert_cmphex(page[i], ==, 0x0); + } + + /* But this one was erased */ + read_page(some_page_addr, page); + for (i = 0; i < PAGE_SIZE / 4; i++) { + g_assert_cmphex(page[i], ==, 0xffffffff); + } +} + +static void test_erase_all(void) +{ + uint32_t some_page_addr = 0x15000 * PAGE_SIZE; + uint32_t page[PAGE_SIZE / 4]; + int i; + + spi_conf(CONF_ENABLE_W0); + + /* Check some random page. 
Should be full of zeroes as backend is + * not initialized */ + read_page(some_page_addr, page); + for (i = 0; i < PAGE_SIZE / 4; i++) { + g_assert_cmphex(page[i], ==, 0x0); + } + + spi_ctrl_start_user(); + writeb(ASPEED_FLASH_BASE, WREN); + writeb(ASPEED_FLASH_BASE, BULK_ERASE); + spi_ctrl_stop_user(); + + /* Recheck that some random page */ + read_page(some_page_addr, page); + for (i = 0; i < PAGE_SIZE / 4; i++) { + g_assert_cmphex(page[i], ==, 0xffffffff); + } +} + +static void test_write_page(void) +{ + uint32_t my_page_addr = 0x14000 * PAGE_SIZE; /* beyond 16MB */ + uint32_t some_page_addr = 0x15000 * PAGE_SIZE; + uint32_t page[PAGE_SIZE / 4]; + int i; + + spi_conf(CONF_ENABLE_W0); + + spi_ctrl_start_user(); + writeb(ASPEED_FLASH_BASE, EN_4BYTE_ADDR); + writeb(ASPEED_FLASH_BASE, PP); + writel(ASPEED_FLASH_BASE, make_be32(my_page_addr)); + + /* Fill the page with its own addresses */ + for (i = 0; i < PAGE_SIZE / 4; i++) { + writel(ASPEED_FLASH_BASE, make_be32(my_page_addr + i * 4)); + } + spi_ctrl_stop_user(); + + /* Check what was written */ + read_page(my_page_addr, page); + for (i = 0; i < PAGE_SIZE / 4; i++) { + g_assert_cmphex(page[i], ==, my_page_addr + i * 4); + } + + /* Check some other page. It should be full of 0xff */ + read_page(some_page_addr, page); + for (i = 0; i < PAGE_SIZE / 4; i++) { + g_assert_cmphex(page[i], ==, 0xffffffff); + } +} + +static char tmp_path[] = "/tmp/qtest.m25p80.XXXXXX"; + +int main(int argc, char **argv) +{ + int ret; + int fd; + char *args; + + g_test_init(&argc, &argv, NULL); + + fd = mkstemp(tmp_path); + g_assert(fd >= 0); + ret = ftruncate(fd, FLASH_SIZE); + g_assert(ret == 0); + close(fd); + + args = g_strdup_printf("-m 256 -machine palmetto-bmc " + "-drive file=%s,format=raw,if=mtd", + tmp_path); + qtest_start(args); + + qtest_add_func("/m25p80/read_jedec", test_read_jedec); + qtest_add_func("/m25p80/erase_sector", test_erase_sector); + qtest_add_func("/m25p80/erase_all", test_erase_all); + qtest_add_func("/m25p80/write_page", test_write_page); + + ret = g_test_run(); + + qtest_quit(global_qtest); + unlink(tmp_path); + g_free(args); + return ret; +} diff --git a/tests/pc-cpu-test.c b/tests/pc-cpu-test.c index 4428cea5f1..c3a2633d3c 100644 --- a/tests/pc-cpu-test.c +++ b/tests/pc-cpu-test.c @@ -14,7 +14,7 @@ #include "qapi/qmp/types.h" struct PCTestData { - const char *machine; + char *machine; const char *cpu_model; unsigned sockets; unsigned cores; @@ -71,6 +71,14 @@ static void test_pc_without_cpu_add(gconstpointer data) g_free(args); } +static void test_data_free(gpointer data) +{ + PCTestData *pc = data; + + g_free(pc->machine); + g_free(pc); +} + static void add_pc_test_cases(void) { QDict *response, *minfo; @@ -78,7 +86,8 @@ static void add_pc_test_cases(void) const QListEntry *p; QObject *qobj; QString *qstr; - const char *mname, *path; + const char *mname; + char *path; PCTestData *data; qtest_start("-machine none"); @@ -99,7 +108,7 @@ static void add_pc_test_cases(void) continue; } data = g_malloc(sizeof(PCTestData)); - data->machine = mname; + data->machine = g_strdup(mname); data->cpu_model = "Haswell"; /* 1.3+ theoretically */ data->sockets = 1; data->cores = 3; @@ -119,14 +128,19 @@ static void add_pc_test_cases(void) path = g_strdup_printf("cpu/%s/init/%ux%ux%u&maxcpus=%u", mname, data->sockets, data->cores, data->threads, data->maxcpus); - qtest_add_data_func(path, data, test_pc_without_cpu_add); + qtest_add_data_func_full(path, data, test_pc_without_cpu_add, + test_data_free); + g_free(path); } else { path = 
g_strdup_printf("cpu/%s/add/%ux%ux%u&maxcpus=%u", mname, data->sockets, data->cores, data->threads, data->maxcpus); - qtest_add_data_func(path, data, test_pc_with_cpu_add); + qtest_add_data_func_full(path, data, test_pc_with_cpu_add, + test_data_free); + g_free(path); } } + QDECREF(response); qtest_end(); } diff --git a/tests/pkix_asn1_tab.c b/tests/pkix_asn1_tab.c index 903bc02518..f15fc515cb 100644 --- a/tests/pkix_asn1_tab.c +++ b/tests/pkix_asn1_tab.c @@ -4,7 +4,9 @@ */ #include "qemu/osdep.h" -#include +#include "tests/crypto-tls-x509-helpers.h" + +#ifdef QCRYPTO_HAVE_TLS_TEST_SUPPORT const ASN1_ARRAY_TYPE pkix_asn1_tab[] = { {"PKIX1", 536875024, 0}, @@ -1103,3 +1105,4 @@ const ASN1_ARRAY_TYPE pkix_asn1_tab[] = { {0, 1048586, "2"}, {0, 0, 0} }; +#endif /* QCRYPTO_HAVE_TLS_TEST_SUPPORT */ diff --git a/tests/pnv-xscom-test.c b/tests/pnv-xscom-test.c new file mode 100644 index 0000000000..5951da16cd --- /dev/null +++ b/tests/pnv-xscom-test.c @@ -0,0 +1,140 @@ +/* + * QTest testcase for PowerNV XSCOM bus + * + * Copyright (c) 2016, IBM Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" + +#include "libqtest.h" + +typedef enum PnvChipType { + PNV_CHIP_POWER8E, /* AKA Murano (default) */ + PNV_CHIP_POWER8, /* AKA Venice */ + PNV_CHIP_POWER8NVL, /* AKA Naples */ + PNV_CHIP_POWER9, /* AKA Nimbus */ +} PnvChipType; + +typedef struct PnvChip { + PnvChipType chip_type; + const char *cpu_model; + uint64_t xscom_base; + uint64_t xscom_core_base; + uint64_t cfam_id; + uint32_t first_core; +} PnvChip; + +static const PnvChip pnv_chips[] = { + { + .chip_type = PNV_CHIP_POWER8, + .cpu_model = "POWER8", + .xscom_base = 0x0003fc0000000000ull, + .xscom_core_base = 0x10000000ull, + .cfam_id = 0x220ea04980000000ull, + .first_core = 0x1, + }, { + .chip_type = PNV_CHIP_POWER8NVL, + .cpu_model = "POWER8NVL", + .xscom_base = 0x0003fc0000000000ull, + .xscom_core_base = 0x10000000ull, + .cfam_id = 0x120d304980000000ull, + .first_core = 0x1, + }, { + .chip_type = PNV_CHIP_POWER9, + .cpu_model = "POWER9", + .xscom_base = 0x000603fc00000000ull, + .xscom_core_base = 0x0ull, + .cfam_id = 0x100d104980000000ull, + .first_core = 0x20, + }, +}; + +static uint64_t pnv_xscom_addr(const PnvChip *chip, uint32_t pcba) +{ + uint64_t addr = chip->xscom_base; + + if (chip->chip_type == PNV_CHIP_POWER9) { + addr |= ((uint64_t) pcba << 3); + } else { + addr |= (((uint64_t) pcba << 4) & ~0xffull) | + (((uint64_t) pcba << 3) & 0x78); + } + return addr; +} + +static uint64_t pnv_xscom_read(const PnvChip *chip, uint32_t pcba) +{ + return readq(pnv_xscom_addr(chip, pcba)); +} + +static void test_xscom_cfam_id(const PnvChip *chip) +{ + uint64_t f000f = pnv_xscom_read(chip, 0xf000f); + + g_assert_cmphex(f000f, ==, chip->cfam_id); +} + +static void test_cfam_id(const void *data) +{ + char *args; + const PnvChip *chip = data; + + args = g_strdup_printf("-M powernv,accel=tcg -cpu %s", chip->cpu_model); + + qtest_start(args); + test_xscom_cfam_id(chip); + qtest_quit(global_qtest); + + g_free(args); +} + +#define PNV_XSCOM_EX_CORE_BASE(chip, i) \ + ((chip)->xscom_core_base | (((uint64_t)i) << 24)) +#define PNV_XSCOM_EX_DTS_RESULT0 0x50000 + +static void test_xscom_core(const PnvChip *chip) +{ + uint32_t first_core_dts0 = + PNV_XSCOM_EX_CORE_BASE(chip, chip->first_core) | + PNV_XSCOM_EX_DTS_RESULT0; + uint64_t dts0 = pnv_xscom_read(chip, first_core_dts0); + + g_assert_cmphex(dts0, ==, 0x26f024f023f0000ull); +} + 
+static void test_core(const void *data) +{ + char *args; + const PnvChip *chip = data; + + args = g_strdup_printf("-M powernv,accel=tcg -cpu %s", chip->cpu_model); + + qtest_start(args); + test_xscom_core(chip); + qtest_quit(global_qtest); + + g_free(args); +} + +static void add_test(const char *name, void (*test)(const void *data)) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pnv_chips); i++) { + char *tname = g_strdup_printf("pnv-xscom/%s/%s", name, + pnv_chips[i].cpu_model); + qtest_add_data_func(tname, &pnv_chips[i], test); + g_free(tname); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + add_test("cfam_id", test_cfam_id); + add_test("core", test_core); + return g_test_run(); +} diff --git a/tests/postcopy-test.c b/tests/postcopy-test.c index 229e9e901a..dafe8beba4 100644 --- a/tests/postcopy-test.c +++ b/tests/postcopy-test.c @@ -18,7 +18,7 @@ #include "qemu/sockets.h" #include "sysemu/char.h" #include "sysemu/sysemu.h" -#include "hw/nvram/openbios_firmware_abi.h" +#include "hw/nvram/chrp_nvram.h" #define MIN_NVRAM_SIZE 8192 /* from spapr_nvram.c */ @@ -137,15 +137,15 @@ static void init_bootfile_ppc(const char *bootpath) { FILE *bootfile; char buf[MIN_NVRAM_SIZE]; - struct OpenBIOS_nvpart_v1 *header = (struct OpenBIOS_nvpart_v1 *)buf; + ChrpNvramPartHdr *header = (ChrpNvramPartHdr *)buf; memset(buf, 0, MIN_NVRAM_SIZE); /* Create a "common" partition in nvram to store boot-command property */ - header->signature = OPENBIOS_PART_SYSTEM; + header->signature = CHRP_NVPART_SYSTEM; memcpy(header->name, "common", 6); - OpenBIOS_finish_partition(header, MIN_NVRAM_SIZE); + chrp_nvram_finish_partition(header, MIN_NVRAM_SIZE); /* FW_MAX_SIZE is 4MB, but slof.bin is only 900KB, * so let's modify memory between 1MB and 100MB @@ -176,6 +176,7 @@ static void wait_for_serial(const char *side) int started = (strcmp(side, "src_serial") == 0 && strcmp(arch, "ppc64") == 0) ? 0 : 1; + g_free(serialpath); do { int readvalue = fgetc(serialfile); @@ -203,7 +204,6 @@ static void wait_for_serial(const char *side) case 'B': /* It's alive! */ fclose(serialfile); - g_free(serialpath); return; case EOF: @@ -260,8 +260,8 @@ static uint64_t get_migration_pass(void) } else { rsp_ram = qdict_get_qdict(rsp_return, "ram"); result = qdict_get_try_int(rsp_ram, "dirty-sync-count", 0); - QDECREF(rsp); } + QDECREF(rsp); return result; } @@ -350,6 +350,7 @@ static void cleanup(const char *filename) char *path = g_strdup_printf("%s/%s", tmpfs, filename); unlink(path); + g_free(path); } static void test_migrate(void) @@ -379,21 +380,27 @@ static void test_migrate(void) " -incoming %s", tmpfs, bootpath, uri); } else if (strcmp(arch, "ppc64") == 0) { + const char *accel; + + /* On ppc64, the test only works with kvm-hv, but not with kvm-pr */ + accel = access("/sys/module/kvm_hv", F_OK) ? 
"tcg" : "kvm:tcg"; init_bootfile_ppc(bootpath); - cmd_src = g_strdup_printf("-machine accel=kvm:tcg -m 256M" + cmd_src = g_strdup_printf("-machine accel=%s -m 256M" " -name pcsource,debug-threads=on" " -serial file:%s/src_serial" " -drive file=%s,if=pflash,format=raw", - tmpfs, bootpath); - cmd_dst = g_strdup_printf("-machine accel=kvm:tcg -m 256M" + accel, tmpfs, bootpath); + cmd_dst = g_strdup_printf("-machine accel=%s -m 256M" " -name pcdest,debug-threads=on" " -serial file:%s/dest_serial" " -incoming %s", - tmpfs, uri); + accel, tmpfs, uri); } else { g_assert_not_reached(); } + g_free(bootpath); + from = qtest_start(cmd_src); g_free(cmd_src); diff --git a/tests/prom-env-test.c b/tests/prom-env-test.c index 7a628574c3..0ba6f48607 100644 --- a/tests/prom-env-test.c +++ b/tests/prom-env-test.c @@ -9,11 +9,12 @@ * This work is licensed under the terms of the GNU GPL, version 2 * or later. See the COPYING file in the top-level directory. * - * This test is used to check that some OpenBIOS machines can be started - * successfully in TCG mode. To do this, we first put some Forth code into - * the "boot-command" Open Firmware environment variable. This Forth code - * writes a well-known magic value to a known location in memory. Then we - * start the guest so that OpenBIOS can boot and finally run the Forth code. + * This test is used to check that some Open Firmware based machines (i.e. + * OpenBIOS or SLOF) can be started successfully in TCG mode. To do this, we + * first put some Forth code into the "boot-command" Open Firmware environment + * variable. This Forth code writes a well-known magic value to a known location + * in memory. Then we start the guest so that the firmware can boot and finally + * run the Forth code. * The testing code here then can finally check whether the value has been * successfully written into the guest memory. */ @@ -71,13 +72,16 @@ int main(int argc, char *argv[]) { const char *sparc_machines[] = { "SPARCbook", "Voyager", "SS-20", NULL }; const char *sparc64_machines[] = { "sun4u", "sun4v", NULL }; - const char *mac_machines[] = { "mac99", "g3beige", NULL }; + const char *ppc_machines[] = { "mac99", "g3beige", NULL }; + const char *ppc64_machines[] = { "mac99", "g3beige", "pseries", NULL }; const char *arch = qtest_get_arch(); g_test_init(&argc, &argv, NULL); - if (!strcmp(arch, "ppc") || !strcmp(arch, "ppc64")) { - add_tests(mac_machines); + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { + add_tests(ppc64_machines); } else if (!strcmp(arch, "sparc")) { add_tests(sparc_machines); } else if (!strcmp(arch, "sparc64")) { diff --git a/tests/ptimer-test-stubs.c b/tests/ptimer-test-stubs.c new file mode 100644 index 0000000000..21d4ebb0fe --- /dev/null +++ b/tests/ptimer-test-stubs.c @@ -0,0 +1,114 @@ +/* + * Stubs for the ptimer-test + * + * Copyright (c) 2016 Dmitry Osipenko + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "sysemu/replay.h" +#include "migration/vmstate.h" + +#include "ptimer-test.h" + +const VMStateInfo vmstate_info_uint8; +const VMStateInfo vmstate_info_uint32; +const VMStateInfo vmstate_info_uint64; +const VMStateInfo vmstate_info_int64; +const VMStateInfo vmstate_info_timer; + +struct QEMUBH { + QEMUBHFunc *cb; + void *opaque; +}; + +QEMUTimerListGroup main_loop_tlg; + +int64_t ptimer_test_time_ns; + +void timer_init_tl(QEMUTimer *ts, + QEMUTimerList *timer_list, int scale, + QEMUTimerCB *cb, void *opaque) +{ + ts->timer_list = timer_list; + ts->cb = cb; + ts->opaque = opaque; + ts->scale = scale; + ts->expire_time = -1; +} + +void timer_mod(QEMUTimer *ts, int64_t expire_time) +{ + QEMUTimerList *timer_list = ts->timer_list; + QEMUTimer *t = &timer_list->active_timers; + + while (t->next != NULL) { + if (t->next == ts) { + break; + } + + t = t->next; + } + + ts->expire_time = MAX(expire_time * ts->scale, 0); + ts->next = NULL; + t->next = ts; +} + +void timer_del(QEMUTimer *ts) +{ + QEMUTimerList *timer_list = ts->timer_list; + QEMUTimer *t = &timer_list->active_timers; + + while (t->next != NULL) { + if (t->next == ts) { + t->next = ts->next; + return; + } + + t = t->next; + } +} + +int64_t qemu_clock_get_ns(QEMUClockType type) +{ + return ptimer_test_time_ns; +} + +int64_t qemu_clock_deadline_ns_all(QEMUClockType type) +{ + QEMUTimerList *timer_list = main_loop_tlg.tl[type]; + QEMUTimer *t = timer_list->active_timers.next; + int64_t deadline = -1; + + while (t != NULL) { + if (deadline == -1) { + deadline = t->expire_time; + } else { + deadline = MIN(deadline, t->expire_time); + } + + t = t->next; + } + + return deadline; +} + +QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) +{ + QEMUBH *bh = g_new(QEMUBH, 1); + + bh->cb = cb; + bh->opaque = opaque; + + return bh; +} + +void replay_bh_schedule_event(QEMUBH *bh) +{ + bh->cb(bh->opaque); +} diff --git a/tests/ptimer-test.c b/tests/ptimer-test.c new file mode 100644 index 0000000000..b36a476483 --- /dev/null +++ b/tests/ptimer-test.c @@ -0,0 +1,788 @@ +/* + * QTest testcase for the ptimer + * + * Copyright (c) 2016 Dmitry Osipenko + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "hw/ptimer.h" + +#include "libqtest.h" +#include "ptimer-test.h" + +static bool triggered; + +static void ptimer_trigger(void *opaque) +{ + triggered = true; +} + +static void ptimer_test_expire_qemu_timers(int64_t expire_time, + QEMUClockType type) +{ + QEMUTimerList *timer_list = main_loop_tlg.tl[type]; + QEMUTimer *t = timer_list->active_timers.next; + + while (t != NULL) { + if (t->expire_time == expire_time) { + timer_del(t); + + if (t->cb != NULL) { + t->cb(t->opaque); + } + } + + t = t->next; + } +} + +static void ptimer_test_set_qemu_time_ns(int64_t ns) +{ + ptimer_test_time_ns = ns; +} + +static void qemu_clock_step(uint64_t ns) +{ + int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); + int64_t advanced_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns; + + while (deadline != -1 && deadline <= advanced_time) { + ptimer_test_set_qemu_time_ns(deadline); + ptimer_test_expire_qemu_timers(deadline, QEMU_CLOCK_VIRTUAL); + deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); + } + + ptimer_test_set_qemu_time_ns(advanced_time); +} + +static void check_set_count(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + + triggered = false; + + ptimer_set_count(ptimer, 1000); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 1000); + g_assert_false(triggered); +} + +static void check_set_limit(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + + triggered = false; + + ptimer_set_limit(ptimer, 1000, 0); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 1000); + g_assert_false(triggered); + + ptimer_set_limit(ptimer, 2000, 1); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 2000); + g_assert_cmpuint(ptimer_get_limit(ptimer), ==, 2000); + g_assert_false(triggered); +} + +static void check_oneshot(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_set_count(ptimer, 10); + ptimer_run(ptimer, 1); + + qemu_clock_step(2000000 * 2 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 8 : 7); + g_assert_false(triggered); + + ptimer_stop(ptimer); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 8 : 7); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 11); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 8 : 7); + g_assert_false(triggered); + + ptimer_run(ptimer, 1); + + qemu_clock_step(2000000 * 7 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 
1 : 0); + + if (no_round_down) { + g_assert_false(triggered); + } else { + g_assert_true(triggered); + + triggered = false; + } + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + + if (no_round_down) { + g_assert_true(triggered); + + triggered = false; + } else { + g_assert_false(triggered); + } + + qemu_clock_step(4000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_false(triggered); + + ptimer_set_count(ptimer, 10); + + qemu_clock_step(20000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 10); + g_assert_false(triggered); + + ptimer_set_limit(ptimer, 9, 1); + + qemu_clock_step(20000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 9); + g_assert_false(triggered); + + ptimer_run(ptimer, 1); + + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 8 : 7); + g_assert_false(triggered); + + ptimer_set_count(ptimer, 20); + + qemu_clock_step(2000000 * 19 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 1 : 0); + g_assert_false(triggered); + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); + + ptimer_stop(ptimer); + + triggered = false; + + qemu_clock_step(2000000 * 12 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_false(triggered); +} + +static void check_periodic(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool wrap_policy = (*policy & PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD); + bool no_immediate_trigger = (*policy & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER); + bool no_immediate_reload = (*policy & PTIMER_POLICY_NO_IMMEDIATE_RELOAD); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_set_limit(ptimer, 10, 1); + ptimer_run(ptimer, 0); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 10); + g_assert_false(triggered); + + qemu_clock_step(1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 10 : 9); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 10 - 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, wrap_policy ? 0 : 10); + g_assert_true(triggered); + + qemu_clock_step(1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + wrap_policy ? 0 : (no_round_down ? 10 : 9)); + g_assert_true(triggered); + + triggered = false; + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 9 : 8) + (wrap_policy ? 1 : 0)); + g_assert_false(triggered); + + ptimer_set_count(ptimer, 20); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 20); + g_assert_false(triggered); + + qemu_clock_step(1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 20 : 19); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 11 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 9 : 8); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 10); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 9 : 8) + (wrap_policy ? 1 : 0)); + g_assert_true(triggered); + + triggered = false; + + ptimer_set_count(ptimer, 3); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 3); + g_assert_false(triggered); + + qemu_clock_step(1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 
3 : 2); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 4); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 9 : 8) + (wrap_policy ? 1 : 0)); + g_assert_true(triggered); + + ptimer_stop(ptimer); + triggered = false; + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 9 : 8) + (wrap_policy ? 1 : 0)); + g_assert_false(triggered); + + ptimer_set_count(ptimer, 3); + ptimer_run(ptimer, 0); + + qemu_clock_step(2000000 * 3 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + wrap_policy ? 0 : (no_round_down ? 10 : 9)); + g_assert_true(triggered); + + triggered = false; + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 9 : 8) + (wrap_policy ? 1 : 0)); + g_assert_false(triggered); + + ptimer_set_count(ptimer, 0); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + no_immediate_reload ? 0 : 10); + + if (no_immediate_trigger) { + g_assert_false(triggered); + } else { + g_assert_true(triggered); + } + + triggered = false; + + qemu_clock_step(1); + + if (no_immediate_reload) { + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_false(triggered); + + qemu_clock_step(2000000); + + if (no_immediate_trigger) { + g_assert_true(triggered); + } else { + g_assert_false(triggered); + } + + triggered = false; + } + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 10 : 9); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 12); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0)); + g_assert_true(triggered); + + ptimer_stop(ptimer); + + triggered = false; + + qemu_clock_step(2000000 * 10); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0)); + g_assert_false(triggered); + + ptimer_run(ptimer, 0); + ptimer_set_period(ptimer, 0); + + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 8 : 7) + (wrap_policy ? 1 : 0)); + g_assert_false(triggered); +} + +static void check_on_the_fly_mode_change(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool wrap_policy = (*policy & PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_set_limit(ptimer, 10, 1); + ptimer_run(ptimer, 1); + + qemu_clock_step(2000000 * 9 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 1 : 0); + g_assert_false(triggered); + + ptimer_run(ptimer, 0); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 1 : 0); + g_assert_false(triggered); + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + wrap_policy ? 0 : (no_round_down ? 10 : 9)); + g_assert_true(triggered); + + triggered = false; + + qemu_clock_step(2000000 * 9); + + ptimer_run(ptimer, 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + (no_round_down ? 1 : 0) + (wrap_policy ? 
1 : 0)); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 3); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); +} + +static void check_on_the_fly_period_change(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_set_limit(ptimer, 8, 1); + ptimer_run(ptimer, 1); + + qemu_clock_step(2000000 * 4 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 4 : 3); + g_assert_false(triggered); + + ptimer_set_period(ptimer, 4000000); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 4 : 3); + + qemu_clock_step(4000000 * 2 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 2 : 0); + g_assert_false(triggered); + + qemu_clock_step(4000000 * 2); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); +} + +static void check_on_the_fly_freq_change(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_set_freq(ptimer, 500); + ptimer_set_limit(ptimer, 8, 1); + ptimer_run(ptimer, 1); + + qemu_clock_step(2000000 * 4 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 4 : 3); + g_assert_false(triggered); + + ptimer_set_freq(ptimer, 250); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 4 : 3); + + qemu_clock_step(2000000 * 4 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 2 : 0); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 4); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); +} + +static void check_run_with_period_0(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + + triggered = false; + + ptimer_set_count(ptimer, 99); + ptimer_run(ptimer, 1); + + qemu_clock_step(10 * NANOSECONDS_PER_SECOND); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 99); + g_assert_false(triggered); +} + +static void check_run_with_delta_0(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool wrap_policy = (*policy & PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD); + bool no_immediate_trigger = (*policy & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER); + bool no_immediate_reload = (*policy & PTIMER_POLICY_NO_IMMEDIATE_RELOAD); + bool no_round_down = (*policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_set_limit(ptimer, 99, 0); + ptimer_run(ptimer, 1); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + no_immediate_reload ? 0 : 99); + + if (no_immediate_trigger) { + g_assert_false(triggered); + } else { + g_assert_true(triggered); + } + + triggered = false; + + if (no_immediate_trigger || no_immediate_reload) { + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + no_immediate_reload ? 0 : (no_round_down ? 
98 : 97)); + + if (no_immediate_trigger && no_immediate_reload) { + g_assert_true(triggered); + + triggered = false; + } else { + g_assert_false(triggered); + } + + ptimer_set_count(ptimer, 99); + ptimer_run(ptimer, 1); + } + + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 98 : 97); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 97); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 1 : 0); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 2); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); + + triggered = false; + + ptimer_set_count(ptimer, 0); + ptimer_run(ptimer, 0); + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + no_immediate_reload ? 0 : 99); + + if (no_immediate_trigger) { + g_assert_false(triggered); + } else { + g_assert_true(triggered); + } + + triggered = false; + + qemu_clock_step(1); + + if (no_immediate_reload) { + qemu_clock_step(2000000); + } + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 99 : 98); + + if (no_immediate_reload && no_immediate_trigger) { + g_assert_true(triggered); + } else { + g_assert_false(triggered); + } + + triggered = false; + + qemu_clock_step(2000000); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, no_round_down ? 98 : 97); + g_assert_false(triggered); + + qemu_clock_step(2000000 * 98); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, + wrap_policy ? 0 : (no_round_down ? 99 : 98)); + g_assert_true(triggered); + + ptimer_stop(ptimer); +} + +static void check_periodic_with_load_0(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool continuous_trigger = (*policy & PTIMER_POLICY_CONTINUOUS_TRIGGER); + bool no_immediate_trigger = (*policy & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_run(ptimer, 0); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + + if (no_immediate_trigger) { + g_assert_false(triggered); + } else { + g_assert_true(triggered); + } + + triggered = false; + + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + + if (continuous_trigger || no_immediate_trigger) { + g_assert_true(triggered); + } else { + g_assert_false(triggered); + } + + triggered = false; + + ptimer_set_count(ptimer, 10); + ptimer_run(ptimer, 0); + + qemu_clock_step(2000000 * 10 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + g_assert_true(triggered); + + triggered = false; + + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + + if (continuous_trigger) { + g_assert_true(triggered); + } else { + g_assert_false(triggered); + } + + ptimer_stop(ptimer); +} + +static void check_oneshot_with_load_0(gconstpointer arg) +{ + const uint8_t *policy = arg; + QEMUBH *bh = qemu_bh_new(ptimer_trigger, NULL); + ptimer_state *ptimer = ptimer_init(bh, *policy); + bool no_immediate_trigger = (*policy & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER); + + triggered = false; + + ptimer_set_period(ptimer, 2000000); + ptimer_run(ptimer, 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + + if (no_immediate_trigger) { + g_assert_false(triggered); + } else { + g_assert_true(triggered); + } + + triggered = false; + + qemu_clock_step(2000000 + 1); + + g_assert_cmpuint(ptimer_get_count(ptimer), ==, 0); + + if (no_immediate_trigger) { + g_assert_true(triggered); + } else { + 
g_assert_false(triggered); + } +} + +static void add_ptimer_tests(uint8_t policy) +{ + uint8_t *ppolicy = g_malloc(1); + char *policy_name = g_malloc0(256); + + *ppolicy = policy; + + if (policy == PTIMER_POLICY_DEFAULT) { + g_sprintf(policy_name, "default"); + } + + if (policy & PTIMER_POLICY_WRAP_AFTER_ONE_PERIOD) { + g_strlcat(policy_name, "wrap_after_one_period,", 256); + } + + if (policy & PTIMER_POLICY_CONTINUOUS_TRIGGER) { + g_strlcat(policy_name, "continuous_trigger,", 256); + } + + if (policy & PTIMER_POLICY_NO_IMMEDIATE_TRIGGER) { + g_strlcat(policy_name, "no_immediate_trigger,", 256); + } + + if (policy & PTIMER_POLICY_NO_IMMEDIATE_RELOAD) { + g_strlcat(policy_name, "no_immediate_reload,", 256); + } + + if (policy & PTIMER_POLICY_NO_COUNTER_ROUND_DOWN) { + g_strlcat(policy_name, "no_counter_rounddown,", 256); + } + + g_test_add_data_func( + g_strdup_printf("/ptimer/set_count policy=%s", policy_name), + ppolicy, check_set_count); + + g_test_add_data_func( + g_strdup_printf("/ptimer/set_limit policy=%s", policy_name), + ppolicy, check_set_limit); + + g_test_add_data_func( + g_strdup_printf("/ptimer/oneshot policy=%s", policy_name), + ppolicy, check_oneshot); + + g_test_add_data_func( + g_strdup_printf("/ptimer/periodic policy=%s", policy_name), + ppolicy, check_periodic); + + g_test_add_data_func( + g_strdup_printf("/ptimer/on_the_fly_mode_change policy=%s", policy_name), + ppolicy, check_on_the_fly_mode_change); + + g_test_add_data_func( + g_strdup_printf("/ptimer/on_the_fly_period_change policy=%s", policy_name), + ppolicy, check_on_the_fly_period_change); + + g_test_add_data_func( + g_strdup_printf("/ptimer/on_the_fly_freq_change policy=%s", policy_name), + ppolicy, check_on_the_fly_freq_change); + + g_test_add_data_func( + g_strdup_printf("/ptimer/run_with_period_0 policy=%s", policy_name), + ppolicy, check_run_with_period_0); + + g_test_add_data_func( + g_strdup_printf("/ptimer/run_with_delta_0 policy=%s", policy_name), + ppolicy, check_run_with_delta_0); + + g_test_add_data_func( + g_strdup_printf("/ptimer/periodic_with_load_0 policy=%s", policy_name), + ppolicy, check_periodic_with_load_0); + + g_test_add_data_func( + g_strdup_printf("/ptimer/oneshot_with_load_0 policy=%s", policy_name), + ppolicy, check_oneshot_with_load_0); +} + +static void add_all_ptimer_policies_comb_tests(void) +{ + int last_policy = PTIMER_POLICY_NO_COUNTER_ROUND_DOWN; + int policy = PTIMER_POLICY_DEFAULT; + + for (; policy < (last_policy << 1); policy++) { + add_ptimer_tests(policy); + } +} + +int main(int argc, char **argv) +{ + int i; + + g_test_init(&argc, &argv, NULL); + + for (i = 0; i < QEMU_CLOCK_MAX; i++) { + main_loop_tlg.tl[i] = g_new0(QEMUTimerList, 1); + } + + add_all_ptimer_policies_comb_tests(); + + qtest_allowed = true; + + return g_test_run(); +} diff --git a/tests/ptimer-test.h b/tests/ptimer-test.h new file mode 100644 index 0000000000..09ac56da9e --- /dev/null +++ b/tests/ptimer-test.h @@ -0,0 +1,22 @@ +/* + * QTest testcase for the ptimer + * + * Copyright (c) 2016 Dmitry Osipenko + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
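The assertions in tests/ptimer-test.c above all encode the same counter arithmetic: step the stubbed QEMU_CLOCK_VIRTUAL forward, subtract the number of whole periods that elapsed, and, under the default policy only, round the counter down one further tick while a period is partially elapsed; PTIMER_POLICY_NO_COUNTER_ROUND_DOWN suppresses that extra step. A minimal sketch of the expected value for the one-shot cases before the timer reaches zero (an illustrative helper, not part of the patch):

    def expected_count(start, period_ns, elapsed_ns, no_round_down=False):
        # Whole periods elapsed since the timer was started.
        whole = elapsed_ns // period_ns
        partial = (elapsed_ns % period_ns) != 0
        count = start - whole
        if partial and not no_round_down:
            count -= 1        # default policy also rounds the in-progress period down
        return max(count, 0)

    # Mirrors check_oneshot(): qemu_clock_step(2000000 * 2 + 1) on a count of 10.
    assert expected_count(10, 2000000, 2 * 2000000 + 1) == 7
    assert expected_count(10, 2000000, 2 * 2000000 + 1, no_round_down=True) == 8

The ptimer-test.h header that follows supplies the stubbed QEMUTimerList the test links against instead of the real main loop.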
+ * + */ + +#ifndef PTIMER_TEST_H +#define PTIMER_TEST_H + +extern bool qtest_allowed; + +extern int64_t ptimer_test_time_ns; + +struct QEMUTimerList { + QEMUTimer active_timers; +}; + +#endif diff --git a/tests/pxe-test.c b/tests/pxe-test.c index b2cc355a95..34282d3704 100644 --- a/tests/pxe-test.c +++ b/tests/pxe-test.c @@ -19,16 +19,16 @@ #define NETNAME "net0" -static const char *disk = "tests/pxe-test-disk.raw"; +static char disk[] = "tests/pxe-test-disk-XXXXXX"; -static void test_pxe_one(const char *params) +static void test_pxe_one(const char *params, bool ipv6) { char *args; - args = g_strdup_printf("-machine accel=tcg " - "-netdev user,id=" NETNAME ",tftp=./,bootfile=%s " - "%s ", - disk, params); + args = g_strdup_printf("-machine accel=tcg -nodefaults -boot order=n " + "-netdev user,id=" NETNAME ",tftp=./,bootfile=%s," + "ipv4=%s,ipv6=%s %s", disk, ipv6 ? "off" : "on", + ipv6 ? "on" : "off", params); qtest_start(args); boot_sector_test(); @@ -38,12 +38,17 @@ static void test_pxe_one(const char *params) static void test_pxe_e1000(void) { - test_pxe_one("-device e1000,netdev=" NETNAME); + test_pxe_one("-device e1000,netdev=" NETNAME, false); } static void test_pxe_virtio_pci(void) { - test_pxe_one("-device virtio-net-pci,netdev=" NETNAME); + test_pxe_one("-device virtio-net-pci,netdev=" NETNAME, false); +} + +static void test_pxe_spapr_vlan(void) +{ + test_pxe_one("-device spapr-vlan,netdev=" NETNAME, true); } int main(int argc, char *argv[]) @@ -60,6 +65,9 @@ int main(int argc, char *argv[]) if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { qtest_add_func("pxe/e1000", test_pxe_e1000); qtest_add_func("pxe/virtio", test_pxe_virtio_pci); + } else if (strcmp(arch, "ppc64") == 0) { + qtest_add_func("pxe/virtio", test_pxe_virtio_pci); + qtest_add_func("pxe/spapr-vlan", test_pxe_spapr_vlan); } ret = g_test_run(); boot_sector_cleanup(disk); diff --git a/tests/q35-test.c b/tests/q35-test.c index 71538fc17c..763fe3d6ae 100644 --- a/tests/q35-test.c +++ b/tests/q35-test.c @@ -42,7 +42,7 @@ static void test_smram_lock(void) QPCIDevice *pcidev; QDict *response; - pcibus = qpci_init_pc(); + pcibus = qpci_init_pc(NULL); g_assert(pcibus != NULL); pcidev = qpci_device_find(pcibus, 0); diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out index 853173572b..59b8f74cd4 100644 --- a/tests/qemu-iotests/026.out +++ b/tests/qemu-iotests/026.out @@ -14,6 +14,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error @@ -22,6 +23,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error @@ -40,6 +42,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -48,6 +51,7 @@ This means waste of disk space, but no harm to data. 
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -286,12 +290,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -308,12 +314,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. @@ -330,12 +338,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -352,12 +362,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. @@ -374,12 +386,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. 
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -396,12 +410,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. @@ -513,6 +529,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_blocks; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -521,6 +538,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_blocks; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -539,6 +557,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_table; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -547,6 +566,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_table; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -611,6 +631,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow_write_table; errno: 5; imm: off; once: off +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -622,6 +643,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow_write_table; errno: 28; imm: off; once: off +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache index 672d77c6ec..b4aeebcc61 100644 --- a/tests/qemu-iotests/026.out.nocache +++ b/tests/qemu-iotests/026.out.nocache @@ -14,6 +14,7 @@ No errors were found on the image. 
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error @@ -22,6 +23,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error @@ -40,6 +42,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -48,6 +51,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_update; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -294,12 +298,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -316,12 +322,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_load; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. @@ -338,12 +346,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -360,12 +370,14 @@ No errors were found on the image. 
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_update_part; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. @@ -382,12 +394,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 5; imm: off; once: off; write +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 5; imm: off; once: off; write -b +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -404,12 +418,14 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. @@ -521,6 +537,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_blocks; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -529,6 +546,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_blocks; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -547,6 +565,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_table; errno: 28; imm: off; once: off; write +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -555,6 +574,7 @@ This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: refblock_alloc_write_table; errno: 28; imm: off; once: off; write -b +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device @@ -619,6 +639,7 @@ No errors were found on the image. 
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow_write_table; errno: 5; imm: off; once: off +Failed to flush the L2 table cache: Input/output error Failed to flush the refcount block cache: Input/output error write failed: Input/output error No errors were found on the image. @@ -630,6 +651,7 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow_write_table; errno: 28; imm: off; once: off +Failed to flush the L2 table cache: No space left on device Failed to flush the refcount block cache: No space left on device write failed: No space left on device No errors were found on the image. diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 3ac2443e5b..54db54a1ea 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -36,7 +36,7 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) qemu_io('-f', 'raw', '-c', 'write -P 0x1 0 512', backing_img) qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x1 524288 512', mid_img) - self.vm = iotests.VM().add_drive("blkdebug::" + test_img) + self.vm = iotests.VM().add_drive("blkdebug::" + test_img, "backing.node-name=mid") self.vm.launch() def tearDown(self): @@ -60,6 +60,25 @@ class TestSingleDrive(iotests.QMPTestCase): qemu_io('-f', iotests.imgfmt, '-c', 'map', test_img), 'image file map does not match backing file after streaming') + def test_stream_intermediate(self): + self.assert_no_active_block_jobs() + + self.assertNotEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img), + qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img), + 'image file map matches backing file before streaming') + + result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='stream-mid') + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + self.assertEqual(qemu_io('-f', 'raw', '-c', 'map', backing_img), + qemu_io('-f', iotests.imgfmt, '-c', 'map', mid_img), + 'image file map does not match backing file after streaming') + def test_stream_pause(self): self.assert_no_active_block_jobs() @@ -126,8 +145,300 @@ class TestSingleDrive(iotests.QMPTestCase): def test_device_not_found(self): result = self.vm.qmp('block-stream', device='nonexistent') - self.assert_qmp(result, 'error/class', 'DeviceNotFound') + self.assert_qmp(result, 'error/class', 'GenericError') + + +class TestParallelOps(iotests.QMPTestCase): + num_ops = 4 # Number of parallel block-stream operations + num_imgs = num_ops * 2 + 1 + image_len = num_ops * 1024 * 1024 + imgs = [] + + def setUp(self): + opts = [] + self.imgs = [] + + # Initialize file names and command-line options + for i in range(self.num_imgs): + img_depth = self.num_imgs - i - 1 + opts.append("backing." * img_depth + "node-name=node%d" % i) + self.imgs.append(os.path.join(iotests.test_dir, 'img-%d.img' % i)) + + # Create all images + iotests.create_image(self.imgs[0], self.image_len) + for i in range(1, self.num_imgs): + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=%s' % self.imgs[i-1], self.imgs[i]) + + # Put data into the images we are copying data from + for i in range(self.num_imgs / 2): + img_index = i * 2 + 1 + # Alternate between 512k and 1M. 
+ # This way jobs will not finish in the same order they were created + num_kb = 512 + 512 * (i % 2) + qemu_io('-f', iotests.imgfmt, + '-c', 'write -P %d %d %d' % (i, i*1024*1024, num_kb * 1024), + self.imgs[img_index]) + + # Attach the drive to the VM + self.vm = iotests.VM() + self.vm.add_drive(self.imgs[-1], ','.join(opts)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + for img in self.imgs: + os.remove(img) + + # Test that it's possible to run several block-stream operations + # in parallel in the same snapshot chain + def test_stream_parallel(self): + self.assert_no_active_block_jobs() + + # Check that the maps don't match before the streaming operations + for i in range(2, self.num_imgs, 2): + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), + 'image file map matches backing file before streaming') + + # Create all streaming jobs + pending_jobs = [] + for i in range(2, self.num_imgs, 2): + node_name = 'node%d' % i + job_id = 'stream-%s' % node_name + pending_jobs.append(job_id) + result = self.vm.qmp('block-stream', device=node_name, job_id=job_id, base=self.imgs[i-2], speed=512*1024) + self.assert_qmp(result, 'return', {}) + + # Wait for all jobs to be finished. + while len(pending_jobs) > 0: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + job_id = self.dictpath(event, 'data/device') + self.assertTrue(job_id in pending_jobs) + self.assert_qmp_absent(event, 'data/error') + pending_jobs.remove(job_id) + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + # Check that all maps match now + for i in range(2, self.num_imgs, 2): + self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[i-1]), + 'image file map does not match backing file after streaming') + + # Test that it's not possible to perform two block-stream + # operations if there are nodes involved in both. 
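test_stream_parallel above starts one block-stream job per image pair, each with its own job_id, and then simply drains QMP events until every job has reported BLOCK_JOB_COMPLETED; the overlapping-job error cases introduced by the comment above follow below. Reduced to a standalone sketch against the same iotests VM helpers (the function name is illustrative, not part of the patch):

    def wait_for_jobs(vm, job_ids):
        # Drain QMP events until every job in job_ids has completed without error.
        pending = set(job_ids)
        while pending:
            for event in vm.get_qmp_events(wait=True):
                if event['event'] == 'BLOCK_JOB_COMPLETED':
                    job_id = event['data']['device']   # the job_id is reported as 'device'
                    assert 'error' not in event['data']
                    pending.discard(job_id)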
+ def test_overlapping_1(self): + self.assert_no_active_block_jobs() + + # Set a speed limit to make sure that this job blocks the rest + result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4', base=self.imgs[1], speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', device='node5', job_id='stream-node5', base=self.imgs[2]) + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3', base=self.imgs[2]) + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node4', job_id='stream-node4-v2') + self.assert_qmp(result, 'error/class', 'GenericError') + + # block-commit should also fail if it touches nodes used by the stream job + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[4], job_id='commit-node4') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[1], top=self.imgs[3], job_id='commit-node1') + self.assert_qmp(result, 'error/class', 'GenericError') + + # This fails because it needs to modify the backing string in node2, which is blocked + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[0], top=self.imgs[1], job_id='commit-node0') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.wait_until_completed(drive='stream-node4') + self.assert_no_active_block_jobs() + + # Similar to test_overlapping_1, but with block-commit + # blocking the other jobs + def test_overlapping_2(self): + self.assertLessEqual(9, self.num_imgs) + self.assert_no_active_block_jobs() + + # Set a speed limit to make sure that this job blocks the rest + result = self.vm.qmp('block-commit', device='drive0', top=self.imgs[5], base=self.imgs[3], job_id='commit-node3', speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', device='node3', job_id='stream-node3') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node6', base=self.imgs[2], job_id='stream-node6') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], job_id='stream-node4') + self.assert_qmp(result, 'error/class', 'GenericError') + + result = self.vm.qmp('block-stream', device='node6', base=self.imgs[4], job_id='stream-node6-v2') + self.assert_qmp(result, 'error/class', 'GenericError') + + # This fails because block-commit needs to block node6, the overlay of the 'top' image + result = self.vm.qmp('block-stream', device='node7', base=self.imgs[5], job_id='stream-node6-v3') + self.assert_qmp(result, 'error/class', 'GenericError') + # This fails because block-commit currently blocks the active layer even if it's not used + result = self.vm.qmp('block-stream', device='drive0', base=self.imgs[5], job_id='stream-drive0') + self.assert_qmp(result, 'error/class', 'GenericError') + + self.wait_until_completed(drive='commit-node3') + + # Similar to test_overlapping_2, but here block-commit doesn't use the 'top' parameter. + # Internally this uses a mirror block job, hence the separate test case. 
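Both overlapping tests above rely on the same trick: throttle the first job with speed= so it is guaranteed to still be running when the conflicting block-stream/block-commit commands are issued and rejected with GenericError; test_overlapping_3, announced by the comment above, continues below with the 'top'-less block-commit variant. In isolation the pattern looks roughly like this (node and job names are illustrative):

    # Throttle the blocking job so it cannot finish before the conflict checks run.
    result = vm.qmp('block-stream', device='node4', job_id='blocker',
                    base=imgs[1], speed=1024 * 1024)
    assert result['return'] == {}

    # Any job that shares nodes with 'blocker' must now be rejected.
    result = vm.qmp('block-stream', device='node3', job_id='conflict')
    assert result['error']['class'] == 'GenericError'

    # Lift the throttle so the blocking job can finish and be reaped normally.
    vm.qmp('block-job-set-speed', device='blocker', speed=0)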
+ def test_overlapping_3(self): + self.assertLessEqual(8, self.num_imgs) + self.assert_no_active_block_jobs() + + # Set a speed limit to make sure that this job blocks the rest + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[3], job_id='commit-drive0', speed=1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-stream', device='node5', base=self.imgs[3], job_id='stream-node6') + self.assert_qmp(result, 'error/class', 'GenericError') + + event = self.vm.get_qmp_event(wait=True) + self.assertEqual(event['event'], 'BLOCK_JOB_READY') + self.assert_qmp(event, 'data/device', 'commit-drive0') + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp_absent(event, 'data/error') + + result = self.vm.qmp('block-job-complete', device='commit-drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='commit-drive0') + + # Test a block-stream and a block-commit job in parallel + def test_stream_commit(self): + self.assertLessEqual(8, self.num_imgs) + self.assert_no_active_block_jobs() + + # Stream from node0 into node2 + result = self.vm.qmp('block-stream', device='node2', job_id='node2') + self.assert_qmp(result, 'return', {}) + + # Commit from the active layer into node3 + result = self.vm.qmp('block-commit', device='drive0', base=self.imgs[3]) + self.assert_qmp(result, 'return', {}) + + # Wait for all jobs to be finished. + pending_jobs = ['node2', 'drive0'] + while len(pending_jobs) > 0: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + node_name = self.dictpath(event, 'data/device') + self.assertTrue(node_name in pending_jobs) + self.assert_qmp_absent(event, 'data/error') + pending_jobs.remove(node_name) + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp_absent(event, 'data/error') + self.assertTrue('drive0' in pending_jobs) + self.vm.qmp('block-job-complete', device='drive0') + + self.assert_no_active_block_jobs() + + # Test the base_node parameter + def test_stream_base_node_name(self): + self.assert_no_active_block_jobs() + + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]), + 'image file map matches backing file before streaming') + + # Error: the base node does not exist + result = self.vm.qmp('block-stream', device='node4', base_node='none', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Error: the base node is not a backing file of the top node + result = self.vm.qmp('block-stream', device='node4', base_node='node6', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Error: the base node is the same as the top node + result = self.vm.qmp('block-stream', device='node4', base_node='node4', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Error: cannot specify 'base' and 'base-node' at the same time + result = self.vm.qmp('block-stream', device='node4', base=self.imgs[2], base_node='node2', job_id='stream') + self.assert_qmp(result, 'error/class', 'GenericError') + + # Success: the base node is a backing file of the top node + result = self.vm.qmp('block-stream', device='node4', base_node='node2', job_id='stream') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='stream') + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + 
self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[4]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.imgs[3]), + 'image file map matches backing file after streaming') + +class TestQuorum(iotests.QMPTestCase): + num_children = 3 + children = [] + backing = [] + + def setUp(self): + opts = ['driver=quorum', 'vote-threshold=2'] + + # Initialize file names and command-line options + for i in range(self.num_children): + child_img = os.path.join(iotests.test_dir, 'img-%d.img' % i) + backing_img = os.path.join(iotests.test_dir, 'backing-%d.img' % i) + self.children.append(child_img) + self.backing.append(backing_img) + qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M') + qemu_io('-f', iotests.imgfmt, + '-c', 'write -P 0x55 0 1024', backing_img) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=%s' % backing_img, child_img) + opts.append("children.%d.file.filename=%s" % (i, child_img)) + opts.append("children.%d.node-name=node%d" % (i, i)) + + # Attach the drive to the VM + self.vm = iotests.VM() + self.vm.add_drive(path = None, opts = ','.join(opts)) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + for img in self.children: + os.remove(img) + for img in self.backing: + os.remove(img) + + def test_stream_quorum(self): + if not iotests.supports_quorum(): + return + + self.assertNotEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]), + 'image file map matches backing file before streaming') + + self.assert_no_active_block_jobs() + + result = self.vm.qmp('block-stream', device='node0', job_id='stream-node0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed(drive='stream-node0') + + self.assert_no_active_block_jobs() + self.vm.shutdown() + + self.assertEqual(qemu_io('-f', iotests.imgfmt, '-c', 'map', self.children[0]), + qemu_io('-f', iotests.imgfmt, '-c', 'map', self.backing[0]), + 'image file map does not match backing file after streaming') class TestSmallerBackingFile(iotests.QMPTestCase): backing_len = 1 * 1024 * 1024 # MB diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 6323079e08..84bfd63fba 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -.............. +...................... 
---------------------------------------------------------------------- -Ran 14 tests +Ran 22 tests OK diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index cbf5e0ba5c..bc6cf782fe 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -38,7 +38,6 @@ class TestSingleDrive(iotests.QMPTestCase): image_len = 1 * 1024 * 1024 # MB qmp_cmd = 'drive-mirror' qmp_target = target_img - not_found_error = 'DeviceNotFound' def setUp(self): iotests.create_image(backing_img, self.image_len) @@ -176,7 +175,7 @@ class TestSingleDrive(iotests.QMPTestCase): result = self.vm.qmp(self.qmp_cmd, device='ide1-cd0', sync='full', target=self.qmp_target) - self.assert_qmp(result, 'error/class', self.not_found_error) + self.assert_qmp(result, 'error/class', 'GenericError') def test_image_not_found(self): result = self.vm.qmp(self.qmp_cmd, device='drive0', sync='full', @@ -186,20 +185,18 @@ class TestSingleDrive(iotests.QMPTestCase): def test_device_not_found(self): result = self.vm.qmp(self.qmp_cmd, device='nonexistent', sync='full', target=self.qmp_target) - self.assert_qmp(result, 'error/class', self.not_found_error) + self.assert_qmp(result, 'error/class', 'GenericError') class TestSingleBlockdev(TestSingleDrive): qmp_cmd = 'blockdev-mirror' qmp_target = 'node1' - not_found_error = 'GenericError' def setUp(self): TestSingleDrive.setUp(self) qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) - args = {'options': - {'driver': iotests.imgfmt, - 'node-name': self.qmp_target, - 'file': { 'filename': target_img, 'driver': 'file' } } } + args = {'driver': iotests.imgfmt, + 'node-name': self.qmp_target, + 'file': { 'filename': target_img, 'driver': 'file' } } result = self.vm.qmp("blockdev-add", **args) self.assert_qmp(result, 'return', {}) @@ -763,9 +760,6 @@ class TestRepairQuorum(iotests.QMPTestCase): image_len = 1 * 1024 * 1024 # MB IMAGES = [ quorum_img1, quorum_img2, quorum_img3 ] - def has_quorum(self): - return 'quorum' in iotests.qemu_img_pipe('--help') - def setUp(self): self.vm = iotests.VM() @@ -784,9 +778,9 @@ class TestRepairQuorum(iotests.QMPTestCase): self.vm.launch() #assemble the quorum block device from the individual files - args = { "options" : { "driver": "quorum", "id": "quorum0", - "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } } - if self.has_quorum(): + args = { "driver": "quorum", "node-name": "quorum0", + "vote-threshold": 2, "children": [ "img0", "img1", "img2" ] } + if iotests.supports_quorum(): result = self.vm.qmp("blockdev-add", **args) self.assert_qmp(result, 'return', {}) @@ -801,18 +795,17 @@ class TestRepairQuorum(iotests.QMPTestCase): pass def test_complete(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name="repair0", - replaces="img1", + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name="repair0", replaces="img1", target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'return', {}) - self.complete_and_wait(drive="quorum0") + self.complete_and_wait(drive="job0") self.assert_has_block_node("repair0", quorum_repair_img) # TODO: a better test requiring some QEMU infrastructure will be added # to check that this file is really driven by quorum @@ -821,36 +814,34 @@ class TestRepairQuorum(iotests.QMPTestCase): 'target image does not match source after mirroring') def test_cancel(self): - if not 
self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name="repair0", - replaces="img1", + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name="repair0", replaces="img1", target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'return', {}) - self.cancel_and_wait(drive="quorum0", force=True) + self.cancel_and_wait(drive="job0", force=True) # here we check that the last registered quorum file has not been # swapped out and unref self.assert_has_block_node(None, quorum_img3) self.vm.shutdown() def test_cancel_after_ready(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name="repair0", - replaces="img1", + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name="repair0", replaces="img1", target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'return', {}) - self.wait_ready_and_cancel(drive="quorum0") + self.wait_ready_and_cancel(drive="job0") # here we check that the last registered quorum file has not been # swapped out and unref self.assert_has_block_node(None, quorum_img3) @@ -859,18 +850,17 @@ class TestRepairQuorum(iotests.QMPTestCase): 'target image does not match source after mirroring') def test_pause(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return self.assert_no_active_block_jobs() - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name="repair0", - replaces="img1", + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name="repair0", replaces="img1", target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'return', {}) - result = self.vm.qmp('block-job-pause', device='quorum0') + result = self.vm.qmp('block-job-pause', device='job0') self.assert_qmp(result, 'return', {}) time.sleep(1) @@ -881,22 +871,22 @@ class TestRepairQuorum(iotests.QMPTestCase): result = self.vm.qmp('query-block-jobs') self.assert_qmp(result, 'return[0]/offset', offset) - result = self.vm.qmp('block-job-resume', device='quorum0') + result = self.vm.qmp('block-job-resume', device='job0') self.assert_qmp(result, 'return', {}) - self.complete_and_wait(drive="quorum0") + self.complete_and_wait(drive="job0") self.vm.shutdown() self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img), 'target image does not match source after mirroring') def test_medium_not_found(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return if iotests.qemu_default_machine != 'pc': return - result = self.vm.qmp('drive-mirror', device='drive0', # CD-ROM + result = self.vm.qmp('drive-mirror', job_id='job0', device='drive0', # CD-ROM sync='full', node_name='repair0', replaces='img1', @@ -904,76 +894,73 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') def test_image_not_found(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name='repair0', - replaces='img1', - mode='existing', - target=quorum_repair_img, format=iotests.imgfmt) + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name='repair0', replaces='img1', + mode='existing', 
target=quorum_repair_img, + format=iotests.imgfmt) self.assert_qmp(result, 'error/class', 'GenericError') def test_device_not_found(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return - result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full', + result = self.vm.qmp('drive-mirror', job_id='job0', + device='nonexistent', sync='full', node_name='repair0', replaces='img1', target=quorum_repair_img, format=iotests.imgfmt) - self.assert_qmp(result, 'error/class', 'DeviceNotFound') + self.assert_qmp(result, 'error/class', 'GenericError') def test_wrong_sync_mode(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return - result = self.vm.qmp('drive-mirror', device='quorum0', + result = self.vm.qmp('drive-mirror', device='quorum0', job_id='job0', node_name='repair0', replaces='img1', target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'error/class', 'GenericError') def test_no_node_name(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - replaces='img1', + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', replaces='img1', target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'error/class', 'GenericError') def test_nonexistent_replaces(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name='repair0', - replaces='img77', + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name='repair0', replaces='img77', target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'error/class', 'GenericError') def test_after_a_quorum_snapshot(self): - if not self.has_quorum(): + if not iotests.supports_quorum(): return result = self.vm.qmp('blockdev-snapshot-sync', node_name='img1', snapshot_file=quorum_snapshot_file, snapshot_node_name="snap1"); - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name='repair0', - replaces="img1", + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name='repair0', replaces="img1", target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'error/class', 'GenericError') - result = self.vm.qmp('drive-mirror', device='quorum0', sync='full', - node_name='repair0', - replaces="snap1", + result = self.vm.qmp('drive-mirror', job_id='job0', device='quorum0', + sync='full', node_name='repair0', replaces="snap1", target=quorum_repair_img, format=iotests.imgfmt) self.assert_qmp(result, 'return', {}) - self.complete_and_wait(drive="quorum0") + self.complete_and_wait('job0') self.assert_has_block_node("repair0", quorum_repair_img) # TODO: a better test requiring some QEMU infrastructure will be added # to check that this file is really driven by quorum diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 408d613bc1..42bf4164ca 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -222,7 +222,7 @@ Testing: -drive driver=file QEMU_PROG: -drive driver=file: The 'file' block driver requires a file name Testing: -drive driver=nbd -QEMU_PROG: -drive driver=nbd: one of path and host must be specified. 
+QEMU_PROG: -drive driver=nbd: NBD server address missing Testing: -drive driver=raw QEMU_PROG: -drive driver=raw: Can't use 'raw' as a block driver for the protocol level @@ -231,7 +231,7 @@ Testing: -drive file.driver=file QEMU_PROG: -drive file.driver=file: The 'file' block driver requires a file name Testing: -drive file.driver=nbd -QEMU_PROG: -drive file.driver=nbd: one of path and host must be specified. +QEMU_PROG: -drive file.driver=nbd: NBD server address missing Testing: -drive file.driver=raw QEMU_PROG: -drive file.driver=raw: Can't use 'raw' as a block driver for the protocol level diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index ec6d22229c..603bb768d6 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -316,7 +316,7 @@ Testing: -drive driver=file QEMU_PROG: -drive driver=file: The 'file' block driver requires a file name Testing: -drive driver=nbd -QEMU_PROG: -drive driver=nbd: one of path and host must be specified. +QEMU_PROG: -drive driver=nbd: NBD server address missing Testing: -drive driver=raw QEMU_PROG: -drive driver=raw: Can't use 'raw' as a block driver for the protocol level @@ -325,7 +325,7 @@ Testing: -drive file.driver=file QEMU_PROG: -drive file.driver=file: The 'file' block driver requires a file name Testing: -drive file.driver=nbd -QEMU_PROG: -drive file.driver=nbd: one of path and host must be specified. +QEMU_PROG: -drive file.driver=nbd: NBD server address missing Testing: -drive file.driver=raw QEMU_PROG: -drive file.driver=raw: Can't use 'raw' as a block driver for the protocol level diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 index c8e3578702..1d3fd04b65 100755 --- a/tests/qemu-iotests/055 +++ b/tests/qemu-iotests/055 @@ -29,17 +29,24 @@ test_img = os.path.join(iotests.test_dir, 'test.img') target_img = os.path.join(iotests.test_dir, 'target.img') blockdev_target_img = os.path.join(iotests.test_dir, 'blockdev-target.img') -class TestSingleDrive(iotests.QMPTestCase): - image_len = 64 * 1024 * 1024 # MB +image_len = 64 * 1024 * 1024 # MB + +def setUpModule(): + qemu_img('create', '-f', iotests.imgfmt, test_img, str(image_len)) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x11 0 64k', test_img) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x00 64k 128k', test_img) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x22 162k 32k', test_img) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) + qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x33 67043328 64k', test_img) + +def tearDownModule(): + os.remove(test_img) + +class TestSingleDrive(iotests.QMPTestCase): def setUp(self): - # Write data to the image so we can compare later - qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSingleDrive.image_len)) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 0 64k', test_img) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img) - qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) if iotests.qemu_default_machine == 'pc': @@ -48,7 +55,6 @@ class TestSingleDrive(iotests.QMPTestCase): def tearDown(self): self.vm.shutdown() - 
os.remove(test_img) os.remove(blockdev_target_img) try: os.remove(target_img) @@ -134,10 +140,7 @@ class TestSingleDrive(iotests.QMPTestCase): def do_test_device_not_found(self, cmd, **args): result = self.vm.qmp(cmd, **args) - if cmd == 'drive-backup': - self.assert_qmp(result, 'error/class', 'DeviceNotFound') - else: - self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/class', 'GenericError') def test_device_not_found(self): self.do_test_device_not_found('drive-backup', device='nonexistent', @@ -158,19 +161,14 @@ class TestSingleDrive(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') class TestSetSpeed(iotests.QMPTestCase): - image_len = 80 * 1024 * 1024 # MB - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSetSpeed.image_len)) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P1 0 512', test_img) - qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) self.vm.launch() def tearDown(self): self.vm.shutdown() - os.remove(test_img) os.remove(blockdev_target_img) try: os.remove(target_img) @@ -246,15 +244,8 @@ class TestSetSpeed(iotests.QMPTestCase): self.do_test_set_speed_invalid('blockdev-backup', 'drive1') class TestSingleTransaction(iotests.QMPTestCase): - image_len = 64 * 1024 * 1024 # MB - def setUp(self): - qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSingleTransaction.image_len)) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 0 64k', test_img) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xd5 1M 32k', test_img) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 32M 124k', test_img) - qemu_io('-f', iotests.imgfmt, '-c', 'write -P0xdc 67043328 64k', test_img) - qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(TestSingleDrive.image_len)) + qemu_img('create', '-f', iotests.imgfmt, blockdev_target_img, str(image_len)) self.vm = iotests.VM().add_drive(test_img).add_drive(blockdev_target_img) if iotests.qemu_default_machine == 'pc': @@ -263,7 +254,6 @@ class TestSingleTransaction(iotests.QMPTestCase): def tearDown(self): self.vm.shutdown() - os.remove(test_img) os.remove(blockdev_target_img) try: os.remove(target_img) @@ -371,7 +361,7 @@ class TestSingleTransaction(iotests.QMPTestCase): 'sync': 'full' }, } ]) - self.assert_qmp(result, 'error/class', 'DeviceNotFound') + self.assert_qmp(result, 'error/class', 'GenericError') result = self.vm.qmp('transaction', actions=[{ 'type': 'blockdev-backup', @@ -451,5 +441,114 @@ class TestSingleTransaction(iotests.QMPTestCase): self.assert_qmp(result, 'error/class', 'GenericError') self.assert_no_active_block_jobs() + +class TestDriveCompression(iotests.QMPTestCase): + image_len = 64 * 1024 * 1024 # MB + fmt_supports_compression = [{'type': 'qcow2', 'args': ()}, + {'type': 'vmdk', 'args': ('-o', 'subformat=streamOptimized')}] + + def tearDown(self): + self.vm.shutdown() + os.remove(blockdev_target_img) + try: + os.remove(target_img) + except OSError: + pass + + def do_prepare_drives(self, fmt, args): + self.vm = iotests.VM().add_drive(test_img) + + qemu_img('create', '-f', fmt, blockdev_target_img, + str(TestDriveCompression.image_len), *args) + self.vm.add_drive(blockdev_target_img, format=fmt) + + self.vm.launch() + + def do_test_compress_complete(self, cmd, format, **args): + self.do_prepare_drives(format['type'], format['args']) + 
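+        # Descriptive note (not in the original patch): back up drive0 into the
+        # freshly created target, whose format was chosen above as one that
+        # supports compression, with compress=True; then wait for the job to
+        # finish and check that the target matches the source image.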
+ self.assert_no_active_block_jobs() + + result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, **args) + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed() + + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, blockdev_target_img, + iotests.imgfmt, format['type']), + 'target image does not match source after backup') + + def test_complete_compress_drive_backup(self): + for format in TestDriveCompression.fmt_supports_compression: + self.do_test_compress_complete('drive-backup', format, + target=blockdev_target_img, mode='existing') + + def test_complete_compress_blockdev_backup(self): + for format in TestDriveCompression.fmt_supports_compression: + self.do_test_compress_complete('blockdev-backup', format, target='drive1') + + def do_test_compress_cancel(self, cmd, format, **args): + self.do_prepare_drives(format['type'], format['args']) + + self.assert_no_active_block_jobs() + + result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, **args) + self.assert_qmp(result, 'return', {}) + + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') + + self.vm.shutdown() + + def test_compress_cancel_drive_backup(self): + for format in TestDriveCompression.fmt_supports_compression: + self.do_test_compress_cancel('drive-backup', format, + target=blockdev_target_img, mode='existing') + + def test_compress_cancel_blockdev_backup(self): + for format in TestDriveCompression.fmt_supports_compression: + self.do_test_compress_cancel('blockdev-backup', format, target='drive1') + + def do_test_compress_pause(self, cmd, format, **args): + self.do_prepare_drives(format['type'], format['args']) + + self.assert_no_active_block_jobs() + + self.vm.pause_drive('drive0') + result = self.vm.qmp(cmd, device='drive0', sync='full', compress=True, **args) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.vm.resume_drive('drive0') + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed() + + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, blockdev_target_img, + iotests.imgfmt, format['type']), + 'target image does not match source after backup') + + def test_compress_pause_drive_backup(self): + for format in TestDriveCompression.fmt_supports_compression: + self.do_test_compress_pause('drive-backup', format, + target=blockdev_target_img, mode='existing') + + def test_compress_pause_blockdev_backup(self): + for format in TestDriveCompression.fmt_supports_compression: + self.do_test_compress_pause('blockdev-backup', format, target='drive1') + if __name__ == '__main__': iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out index 42314e9c00..5ce2f9a2ed 100644 --- a/tests/qemu-iotests/055.out +++ b/tests/qemu-iotests/055.out @@ -1,5 +1,5 @@ -........................ +.............................. 
---------------------------------------------------------------------- -Ran 24 tests +Ran 30 tests OK diff --git a/tests/qemu-iotests/057 b/tests/qemu-iotests/057 index 9cdd582e39..9f0a5a3057 100755 --- a/tests/qemu-iotests/057 +++ b/tests/qemu-iotests/057 @@ -182,7 +182,7 @@ class TestSingleTransaction(ImageSnapshotTestCase): 'name': 'a' }, }] result = self.vm.qmp('transaction', actions = actions) - self.assert_qmp(result, 'error/class', 'DeviceNotFound') + self.assert_qmp(result, 'error/class', 'GenericError') def test_error_exist(self): self.createSnapshotInTransaction(1) @@ -239,7 +239,7 @@ class TestSnapshotDelete(ImageSnapshotTestCase): result = self.vm.qmp('blockdev-snapshot-delete-internal-sync', device = 'drive_error', id = '0') - self.assert_qmp(result, 'error/class', 'DeviceNotFound') + self.assert_qmp(result, 'error/class', 'GenericError') def test_error_no_id_and_name(self): result = self.vm.qmp('blockdev-snapshot-delete-internal-sync', diff --git a/tests/qemu-iotests/067 b/tests/qemu-iotests/067 index c1df48eded..38d23fce6b 100755 --- a/tests/qemu-iotests/067 +++ b/tests/qemu-iotests/067 @@ -119,23 +119,21 @@ run_qemu <. +# + +import os +import socket +import stat +import time +import iotests +from iotests import cachemode, imgfmt, qemu_img, qemu_nbd + +NBD_PORT = 10811 + +test_img = os.path.join(iotests.test_dir, 'test.img') +unix_socket = os.path.join(iotests.test_dir, 'nbd.socket') + +class NBDBlockdevAddBase(iotests.QMPTestCase): + def blockdev_add_options(self, address, export=None): + options = { 'node-name': 'nbd-blockdev', + 'driver': 'raw', + 'file': { + 'driver': 'nbd', + 'server': address + } } + if export is not None: + options['file']['export'] = export + return options + + def client_test(self, filename, address, export=None): + bao = self.blockdev_add_options(address, export) + result = self.vm.qmp('blockdev-add', **bao) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-named-block-nodes') + for node in result['return']: + if node['node-name'] == 'nbd-blockdev': + if isinstance(filename, str): + self.assert_qmp(node, 'image/filename', filename) + else: + self.assert_json_filename_equal(node['image']['filename'], + filename) + break + + result = self.vm.qmp('x-blockdev-del', node_name='nbd-blockdev') + self.assert_qmp(result, 'return', {}) + + +class QemuNBD(NBDBlockdevAddBase): + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, '64k') + self.vm = iotests.VM() + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + try: + os.remove(unix_socket) + except OSError: + pass + + def _server_up(self, *args): + self.assertEqual(qemu_nbd('-f', imgfmt, test_img, *args), 0) + + def test_inet(self): + self._server_up('-p', str(NBD_PORT)) + address = { 'type': 'inet', + 'data': { + 'host': 'localhost', + 'port': str(NBD_PORT) + } } + self.client_test('nbd://localhost:%i' % NBD_PORT, address) + + def test_unix(self): + self._server_up('-k', unix_socket) + address = { 'type': 'unix', + 'data': { 'path': unix_socket } } + self.client_test('nbd+unix://?socket=' + unix_socket, address) + + +class BuiltinNBD(NBDBlockdevAddBase): + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, '64k') + self.vm = iotests.VM() + self.vm.launch() + self.server = iotests.VM('.server') + self.server.add_drive_raw('if=none,id=nbd-export,' + + 'file=%s,' % test_img + + 'format=%s,' % imgfmt + + 'cache=%s' % cachemode) + self.server.launch() + + def tearDown(self): + self.vm.shutdown() + 
self.server.shutdown() + os.remove(test_img) + try: + os.remove(unix_socket) + except OSError: + pass + + def _server_up(self, address): + result = self.server.qmp('nbd-server-start', addr=address) + self.assert_qmp(result, 'return', {}) + + result = self.server.qmp('nbd-server-add', device='nbd-export') + self.assert_qmp(result, 'return', {}) + + def _server_down(self): + result = self.server.qmp('nbd-server-stop') + self.assert_qmp(result, 'return', {}) + + def test_inet(self): + address = { 'type': 'inet', + 'data': { + 'host': 'localhost', + 'port': str(NBD_PORT) + } } + self._server_up(address) + self.client_test('nbd://localhost:%i/nbd-export' % NBD_PORT, + address, 'nbd-export') + self._server_down() + + def test_inet6(self): + address = { 'type': 'inet', + 'data': { + 'host': '::1', + 'port': str(NBD_PORT), + 'ipv4': False, + 'ipv6': True + } } + filename = { 'driver': 'raw', + 'file': { + 'driver': 'nbd', + 'export': 'nbd-export', + 'server': address + } } + self._server_up(address) + self.client_test(filename, address, 'nbd-export') + self._server_down() + + def test_unix(self): + address = { 'type': 'unix', + 'data': { 'path': unix_socket } } + self._server_up(address) + self.client_test('nbd+unix:///nbd-export?socket=' + unix_socket, + address, 'nbd-export') + self._server_down() + + def test_fd(self): + self._server_up({ 'type': 'unix', + 'data': { 'path': unix_socket } }) + + sockfd = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sockfd.connect(unix_socket) + + result = self.vm.send_fd_scm(str(sockfd.fileno())) + self.assertEqual(result, 0, 'Failed to send socket FD') + + result = self.vm.qmp('getfd', fdname='nbd-fifo') + self.assert_qmp(result, 'return', {}) + + address = { 'type': 'fd', + 'data': { 'str': 'nbd-fifo' } } + filename = { 'driver': 'raw', + 'file': { + 'driver': 'nbd', + 'export': 'nbd-export', + 'server': address + } } + self.client_test(filename, address, 'nbd-export') + + self._server_down() + + +if __name__ == '__main__': + # Need to support image creation + iotests.main(supported_fmts=['vpc', 'parallels', 'qcow', 'vdi', 'qcow2', + 'vmdk', 'raw', 'vhdx', 'qed']) diff --git a/tests/qemu-iotests/147.out b/tests/qemu-iotests/147.out new file mode 100644 index 0000000000..3f8a935a08 --- /dev/null +++ b/tests/qemu-iotests/147.out @@ -0,0 +1,5 @@ +...... 
+---------------------------------------------------------------------- +Ran 6 tests + +OK diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 index 4057b5e2aa..0b86ea4e5c 100755 --- a/tests/qemu-iotests/155 +++ b/tests/qemu-iotests/155 @@ -63,10 +63,10 @@ class BaseClass(iotests.QMPTestCase): # Add the BDS via blockdev-add so it stays around after the mirror block # job has been completed result = self.vm.qmp('blockdev-add', - options={'node-name': 'source', - 'driver': iotests.imgfmt, - 'file': {'driver': 'file', - 'filename': source_img}}) + node_name='source', + driver=iotests.imgfmt, + file={'driver': 'file', + 'filename': source_img}) self.assert_qmp(result, 'return', {}) result = self.vm.qmp('x-blockdev-insert-medium', @@ -90,7 +90,7 @@ class BaseClass(iotests.QMPTestCase): if self.target_blockdev_backing: options['backing'] = self.target_blockdev_backing - result = self.vm.qmp('blockdev-add', options=options) + result = self.vm.qmp('blockdev-add', **options) self.assert_qmp(result, 'return', {}) def tearDown(self): diff --git a/tests/qemu-iotests/158 b/tests/qemu-iotests/158 new file mode 100755 index 0000000000..a6cdd6d8cf --- /dev/null +++ b/tests/qemu-iotests/158 @@ -0,0 +1,80 @@ +#!/bin/bash +# +# Test encrypted read/write using backing files +# +# Copyright (C) 2015 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=berrange@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + + +size=128M +TEST_IMG_BASE=$TEST_IMG.base + +TEST_IMG_SAVE=$TEST_IMG +TEST_IMG=$TEST_IMG_BASE +echo "== create base ==" +IMGOPTS="encryption=on" _make_test_img $size +TEST_IMG=$TEST_IMG_SAVE + +echo +echo "== writing whole image ==" +echo "astrochicken" | $QEMU_IO -c "write -P 0xa 0 $size" "$TEST_IMG_BASE" | _filter_qemu_io | _filter_testdir + +echo +echo "== verify pattern ==" +echo "astrochicken" | $QEMU_IO -c "read -P 0xa 0 $size" "$TEST_IMG_BASE" | _filter_qemu_io | _filter_testdir + +echo "== create overlay ==" +IMGOPTS="encryption=on" _make_test_img -b "$TEST_IMG_BASE" $size + +echo +echo "== writing part of a cluster ==" +echo "astrochicken" | $QEMU_IO -c "write -P 0xe 0 1024" "$TEST_IMG" | _filter_qemu_io | _filter_testdir + +echo +echo "== verify pattern ==" +echo "astrochicken" | $QEMU_IO -c "read -P 0xe 0 1024" "$TEST_IMG" | _filter_qemu_io | _filter_testdir +echo +echo "== verify pattern ==" +echo "astrochicken" | $QEMU_IO -c "read -P 0xa 1024 64512" "$TEST_IMG" | _filter_qemu_io | _filter_testdir + + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/158.out b/tests/qemu-iotests/158.out new file mode 100644 index 0000000000..b3f37e28ef --- /dev/null +++ b/tests/qemu-iotests/158.out @@ -0,0 +1,36 @@ +QA output created by 158 +== create base == +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=134217728 encryption=on + +== writing whole image == +Disk image 'TEST_DIR/t.qcow2.base' is encrypted. +password: +wrote 134217728/134217728 bytes at offset 0 +128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +Disk image 'TEST_DIR/t.qcow2.base' is encrypted. +password: +read 134217728/134217728 bytes at offset 0 +128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== create overlay == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/t.IMGFMT.base encryption=on + +== writing part of a cluster == +Disk image 'TEST_DIR/t.qcow2' is encrypted. +password: +wrote 1024/1024 bytes at offset 0 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +Disk image 'TEST_DIR/t.qcow2' is encrypted. +password: +read 1024/1024 bytes at offset 0 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verify pattern == +Disk image 'TEST_DIR/t.qcow2' is encrypted. +password: +read 64512/64512 bytes at offset 1024 +63 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/159 b/tests/qemu-iotests/159 new file mode 100755 index 0000000000..825f05fab8 --- /dev/null +++ b/tests/qemu-iotests/159 @@ -0,0 +1,70 @@ +#! /bin/bash +# +# qemu-img dd test with different block sizes +# +# Copyright (C) 2016 Reda Sallahi +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# + +owner=fullmanet@gmail.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.out" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +. ./common.rc +. ./common.filter +. ./common.pattern + +_supported_fmt generic +_supported_proto file +_supported_os Linux + +TEST_SIZES="5 512 1024 1999 1K 64K 1M" + +for bs in $TEST_SIZES; do + echo + echo "== Creating image ==" + + size=1M + _make_test_img $size + _check_test_img + $QEMU_IO -c "write -P 0xa 0 $size" "$TEST_IMG" | _filter_qemu_io + + echo + echo "== Converting the image with dd with a block size of $bs ==" + + $QEMU_IMG dd if="$TEST_IMG" of="$TEST_IMG.out" bs=$bs -O "$IMGFMT" + TEST_IMG="$TEST_IMG.out" _check_test_img + + echo + echo "== Compare the images with qemu-img compare ==" + + $QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.out" +done + +echo +echo "*** done" +rm -f "$seq.full" +status=0 diff --git a/tests/qemu-iotests/159.out b/tests/qemu-iotests/159.out new file mode 100644 index 0000000000..b86b63abe6 --- /dev/null +++ b/tests/qemu-iotests/159.out @@ -0,0 +1,87 @@ +QA output created by 159 + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 5 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 512 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 1024 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 1999 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 1K == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 64K == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. 
+ +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with a block size of 1M == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +*** done diff --git a/tests/qemu-iotests/160 b/tests/qemu-iotests/160 new file mode 100755 index 0000000000..5c910e5bfc --- /dev/null +++ b/tests/qemu-iotests/160 @@ -0,0 +1,72 @@ +#! /bin/bash +# +# qemu-img dd test for the skip option +# +# Copyright (C) 2016 Reda Sallahi +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +owner=fullmanet@gmail.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.out" "$TEST_IMG.out.dd" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +. ./common.rc +. ./common.filter +. ./common.pattern + +_supported_fmt raw +_supported_proto file +_supported_os Linux + +TEST_SKIP_BLOCKS="1 2 30 30K" + +for skip in $TEST_SKIP_BLOCKS; do + echo + echo "== Creating image ==" + + size=1M + _make_test_img $size + _check_test_img + $QEMU_IO -c "write -P 0xa 24 512k" "$TEST_IMG" | _filter_qemu_io + + echo + echo "== Converting the image with dd with skip=$skip ==" + + $QEMU_IMG dd if="$TEST_IMG" of="$TEST_IMG.out" skip="$skip" -O "$IMGFMT" \ + 2> /dev/null + TEST_IMG="$TEST_IMG.out" _check_test_img + dd if="$TEST_IMG" of="$TEST_IMG.out.dd" skip="$skip" status=none + + echo + echo "== Compare the images with qemu-img compare ==" + + $QEMU_IMG compare "$TEST_IMG.out.dd" "$TEST_IMG.out" +done + +echo +echo "*** done" +rm -f "$seq.full" +status=0 diff --git a/tests/qemu-iotests/160.out b/tests/qemu-iotests/160.out new file mode 100644 index 0000000000..9cedc80356 --- /dev/null +++ b/tests/qemu-iotests/160.out @@ -0,0 +1,51 @@ +QA output created by 160 + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 524288/524288 bytes at offset 24 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with skip=1 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 524288/524288 bytes at offset 24 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with skip=2 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. 
+wrote 524288/524288 bytes at offset 24 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with skip=30 == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 524288/524288 bytes at offset 24 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd with skip=30K == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +*** done diff --git a/tests/qemu-iotests/162 b/tests/qemu-iotests/162 index 0b43ea3395..cad2bd70ab 100755 --- a/tests/qemu-iotests/162 +++ b/tests/qemu-iotests/162 @@ -35,6 +35,9 @@ status=1 # failure is the default! _supported_fmt generic _supported_os Linux +test_ssh=$($QEMU_IMG --help | grep '^Supported formats:.* ssh\( \|$\)') +[ "$test_ssh" = "" ] && _notrun "ssh support required" + echo echo '=== NBD ===' # NBD expects all of its arguments to be strings @@ -43,16 +46,26 @@ echo '=== NBD ===' $QEMU_IMG info 'json:{"driver": "nbd", "host": 42}' # And this should not treat @port as if it had not been specified -# (We cannot use localhost with an invalid port here, but we need to use a -# non-existing domain, because otherwise the error message will not contain -# the port) -$QEMU_IMG info 'json:{"driver": "nbd", "host": "does.not.exist.example.com", "port": 42}' +# (We need to set up a server here, because the error message for "Connection +# refused" does not contain the destination port) + +# Launching qemu-nbd is done in a loop: We try to set up an NBD server on some +# random port and continue until success, i.e. until we have found a port that +# is not in use yet. +while true; do + port=$((RANDOM + 32768)) + if $QEMU_NBD -p $port -f raw --fork null-co:// 2> /dev/null; then + break + fi +done + +$QEMU_IMG info "json:{'driver': 'nbd', 'host': 'localhost', 'port': $port}" \ + | grep '^image' | sed -e "s/$port/PORT/" # This is a test for NBD's bdrv_refresh_filename() implementation: It expects # either host or path to be set, but it must not assume that they are set to # strings in the options QDict -$QEMU_NBD -k "$PWD/42" -f raw null-co:// & -sleep 0.5 +$QEMU_NBD -k "$PWD/42" -f raw --fork null-co:// $QEMU_IMG info 'json:{"driver": "nbd", "path": 42}' | grep '^image' rm -f 42 diff --git a/tests/qemu-iotests/162.out b/tests/qemu-iotests/162.out index 9bba72353a..3c5be2c569 100644 --- a/tests/qemu-iotests/162.out +++ b/tests/qemu-iotests/162.out @@ -2,7 +2,7 @@ QA output created by 162 === NBD === qemu-img: Could not open 'json:{"driver": "nbd", "host": 42}': Failed to connect socket: Invalid argument -qemu-img: Could not open 'json:{"driver": "nbd", "host": "does.not.exist.example.com", "port": 42}': address resolution failed for does.not.exist.example.com:42: Name or service not known +image: nbd://localhost:PORT image: nbd+unix://?socket=42 === SSH === diff --git a/tests/qemu-iotests/170 b/tests/qemu-iotests/170 new file mode 100755 index 0000000000..5b335dbc3e --- /dev/null +++ b/tests/qemu-iotests/170 @@ -0,0 +1,67 @@ +#! /bin/bash +# +# qemu-img dd test +# +# Copyright (C) 2016 Reda Sallahi +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +owner=fullmanet@gmail.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +status=1 + +_cleanup() +{ + _cleanup_test_img + rm -f "$TEST_IMG.out" +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +. ./common.rc +. ./common.filter +. ./common.pattern + +_supported_fmt generic +_supported_proto file +_supported_os Linux + +echo +echo "== Creating image ==" + +size=1M +_make_test_img $size +_check_test_img + +$QEMU_IO -c "write -P 0xa 0 $size" "$TEST_IMG" | _filter_qemu_io + +echo +echo "== Converting the image with dd ==" + +$QEMU_IMG dd if="$TEST_IMG" of="$TEST_IMG.out" -O "$IMGFMT" +TEST_IMG="$TEST_IMG.out" _check_test_img + +echo +echo "== Compare the images with qemu-img compare ==" + +$QEMU_IMG compare "$TEST_IMG" "$TEST_IMG.out" + +echo +echo "*** done" +rm -f "$seq.full" +status=0 diff --git a/tests/qemu-iotests/170.out b/tests/qemu-iotests/170.out new file mode 100644 index 0000000000..a83fb82fa7 --- /dev/null +++ b/tests/qemu-iotests/170.out @@ -0,0 +1,15 @@ +QA output created by 170 + +== Creating image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 +No errors were found on the image. +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Converting the image with dd == +No errors were found on the image. + +== Compare the images with qemu-img compare == +Images are identical. + +*** done diff --git a/tests/qemu-iotests/171 b/tests/qemu-iotests/171 new file mode 100755 index 0000000000..257be10a0e --- /dev/null +++ b/tests/qemu-iotests/171 @@ -0,0 +1,212 @@ +#!/bin/bash +# +# Test 'offset' and 'size' options of the raw driver. Make sure we can't +# (or can) read and write outside of the image size. +# +# Copyright (C) 2016 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=tgolembi@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt raw +_supported_proto file +_supported_os Linux + + +# Create JSON with options +img_json() { + echo -n 'json:{"driver":"raw", ' + echo -n "\"offset\":\"$img_offset\", " + if [ "$img_size" -ne -1 ] ; then + echo -n "\"size\":\"$img_size\", " + fi + echo -n '"file": {' + echo -n '"driver":"file", ' + echo -n "\"filename\":\"$TEST_IMG\" " + echo -n "} }" +} + +do_general_test() { + if [ "$img_size" -ge 0 ] ; then + test_size=$img_size + else + test_size=$((size-img_offset)) + fi + + echo + echo "write to image" + $QEMU_IO -c "write -P 0x0a 0 $test_size" "$(img_json)" | _filter_qemu_io + + echo + echo "read the image" + $QEMU_IO -c "read -P 0x0a 0 $test_size" "$(img_json)" | _filter_qemu_io + + echo + echo "check that offset is respected" + $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io + + echo + echo "write before image boundary" + $QEMU_IO -c "write $((test_size-1)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "write across image boundary" + $QEMU_IO -c "write $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "write at image boundary" + $QEMU_IO -c "write $test_size 1" "$(img_json)" | _filter_qemu_io + + echo + echo "write after image boundary" + $QEMU_IO -c "write $((test_size+512)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "writev before/after image boundary" + $QEMU_IO -c "writev $((test_size-512)) 512 512" "$(img_json)" | _filter_qemu_io + + echo + echo "read before image boundary" + $QEMU_IO -c "read $((test_size-1)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "read across image boundary" + $QEMU_IO -c "read $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "read at image boundary" + $QEMU_IO -c "read $test_size 1" "$(img_json)" | _filter_qemu_io + + echo + echo "read after image boundary" + $QEMU_IO -c "read $((test_size+512)) 1" "$(img_json)" | _filter_qemu_io + + echo + echo "readv before/after image boundary" + $QEMU_IO -c "readv $((test_size-512)) 512 512" "$(img_json)" | _filter_qemu_io + + echo + echo "fill image with pattern" + $QEMU_IO -c "write -P 0x0a 0 $size" $TEST_IMG | _filter_qemu_io + + echo + echo "write zeroes and check" + $QEMU_IO -c "write -z 0 512" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io + + echo + echo "write zeroes across image boundary" + $QEMU_IO -c "write -z $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "write zeroes at image boundary and check" + $QEMU_IO -c "write -z $((test_size-2)) 2" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size-2)) 2" $TEST_IMG | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size)) 2" $TEST_IMG | _filter_qemu_io + + echo + echo "fill image with pattern" + $QEMU_IO -c "write -P 0x0a 0 $size" $TEST_IMG | _filter_qemu_io + + echo + echo "discard and check" + $QEMU_IO -c "discard 0 512" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset-2)) 4" $TEST_IMG | _filter_qemu_io + + echo + echo "discard across image boundary" + $QEMU_IO -c "discard $((test_size-1)) 2" "$(img_json)" | _filter_qemu_io + + echo + echo "discard at image boundary and check" + $QEMU_IO -c "discard $((test_size-2)) 2" "$(img_json)" | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size-2)) 2" $TEST_IMG | _filter_qemu_io + $QEMU_IO -c "read -v $((img_offset+test_size)) 2" $TEST_IMG | _filter_qemu_io +} + +echo +echo "== test 'offset' option ==" +size=4096 +img_offset=512 +img_size=-1 +_make_test_img 
$size +do_general_test +_cleanup_test_img + +echo +echo "== test 'offset' and 'size' options ==" +size=4096 +img_offset=512 +img_size=2048 +_make_test_img $size +do_general_test +_cleanup_test_img + +echo +echo "== test misaligned 'offset' ==" +size=4096 +img_offset=10 +img_size=2048 +_make_test_img $size +do_general_test +_cleanup_test_img + +echo +echo "== test reopen ==" +size=4096 +img_offset=512 +img_size=512 +_make_test_img $size +( +$QEMU_IO "$(img_json)" <. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto file +_supported_os Linux + +if [ "$QEMU_DEFAULT_MACHINE" != "pc" ]; then + _notrun "Requires a PC machine" +fi + +function do_run_qemu() +{ + ( + if ! test -t 0; then + while read cmd; do + echo $cmd + done + fi + echo quit + ) | $QEMU -nographic -monitor stdio -serial none "$@" + echo +} + +function check_floppy_qtree() +{ + echo + echo Testing: "$@" | _filter_testdir + + # QEMU_OPTIONS contains -nodefaults, we don't want that here because the + # defaults are part of what should be checked here. + # + # Apply the sed filter to stdout only, but keep the stderr output and + # filter the qemu program name in it. + echo "info qtree" | + (QEMU_OPTIONS="" do_run_qemu "$@" | + sed -ne '/^ dev: isa-fdc/,/^ dev:/{x;p}' ) 2>&1 | + _filter_win32 | _filter_qemu +} + +function check_cache_mode() +{ + echo "info block none0" | + QEMU_OPTIONS="" do_run_qemu -drive if=none,file="$TEST_IMG" "$@" | + _filter_win32 | _filter_qemu | grep "Cache mode" +} + + +size=720k + +_make_test_img $size + +# Default drive semantics: +# +# By default you get a single empty floppy drive. You can override it with +# -drive and using the same index, but if you use -drive to add a floppy to a +# different index, you get both of them. However, as soon as you use any +# '-device floppy', even to a different slot, the default drive is disabled. 
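+#
+# As a rough illustration of those rules (the qtree checks below are the
+# authoritative statement; this sketch merely restates the comment above):
+#
+#   (no floppy options)                -> one empty default drive at unit 0
+#   -drive if=floppy,index=1,file=...  -> default empty unit 0 plus the image at unit 1
+#   -device floppy (any unit)          -> only the explicitly created drive, no default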
+ +echo +echo +echo === Default === + +check_floppy_qtree + +echo +echo +echo === Using -fda/-fdb options === + +check_floppy_qtree -fda "$TEST_IMG" +check_floppy_qtree -fdb "$TEST_IMG" +check_floppy_qtree -fda "$TEST_IMG" -fdb "$TEST_IMG" + + +echo +echo +echo === Using -drive options === + +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" +check_floppy_qtree -drive if=floppy,file="$TEST_IMG",index=1 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=floppy,file="$TEST_IMG",index=1 + +echo +echo +echo === Using -drive if=none and -global === + +check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 + +echo +echo +echo === Using -drive if=none and -device === + +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -device floppy,drive=none0 -device floppy,drive=none1,unit=1 + +echo +echo +echo === Mixing -fdX and -global === + +# Working +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 + +# Conflicting (-fdX wins) +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveA=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -global isa-fdc.driveB=none0 + +echo +echo +echo === Mixing -fdX and -device === + +# Working +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 + +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 + +# Conflicting +check_floppy_qtree -fda "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 +check_floppy_qtree -fdb "$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 + +echo +echo +echo === Mixing -drive and -device === + +# Working +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0 +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=1 + +# Conflicting +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,unit=0 + +echo +echo +echo === Mixing -global and -device === + +# Working +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveA=none0 -device floppy,drive=none1 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 + +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveB=none0 -device floppy,drive=none1 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 + +# Conflicting 
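+# (-global isa-fdc.driveX already occupies the unit that the explicit
+# -device floppy asks for, so the unit should be reported as already in use)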
+check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 + +echo +echo +echo === Too many floppy drives === + +# Working +check_floppy_qtree -drive if=floppy,file="$TEST_IMG" \ + -drive if=none,file="$TEST_IMG" \ + -drive if=none,file="$TEST_IMG" \ + -global isa-fdc.driveB=none0 \ + -device floppy,drive=none1 + +echo +echo +echo === Creating an empty drive with anonymous BB === + +check_floppy_qtree -device floppy +check_floppy_qtree -device floppy,drive-type=120 +check_floppy_qtree -device floppy,drive-type=144 +check_floppy_qtree -device floppy,drive-type=288 + +echo +echo +echo === Try passing different drive size with image === + +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,drive-type=120 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,drive-type=288 + +echo +echo +echo === Try passing different block sizes === + +# Explicitly setting the default is okay +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,logical_block_size=512 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,physical_block_size=512 + +# Changing it is not +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,logical_block_size=4096 +check_floppy_qtree -drive if=none,file="$TEST_IMG" -device floppy,drive=none0,physical_block_size=1024 + +echo +echo +echo === Writethrough caching === + +check_cache_mode -device floppy,drive=none0 +check_cache_mode -device floppy,drive=none0,write-cache=on +check_cache_mode -device floppy,drive=none0,write-cache=off + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out new file mode 100644 index 0000000000..6b7edaf28f --- /dev/null +++ b/tests/qemu-iotests/172.out @@ -0,0 +1,1170 @@ +QA output created by 172 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=737280 + + +=== Default === + +Testing: + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "288" + + +=== Using -fda/-fdb options === + +Testing: -fda TEST_DIR/t.qcow2 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fdb TEST_DIR/t.qcow2 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + 
type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "288" + +Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + + +=== Using -drive options === + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "288" + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 
(0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + + +=== Using -drive if=none and -global === + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -global isa-fdc.driveB=none1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + + +=== Using -drive if=none and -device === + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 
-drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 -device floppy,drive=none1,unit=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + + +=== Mixing -fdX and -global === + +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + 
fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + + +=== Mixing -fdX and -device === + +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 
4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 1 (0x1) + drive = "floppy1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use +QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed. + +Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 +QEMU_PROG: -device floppy,drive=none0,unit=1: Floppy unit 1 is in use +QEMU_PROG: -device floppy,drive=none0,unit=1: Device initialization failed. + + +=== Mixing -drive and -device === + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "floppy0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=0 +QEMU_PROG: -device floppy,drive=none0,unit=0: Floppy unit 0 is in use +QEMU_PROG: -device floppy,drive=none0,unit=0: Device initialization failed. 
+ + +=== Mixing -global and -device === + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 1 (0x1) + drive = "none1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=0 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none1" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + dev: floppy, id "" + unit = 1 (0x1) + drive = "none0" + logical_block_size = 512 (0x200) + 
physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 -device floppy,drive=none1,unit=0 +QEMU_PROG: -device floppy,drive=none1,unit=0: Floppy unit 0 is in use +QEMU_PROG: -device floppy,drive=none1,unit=0: Device initialization failed. + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1,unit=1 +QEMU_PROG: -device floppy,drive=none1,unit=1: Floppy unit 1 is in use +QEMU_PROG: -device floppy,drive=none1,unit=1: Device initialization failed. + + +=== Too many floppy drives === + +Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 -device floppy,drive=none1 +QEMU_PROG: -device floppy,drive=none1: Can't create floppy unit 2, bus supports only 2 units +QEMU_PROG: -device floppy,drive=none1: Device initialization failed. + + +=== Creating an empty drive with anonymous BB === + +Testing: -device floppy + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "288" + +Testing: -device floppy,drive-type=120 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "120" + +Testing: -device floppy,drive-type=144 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -device floppy,drive-type=288 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "288" + + +=== Try passing different drive size with image === + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=120 + + dev: isa-fdc, id "" + iobase = 
1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "120" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-type=288 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "288" + + +=== Try passing different block sizes === + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=512 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=512 + + dev: isa-fdc, id "" + iobase = 1008 (0x3f0) + irq = 6 (0x6) + dma = 2 (0x2) + driveA = "" + driveB = "" + check_media_rate = true + fdtypeA = "auto" + fdtypeB = "auto" + fallback = "288" + isa irq 6 + bus: floppy-bus.0 + type floppy-bus + dev: floppy, id "" + unit = 0 (0x0) + drive = "none0" + logical_block_size = 512 (0x200) + physical_block_size = 512 (0x200) + min_io_size = 0 (0x0) + opt_io_size = 0 (0x0) + discard_granularity = 4294967295 (0xffffffff) + write-cache = "auto" + drive-type = "144" + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 +QEMU_PROG: -device floppy,drive=none0,logical_block_size=4096: Physical and logical block size must be 512 for floppy +QEMU_PROG: -device floppy,drive=none0,logical_block_size=4096: Device initialization failed. + +Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=1024 +QEMU_PROG: -device floppy,drive=none0,physical_block_size=1024: Physical and logical block size must be 512 for floppy +QEMU_PROG: -device floppy,drive=none0,physical_block_size=1024: Device initialization failed. 
+ + +=== Writethrough caching === + Cache mode: writeback + Cache mode: writeback + Cache mode: writethrough +*** done diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index d60ea2ce3c..b6274bee0a 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -51,7 +51,7 @@ export IMGOPTS="" export CACHEMODE="writeback" export QEMU_IO_OPTIONS="" export CACHEMODE_IS_DEFAULT=true -export QEMU_OPTIONS="-nodefaults" +export QEMU_OPTIONS="-nodefaults -machine accel=qtest" export VALGRIND_QEMU= export IMGKEYSECRET= export IMGOPTSSYNTAX=false diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 3ab6e4d764..240ed0697a 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -44,6 +44,15 @@ _filter_imgfmt() sed -e "s#$IMGFMT#IMGFMT#g" } +# Replace error message when the format is not supported and delete +# the output lines after the first one +_filter_qemu_img_check() +{ + sed -e '/allocated.*fragmented.*compressed clusters/d' \ + -e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \ + -e '/Image end offset: [0-9]\+/d' +} + # Removes \r from messages _filter_win32() { diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu index 2548a8700b..e657361790 100644 --- a/tests/qemu-iotests/common.qemu +++ b/tests/qemu-iotests/common.qemu @@ -155,15 +155,13 @@ function _launch_qemu() if [ -z "$keep_stderr" ]; then QEMU_NEED_PID='y'\ - ${QEMU} -nographic -serial none ${comm} -machine accel=qtest "${@}" \ - >"${fifo_out}" \ - 2>&1 \ - <"${fifo_in}" & + ${QEMU} -nographic -serial none ${comm} "${@}" >"${fifo_out}" \ + 2>&1 \ + <"${fifo_in}" & elif [ "$keep_stderr" = "y" ]; then QEMU_NEED_PID='y'\ - ${QEMU} -nographic -serial none ${comm} -machine accel=qtest "${@}" \ - >"${fifo_out}" \ - <"${fifo_in}" & + ${QEMU} -nographic -serial none ${comm} "${@}" >"${fifo_out}" \ + <"${fifo_in}" & else exit 1 fi diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 306b00c210..3213765f4e 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -69,7 +69,7 @@ if [ "$IMGOPTSSYNTAX" = "true" ]; then TEST_IMG="$DRIVER,file.driver=ssh,file.host=127.0.0.1,file.path=$TEST_IMG_FILE" elif [ "$IMGPROTO" = "nfs" ]; then TEST_DIR="$DRIVER,file.driver=nfs,file.filename=nfs://127.0.0.1/$TEST_DIR" - TEST_IMG=$TEST_DIR_OPTS/t.$IMGFMT + TEST_IMG=$TEST_DIR/t.$IMGFMT elif [ "$IMGPROTO" = "archipelago" ]; then TEST_IMG="$DRIVER,file.driver=archipelago,file.volume=:at.$IMGFMT" else @@ -234,10 +234,7 @@ _check_test_img() else $QEMU_IMG check "$@" -f $IMGFMT "$TEST_IMG" 2>&1 fi - ) | _filter_testdir | \ - sed -e '/allocated.*fragmented.*compressed clusters/d' \ - -e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \ - -e '/Image end offset: [0-9]\+/d' + ) | _filter_testdir | _filter_qemu_img_check } _img_info() diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 50ddeed80a..866c1a032d 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -149,6 +149,7 @@ 144 rw auto quick 145 auto quick 146 auto quick +147 auto 148 rw auto quick 149 rw auto sudo 150 rw auto quick @@ -157,4 +158,10 @@ 155 rw auto 156 rw auto quick 157 auto +158 rw auto quick +159 rw auto quick +160 rw auto quick 162 auto quick +170 rw auto quick +171 rw auto quick +172 auto diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index dbe0ee548a..bec8eb4b8d 100644 --- 
a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -39,6 +39,10 @@ if os.environ.get('QEMU_IO_OPTIONS'): qemu_io_args += os.environ['QEMU_IO_OPTIONS'].strip().split(' ') +qemu_nbd_args = [os.environ.get('QEMU_NBD_PROG', 'qemu-nbd')] +if os.environ.get('QEMU_NBD_OPTIONS'): + qemu_nbd_args += os.environ['QEMU_NBD_OPTIONS'].strip().split(' ') + qemu_prog = os.environ.get('QEMU_PROG', 'qemu') qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') @@ -50,6 +54,7 @@ qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE') socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper') +debug = False def qemu_img(*args): '''Run qemu-img and return the exit code''' @@ -86,10 +91,14 @@ def qemu_io(*args): sys.stderr.write('qemu-io received signal %i: %s\n' % (-exitcode, ' '.join(args))) return subp.communicate()[0] -def compare_images(img1, img2): +def qemu_nbd(*args): + '''Run qemu-nbd in daemon mode and return the parent's exit code''' + return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) + +def compare_images(img1, img2, fmt1=imgfmt, fmt2=imgfmt): '''Return True if two image files are identical''' - return qemu_img('compare', '-f', imgfmt, - '-F', imgfmt, img1, img2) == 0 + return qemu_img('compare', '-f', fmt1, + '-F', fmt2, img1, img2) == 0 def create_image(name, size): '''Create a fully-allocated raw image with sector markers''' @@ -131,24 +140,33 @@ def log(msg, filters=[]): class VM(qtest.QEMUQtestMachine): '''A QEMU VM''' - def __init__(self): - super(VM, self).__init__(qemu_prog, qemu_opts, test_dir=test_dir, + def __init__(self, path_suffix=''): + name = "qemu%s-%d" % (path_suffix, os.getpid()) + super(VM, self).__init__(qemu_prog, qemu_opts, name=name, + test_dir=test_dir, socket_scm_helper=socket_scm_helper) + if debug: + self._debug = True self._num_drives = 0 + def add_device(self, opts): + self._args.append('-device') + self._args.append(opts) + return self + def add_drive_raw(self, opts): self._args.append('-drive') self._args.append(opts) return self - def add_drive(self, path, opts='', interface='virtio'): + def add_drive(self, path, opts='', interface='virtio', format=imgfmt): '''Add a virtio-blk drive to the VM''' options = ['if=%s' % interface, 'id=drive%d' % self._num_drives] if path is not None: options.append('file=%s' % path) - options.append('format=%s' % imgfmt) + options.append('format=%s' % format) options.append('cache=%s' % cachemode) if opts: @@ -204,6 +222,19 @@ def dictpath(self, d, path): self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d))) return d + def flatten_qmp_object(self, obj, output=None, basestr=''): + if output is None: + output = dict() + if isinstance(obj, list): + for i in range(len(obj)): + self.flatten_qmp_object(obj[i], output, basestr + str(i) + '.') + elif isinstance(obj, dict): + for key in obj: + self.flatten_qmp_object(obj[key], output, basestr + key + '.') + else: + output[basestr[:-1]] = obj # Strip trailing '.' 
+ return output + def assert_qmp_absent(self, d, path): try: result = self.dictpath(d, path) @@ -234,6 +265,13 @@ def check_equal_or_none(a, b): self.assertTrue(False, "Cannot find %s %s in result:\n%s" % \ (node_name, file_name, result)) + def assert_json_filename_equal(self, json_filename, reference): + '''Asserts that the given filename is a json: filename and that its + content is equal to the given reference object''' + self.assertEqual(json_filename[:5], 'json:') + self.assertEqual(self.flatten_qmp_object(json.loads(json_filename[5:])), + self.flatten_qmp_object(reference)) + def cancel_and_wait(self, drive='drive0', force=False, resume=False): '''Cancel a block job and wait for it to finish, returning the event''' result = self.vm.qmp('block-job-cancel', device=drive, force=force) @@ -310,14 +348,19 @@ def verify_platform(supported_oses=['linux']): if True not in [sys.platform.startswith(x) for x in supported_oses]: notrun('not suitable for this OS: %s' % sys.platform) +def supports_quorum(): + return 'quorum' in qemu_img_pipe('--help') + def verify_quorum(): '''Skip test suite if quorum support is not available''' - if 'quorum' not in qemu_img_pipe('--help'): + if not supports_quorum(): notrun('quorum support missing') def main(supported_fmts=[], supported_oses=['linux']): '''Run tests''' + global debug + # We are using TEST_DIR and QEMU_DEFAULT_MACHINE as proxies to # indicate that we're not being run via "check". There may be # other things set up by "check" that individual test cases rely diff --git a/tests/qemu-iotests/socket_scm_helper.c b/tests/qemu-iotests/socket_scm_helper.c index 80cadf43bc..eb76d31aa9 100644 --- a/tests/qemu-iotests/socket_scm_helper.c +++ b/tests/qemu-iotests/socket_scm_helper.c @@ -60,7 +60,7 @@ static int send_fd(int fd, int fd_to_send) } /* Convert string to fd number. */ -static int get_fd_num(const char *fd_str) +static int get_fd_num(const char *fd_str, bool silent) { int sock; char *err; @@ -68,12 +68,16 @@ static int get_fd_num(const char *fd_str) errno = 0; sock = strtol(fd_str, &err, 10); if (errno) { - fprintf(stderr, "Failed in strtol for socket fd, reason: %s\n", - strerror(errno)); + if (!silent) { + fprintf(stderr, "Failed in strtol for socket fd, reason: %s\n", + strerror(errno)); + } return -1; } if (!*fd_str || *err || sock < 0) { - fprintf(stderr, "bad numerical value for socket fd '%s'\n", fd_str); + if (!silent) { + fprintf(stderr, "bad numerical value for socket fd '%s'\n", fd_str); + } return -1; } @@ -104,18 +108,21 @@ int main(int argc, char **argv, char **envp) } - sock = get_fd_num(argv[1]); + sock = get_fd_num(argv[1], false); if (sock < 0) { return EXIT_FAILURE; } - /* Now only open a file in readonly mode for test purpose. If more precise - control is needed, use python script in file operation, which is - supposed to fork and exec this program. */ - fd = open(argv[2], O_RDONLY); + fd = get_fd_num(argv[2], true); if (fd < 0) { - fprintf(stderr, "Failed to open file '%s'\n", argv[2]); - return EXIT_FAILURE; + /* Now only open a file in readonly mode for test purpose. If more + precise control is needed, use python script in file operation, which + is supposed to fork and exec this program. 
*/ + fd = open(argv[2], O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Failed to open file '%s'\n", argv[2]); + return EXIT_FAILURE; + } } ret = send_fd(sock, fd); diff --git a/tests/qht-bench.c b/tests/qht-bench.c index 76360a0cf5..2afa09d859 100644 --- a/tests/qht-bench.c +++ b/tests/qht-bench.c @@ -193,7 +193,7 @@ static void *thread_func(void *p) rcu_register_thread(); atomic_inc(&n_ready_threads); - while (!atomic_mb_read(&test_start)) { + while (!atomic_read(&test_start)) { cpu_relax(); } @@ -393,11 +393,11 @@ static void run_test(void) while (atomic_read(&n_ready_threads) != n_rw_threads + n_rz_threads) { cpu_relax(); } - atomic_mb_set(&test_start, true); + atomic_set(&test_start, true); do { remaining = sleep(duration); } while (remaining); - atomic_mb_set(&test_stop, true); + atomic_set(&test_stop, true); for (i = 0; i < n_rw_threads; i++) { qemu_thread_join(&rw_threads[i]); diff --git a/tests/qom-test.c b/tests/qom-test.c index 23493a2b0a..d48f890e84 100644 --- a/tests/qom-test.c +++ b/tests/qom-test.c @@ -115,7 +115,7 @@ static void add_machine_test_cases(void) const QListEntry *p; QObject *qobj; QString *qstr; - const char *mname, *path; + const char *mname; qtest_start("-machine none"); response = qmp("{ 'execute': 'query-machines' }"); @@ -132,8 +132,9 @@ static void add_machine_test_cases(void) g_assert(qstr); mname = qstring_get_str(qstr); if (!is_blacklisted(arch, mname)) { - path = g_strdup_printf("qom/%s", mname); + char *path = g_strdup_printf("qom/%s", mname); qtest_add_data_func(path, g_strdup(mname), test_machine); + g_free(path); } } diff --git a/tests/rtas-test.c b/tests/rtas-test.c new file mode 100644 index 0000000000..276c87ef84 --- /dev/null +++ b/tests/rtas-test.c @@ -0,0 +1,40 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "libqtest.h" + +#include "libqos/libqos-spapr.h" +#include "libqos/rtas.h" + +static void test_rtas_get_time_of_day(void) +{ + QOSState *qs; + struct tm tm; + uint32_t ns; + uint64_t ret; + time_t t1, t2; + + qs = qtest_spapr_boot("-machine pseries"); + + t1 = time(NULL); + ret = qrtas_get_time_of_day(qs->alloc, &tm, &ns); + g_assert_cmpint(ret, ==, 0); + t2 = mktimegm(&tm); + g_assert(t2 - t1 < 5); /* 5 sec max to run the test */ + + qtest_shutdown(qs); +} + +int main(int argc, char *argv[]) +{ + const char *arch = qtest_get_arch(); + + g_test_init(&argc, &argv, NULL); + + if (strcmp(arch, "ppc64")) { + g_printerr("RTAS requires ppc64-softmmu/qemu-system-ppc64\n"); + exit(EXIT_FAILURE); + } + qtest_add_func("rtas/get-time-of-day", test_rtas_get_time_of_day); + + return g_test_run(); +} diff --git a/tests/rtl8139-test.c b/tests/rtl8139-test.c index 13de7eeafd..7de7dc45ae 100644 --- a/tests/rtl8139-test.c +++ b/tests/rtl8139-test.c @@ -22,7 +22,7 @@ static void nop(void) static QPCIBus *pcibus; static QPCIDevice *dev; -static void *dev_base; +static QPCIBar dev_bar; static void save_fn(QPCIDevice *dev, int devfn, void *data) { @@ -35,7 +35,7 @@ static QPCIDevice *get_device(void) { QPCIDevice *dev; - pcibus = qpci_init_pc(); + pcibus = qpci_init_pc(NULL); qpci_device_foreach(pcibus, 0x10ec, 0x8139, save_fn, &dev); g_assert(dev != NULL); @@ -45,14 +45,14 @@ static QPCIDevice *get_device(void) #define PORT(name, len, val) \ static unsigned __attribute__((unused)) in_##name(void) \ { \ - unsigned res = qpci_io_read##len(dev, dev_base+(val)); \ + unsigned res = qpci_io_read##len(dev, dev_bar, (val)); \ g_test_message("*%s -> %x\n", #name, res); \ return res; \ } \ static void out_##name(unsigned v) \ { \ g_test_message("%x -> 
*%s\n", v, #name); \ - qpci_io_write##len(dev, dev_base+(val), v); \ + qpci_io_write##len(dev, dev_bar, (val), v); \ } PORT(Timer, l, 0x48) @@ -186,9 +186,7 @@ static void test_init(void) dev = get_device(); - dev_base = qpci_iomap(dev, 0, &barsize); - - g_assert(dev_base != NULL); + dev_bar = qpci_iomap(dev, 0, &barsize); qpci_device_enable(dev); diff --git a/tests/spapr-phb-test.c b/tests/spapr-phb-test.c index 21004a76ec..d3522ea093 100644 --- a/tests/spapr-phb-test.c +++ b/tests/spapr-phb-test.c @@ -25,7 +25,7 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); qtest_add_func("/spapr-phb/device", test_phb_device); - qtest_start("-device " TYPE_SPAPR_PCI_HOST_BRIDGE ",index=100"); + qtest_start("-device " TYPE_SPAPR_PCI_HOST_BRIDGE ",index=30"); ret = g_test_run(); diff --git a/tests/tcg/README b/tests/tcg/README new file mode 100644 index 0000000000..5dcfb4852b --- /dev/null +++ b/tests/tcg/README @@ -0,0 +1,76 @@ +This directory contains various interesting programs for +regression testing. + +The target "make test" runs the programs and, if applicable, +runs "diff" to detect mismatches between output on the host and +output on QEMU. + +i386 +==== + +test-i386 +--------- + +This program executes most of the 16 bit and 32 bit x86 instructions and +generates a text output, for comparison with the output obtained with +a real CPU or another emulator. + +The Linux system call modify_ldt() is used to create x86 selectors +to test some 16 bit addressing and 32 bit with segmentation cases. + +The Linux system call vm86() is used to test vm86 emulation. + +Various exceptions are raised to test most of the x86 user space +exception reporting. + +linux-test +---------- + +This program tests various Linux system calls. It is used to verify +that the system call parameters are correctly converted between target +and host CPUs. + +test-i386-fprem +--------------- + +runcom +------ + +test-mmap +--------- + +sha1 +---- + +hello-i386 +---------- + + +ARM +=== + +hello-arm +--------- + +test-arm-iwmmxt +--------------- + +MIPS +==== + +hello-mips +---------- + +hello-mipsel +------------ + +CRIS +==== +The testsuite for CRIS is in tests/tcg/cris. You can run it +with "make test-cris". + +LM32 +==== +The testsuite for LM32 is in tests/tcg/cris. You can run it +with "make test-lm32". 
+ diff --git a/tests/tcg/cris/Makefile b/tests/tcg/cris/Makefile index d34bfd8f7a..6b3dba446c 100644 --- a/tests/tcg/cris/Makefile +++ b/tests/tcg/cris/Makefile @@ -23,6 +23,7 @@ SYS = sys.o TESTCASES += check_abs.tst TESTCASES += check_addc.tst TESTCASES += check_addcm.tst +TESTCASES += check_addcv17.tst TESTCASES += check_addo.tst TESTCASES += check_addoq.tst TESTCASES += check_addi.tst @@ -108,14 +109,12 @@ TESTCASES += check_stat4.ctst TESTCASES += check_openpf1.ctst TESTCASES += check_openpf2.ctst TESTCASES += check_openpf3.ctst -TESTCASES += check_openpf4.ctst TESTCASES += check_openpf5.ctst TESTCASES += check_mapbrk.ctst TESTCASES += check_mmap1.ctst TESTCASES += check_mmap2.ctst TESTCASES += check_mmap3.ctst TESTCASES += check_sigalrm.ctst -TESTCASES += check_time1.ctst TESTCASES += check_time2.ctst TESTCASES += check_settls1.ctst @@ -136,13 +135,27 @@ all: build %.ctst: %.o $(CC) $(CFLAGS) $(LDLIBS) $< -o $@ + +sysv10.o: sys.c + $(CC) $(CFLAGS) -mcpu=v10 -c $< -o $@ + +crtv10.o: crt.s + $(AS) $(ASFLAGS) -mcpu=v10 -c $< -o $@ + +check_addcv17.tst: ASFLAGS += -mcpu=v10 +check_addcv17.tst: CRT := crtv10.o +check_addcv17.tst: SYS := sysv10.o +check_addcv17.tst: crtv10.o sysv10.o + build: $(CRT) $(SYS) $(TESTCASES) check: $(CRT) $(SYS) $(TESTCASES) @echo -e "\nQEMU simulator." for case in $(TESTCASES); do \ echo -n "$$case "; \ - $(SIM) ./$$case; \ + SIMARGS=; \ + case $$case in *v17*) SIMARGS="-cpu crisv17";; esac; \ + $(SIM) $$SIMARGS ./$$case; \ done check-g: $(CRT) $(SYS) $(TESTCASES) @echo -e "\nGDB simulator." @@ -152,4 +165,4 @@ check-g: $(CRT) $(SYS) $(TESTCASES) done clean: - $(RM) -fr $(TESTCASES) $(CRT) $(SYS) + $(RM) -fr $(TESTCASES) *.o diff --git a/tests/tcg/cris/check_abs.c b/tests/tcg/cris/check_abs.c index 9770a8d9ef..08b67b6ef0 100644 --- a/tests/tcg/cris/check_abs.c +++ b/tests/tcg/cris/check_abs.c @@ -4,14 +4,14 @@ #include "sys.h" #include "crisutils.h" -static inline int cris_abs(int n) +static always_inline int cris_abs(int n) { int r; asm ("abs\t%1, %0\n" : "=r" (r) : "r" (n)); return r; } -static inline void +static always_inline void verify_abs(int val, int res, const int n, const int z, const int v, const int c) { diff --git a/tests/tcg/cris/check_addc.c b/tests/tcg/cris/check_addc.c index facd1bea2d..fc3fb1faa8 100644 --- a/tests/tcg/cris/check_addc.c +++ b/tests/tcg/cris/check_addc.c @@ -4,7 +4,7 @@ #include "sys.h" #include "crisutils.h" -static inline int cris_addc(int a, const int b) +static always_inline int cris_addc(int a, const int b) { asm ("addc\t%1, %0\n" : "+r" (a) : "r" (b)); return a; diff --git a/tests/tcg/cris/check_addcm.c b/tests/tcg/cris/check_addcm.c index 7928bc9999..b355ba164f 100644 --- a/tests/tcg/cris/check_addcm.c +++ b/tests/tcg/cris/check_addcm.c @@ -5,14 +5,14 @@ #include "crisutils.h" /* need to avoid acr as source here. */ -static inline int cris_addc_m(int a, const int *b) +static always_inline int cris_addc_m(int a, const int *b) { asm volatile ("addc [%1], %0\n" : "+r" (a) : "r" (b)); return a; } /* 'b' is a crisv32 constrain to avoid postinc with $acr. 
*/ -static inline int cris_addc_pi_m(int a, int **b) +static always_inline int cris_addc_pi_m(int a, int **b) { asm volatile ("addc [%1+], %0\n" : "+r" (a), "+b" (*b)); return a; diff --git a/tests/tcg/cris/check_addcv17.s b/tests/tcg/cris/check_addcv17.s new file mode 100644 index 0000000000..52ef7a9716 --- /dev/null +++ b/tests/tcg/cris/check_addcv17.s @@ -0,0 +1,65 @@ +# mach: crisv17 + + .include "testutils.inc" + + .macro addc Rs Rd inc=0 +# Create the instruction manually since there is no assembler support yet + .word (\Rd << 12) | \Rs | (\inc << 10) | 0x09a0 + .endm + + start + + .data +mem1: + .dword 0x0 +mem2: + .dword 0x12345678 + + .text + move.d mem1,r4 + clearf nzvc + addc 4 3 + test_cc 0 1 0 0 + checkr3 0 + + move.d mem1,r4 + clearf nzvc + ax + addc 4 3 + test_cc 0 0 0 0 + checkr3 0 + + move.d mem1,r4 + clearf nzvc + setf c + addc 4 3 + test_cc 0 0 0 0 + checkr3 1 + + move.d mem2,r4 + moveq 2, r3 + clearf nzvc + setf c + addc 4 3 + test_cc 0 0 0 0 + checkr3 1234567b + + move.d mem2,r5 + clearf nzvc + cmp.d r4,r5 + test_cc 0 1 0 0 + + move.d mem2,r4 + moveq 2, r3 + clearf nzvc + addc 4 3 inc=1 + test_cc 0 0 0 0 + checkr3 1234567a + + move.d mem2,r5 + clearf nzvc + addq 4,r5 + cmp.d r4,r5 + test_cc 0 1 0 0 + + quit diff --git a/tests/tcg/cris/check_bound.c b/tests/tcg/cris/check_bound.c index e8831754ec..d956ab9ade 100644 --- a/tests/tcg/cris/check_bound.c +++ b/tests/tcg/cris/check_bound.c @@ -4,21 +4,21 @@ #include "sys.h" #include "crisutils.h" -static inline int cris_bound_b(int v, int b) +static always_inline int cris_bound_b(int v, int b) { int r = v; asm ("bound.b\t%1, %0\n" : "+r" (r) : "ri" (b)); return r; } -static inline int cris_bound_w(int v, int b) +static always_inline int cris_bound_w(int v, int b) { int r = v; asm ("bound.w\t%1, %0\n" : "+r" (r) : "ri" (b)); return r; } -static inline int cris_bound_d(int v, int b) +static always_inline int cris_bound_d(int v, int b) { int r = v; asm ("bound.d\t%1, %0\n" : "+r" (r) : "ri" (b)); diff --git a/tests/tcg/cris/check_ftag.c b/tests/tcg/cris/check_ftag.c index 908773a38a..aaa5c97115 100644 --- a/tests/tcg/cris/check_ftag.c +++ b/tests/tcg/cris/check_ftag.c @@ -4,22 +4,22 @@ #include "sys.h" #include "crisutils.h" -static inline void cris_ftag_i(unsigned int x) +static always_inline void cris_ftag_i(unsigned int x) { register unsigned int v asm("$r10") = x; asm ("ftagi\t[%0]\n" : : "r" (v) ); } -static inline void cris_ftag_d(unsigned int x) +static always_inline void cris_ftag_d(unsigned int x) { register unsigned int v asm("$r10") = x; asm ("ftagd\t[%0]\n" : : "r" (v) ); } -static inline void cris_fidx_i(unsigned int x) +static always_inline void cris_fidx_i(unsigned int x) { register unsigned int v asm("$r10") = x; asm ("fidxi\t[%0]\n" : : "r" (v) ); } -static inline void cris_fidx_d(unsigned int x) +static always_inline void cris_fidx_d(unsigned int x) { register unsigned int v asm("$r10") = x; asm ("fidxd\t[%0]\n" : : "r" (v) ); diff --git a/tests/tcg/cris/check_int64.c b/tests/tcg/cris/check_int64.c index fc600176e2..69caec1bb2 100644 --- a/tests/tcg/cris/check_int64.c +++ b/tests/tcg/cris/check_int64.c @@ -5,12 +5,12 @@ #include "crisutils.h" -static inline int64_t add64(const int64_t a, const int64_t b) +static always_inline int64_t add64(const int64_t a, const int64_t b) { return a + b; } -static inline int64_t sub64(const int64_t a, const int64_t b) +static always_inline int64_t sub64(const int64_t a, const int64_t b) { return a - b; } diff --git a/tests/tcg/cris/check_lz.c b/tests/tcg/cris/check_lz.c index 
69c2e6d4ec..bf051a6b55 100644 --- a/tests/tcg/cris/check_lz.c +++ b/tests/tcg/cris/check_lz.c @@ -3,7 +3,7 @@ #include #include "sys.h" -static inline int cris_lz(int x) +static always_inline int cris_lz(int x) { int r; asm ("lz\t%1, %0\n" : "=r" (r) : "r" (x)); diff --git a/tests/tcg/cris/check_openpf4.c b/tests/tcg/cris/check_openpf4.c deleted file mode 100644 index 8bbee41a64..0000000000 --- a/tests/tcg/cris/check_openpf4.c +++ /dev/null @@ -1,5 +0,0 @@ -/* Basic file operations, now *with* sysroot. -#sim: --sysroot=@exedir@ -*/ -#define PREFIX "/" -#include "check_openpf3.c" diff --git a/tests/tcg/cris/check_swap.c b/tests/tcg/cris/check_swap.c index f851cbcef1..9a68c1e5d7 100644 --- a/tests/tcg/cris/check_swap.c +++ b/tests/tcg/cris/check_swap.c @@ -9,7 +9,7 @@ #define B 2 #define R 1 -static inline int cris_swap(const int mode, int x) +static always_inline int cris_swap(const int mode, int x) { switch (mode) { diff --git a/tests/tcg/cris/check_time1.c b/tests/tcg/cris/check_time1.c deleted file mode 100644 index 3fcf0e1535..0000000000 --- a/tests/tcg/cris/check_time1.c +++ /dev/null @@ -1,46 +0,0 @@ -/* Basic time functionality test: check that milliseconds are - incremented for each syscall (does not work on host). */ -#include -#include -#include -#include -#include - -void err (const char *s) -{ - perror (s); - abort (); -} - -int -main (void) -{ - struct timeval t_m = {0, 0}; - struct timezone t_z = {0, 0}; - struct timeval t_m1 = {0, 0}; - int i; - - if (gettimeofday (&t_m, &t_z) != 0) - err ("gettimeofday"); - - for (i = 1; i < 10000; i++) - if (gettimeofday (&t_m1, NULL) != 0) - err ("gettimeofday 1"); - else - if (t_m1.tv_sec * 1000000 + t_m1.tv_usec - != (t_m.tv_sec * 1000000 + t_m.tv_usec + i * 1000)) - { - fprintf (stderr, "t0 (%ld, %ld), i %d, t1 (%ld, %ld)\n", - t_m.tv_sec, t_m.tv_usec, i, t_m1.tv_sec, t_m1.tv_usec); - abort (); - } - - if (time (NULL) != t_m1.tv_sec) - { - fprintf (stderr, "time != gettod\n"); - abort (); - } - - printf ("pass\n"); - exit (0); -} diff --git a/tests/tcg/cris/crisutils.h b/tests/tcg/cris/crisutils.h index 3456b9d50d..bbbe6c5540 100644 --- a/tests/tcg/cris/crisutils.h +++ b/tests/tcg/cris/crisutils.h @@ -13,57 +13,57 @@ void _err(void) { _fail(tst_cc_loc); } -static inline void cris_tst_cc_n1(void) +static always_inline void cris_tst_cc_n1(void) { asm volatile ("bpl _err\n" "nop\n"); } -static inline void cris_tst_cc_n0(void) +static always_inline void cris_tst_cc_n0(void) { asm volatile ("bmi _err\n" "nop\n"); } -static inline void cris_tst_cc_z1(void) +static always_inline void cris_tst_cc_z1(void) { asm volatile ("bne _err\n" "nop\n"); } -static inline void cris_tst_cc_z0(void) +static always_inline void cris_tst_cc_z0(void) { asm volatile ("beq _err\n" "nop\n"); } -static inline void cris_tst_cc_v1(void) +static always_inline void cris_tst_cc_v1(void) { asm volatile ("bvc _err\n" "nop\n"); } -static inline void cris_tst_cc_v0(void) +static always_inline void cris_tst_cc_v0(void) { asm volatile ("bvs _err\n" "nop\n"); } -static inline void cris_tst_cc_c1(void) +static always_inline void cris_tst_cc_c1(void) { asm volatile ("bcc _err\n" "nop\n"); } -static inline void cris_tst_cc_c0(void) +static always_inline void cris_tst_cc_c0(void) { asm volatile ("bcs _err\n" "nop\n"); } -static inline void cris_tst_mov_cc(int n, int z) +static always_inline void cris_tst_mov_cc(int n, int z) { if (n) cris_tst_cc_n1(); else cris_tst_cc_n0(); if (z) cris_tst_cc_z1(); else cris_tst_cc_z0(); asm volatile ("" : : "g" (_err)); } -static inline void 
cris_tst_cc(const int n, const int z, +static always_inline void cris_tst_cc(const int n, const int z, const int v, const int c) { if (n) cris_tst_cc_n1(); else cris_tst_cc_n0(); diff --git a/tests/tcg/cris/sys.c b/tests/tcg/cris/sys.c index 551c5dd7cb..21f08c0747 100644 --- a/tests/tcg/cris/sys.c +++ b/tests/tcg/cris/sys.c @@ -33,19 +33,27 @@ void *memset (void *s, int c, size_t n) { } void exit (int status) { - asm volatile ("moveq 1, $r9\n" /* NR_exit. */ - "break 13\n"); + register unsigned int callno asm ("r9") = 1; /* NR_exit */ + + asm volatile ("break 13\n" + : + : "r" (callno) + : "memory" ); while(1) ; } ssize_t write (int fd, const void *buf, size_t count) { - int r; - asm ("move.d %0, $r10\n" - "move.d %1, $r11\n" - "move.d %2, $r12\n" - "moveq 4, $r9\n" /* NR_write. */ - "break 13\n" : : "r" (fd), "r" (buf), "r" (count) : "memory"); - asm ("move.d $r10, %0\n" : "=r" (r)); + register unsigned int callno asm ("r9") = 4; /* NR_write */ + register unsigned int r10 asm ("r10") = fd; + register const void *r11 asm ("r11") = buf; + register size_t r12 asm ("r12") = count; + register unsigned int r asm ("r10"); + + asm volatile ("break 13\n" + : "=r" (r) + : "r" (callno), "0" (r10), "r" (r11), "r" (r12) + : "memory"); + return r; } diff --git a/tests/tcg/cris/sys.h b/tests/tcg/cris/sys.h index c5f88e1a29..3dd47bb673 100644 --- a/tests/tcg/cris/sys.h +++ b/tests/tcg/cris/sys.h @@ -3,6 +3,8 @@ #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) +#define always_inline inline __attribute__((always_inline)) + #define CURRENT_LOCATION __FILE__ ":" TOSTRING(__LINE__) #define err() \ diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c index b05572b734..0f7b943b0c 100644 --- a/tests/tcg/test-i386.c +++ b/tests/tcg/test-i386.c @@ -2250,14 +2250,14 @@ SSE_OP(a ## sd); #define SSE_COMI(op, field)\ {\ - unsigned int eflags;\ + unsigned long eflags;\ XMMReg a, b;\ a.field[0] = a1;\ b.field[0] = b1;\ asm volatile (#op " %2, %1\n"\ "pushf\n"\ "pop %0\n"\ - : "=m" (eflags)\ + : "=rm" (eflags)\ : "x" (a.dq), "x" (b.dq));\ printf("%-9s: a=%f b=%f cc=%04x\n",\ #op, a1, b1,\ diff --git a/tests/tco-test.c b/tests/tco-test.c index 0d13aa8d63..ef02ec5903 100644 --- a/tests/tco-test.c +++ b/tests/tco-test.c @@ -41,7 +41,7 @@ typedef struct { const char *args; bool noreboot; QPCIDevice *dev; - void *tco_io_base; + QPCIBar tco_io_bar; } TestData; static void test_init(TestData *d) @@ -57,7 +57,7 @@ static void test_init(TestData *d) qtest_irq_intercept_in(qs, "ioapic"); g_free(s); - bus = qpci_init_pc(); + bus = qpci_init_pc(NULL); d->dev = qpci_device_find(bus, QPCI_DEVFN(0x1f, 0x00)); g_assert(d->dev != NULL); @@ -70,42 +70,42 @@ static void test_init(TestData *d) /* set Root Complex BAR */ qpci_config_writel(d->dev, ICH9_LPC_RCBA, RCBA_BASE_ADDR | 0x1); - d->tco_io_base = (void *)((uintptr_t)PM_IO_BASE_ADDR + 0x60); + d->tco_io_bar = qpci_legacy_iomap(d->dev, PM_IO_BASE_ADDR + 0x60); } static void stop_tco(const TestData *d) { uint32_t val; - val = qpci_io_readw(d->dev, d->tco_io_base + TCO1_CNT); + val = qpci_io_readw(d->dev, d->tco_io_bar, TCO1_CNT); val |= TCO_TMR_HLT; - qpci_io_writew(d->dev, d->tco_io_base + TCO1_CNT, val); + qpci_io_writew(d->dev, d->tco_io_bar, TCO1_CNT, val); } static void start_tco(const TestData *d) { uint32_t val; - val = qpci_io_readw(d->dev, d->tco_io_base + TCO1_CNT); + val = qpci_io_readw(d->dev, d->tco_io_bar, TCO1_CNT); val &= ~TCO_TMR_HLT; - qpci_io_writew(d->dev, d->tco_io_base + TCO1_CNT, val); + qpci_io_writew(d->dev, d->tco_io_bar, TCO1_CNT, val); } static 
void load_tco(const TestData *d) { - qpci_io_writew(d->dev, d->tco_io_base + TCO_RLD, 4); + qpci_io_writew(d->dev, d->tco_io_bar, TCO_RLD, 4); } static void set_tco_timeout(const TestData *d, uint16_t ticks) { - qpci_io_writew(d->dev, d->tco_io_base + TCO_TMR, ticks); + qpci_io_writew(d->dev, d->tco_io_bar, TCO_TMR, ticks); } static void clear_tco_status(const TestData *d) { - qpci_io_writew(d->dev, d->tco_io_base + TCO1_STS, 0x0008); - qpci_io_writew(d->dev, d->tco_io_base + TCO2_STS, 0x0002); - qpci_io_writew(d->dev, d->tco_io_base + TCO2_STS, 0x0004); + qpci_io_writew(d->dev, d->tco_io_bar, TCO1_STS, 0x0008); + qpci_io_writew(d->dev, d->tco_io_bar, TCO2_STS, 0x0002); + qpci_io_writew(d->dev, d->tco_io_bar, TCO2_STS, 0x0004); } static void reset_on_second_timeout(bool enable) @@ -128,25 +128,25 @@ static void test_tco_defaults(void) d.args = NULL; d.noreboot = true; test_init(&d); - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD), ==, + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_RLD), ==, TCO_RLD_DEFAULT); /* TCO_DAT_IN & TCO_DAT_OUT */ - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_DAT_IN), ==, + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_DAT_IN), ==, (TCO_DAT_OUT_DEFAULT << 8) | TCO_DAT_IN_DEFAULT); /* TCO1_STS & TCO2_STS */ - g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_base + TCO1_STS), ==, + g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_bar, TCO1_STS), ==, (TCO2_STS_DEFAULT << 16) | TCO1_STS_DEFAULT); /* TCO1_CNT & TCO2_CNT */ - g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_base + TCO1_CNT), ==, + g_assert_cmpint(qpci_io_readl(d.dev, d.tco_io_bar, TCO1_CNT), ==, (TCO2_CNT_DEFAULT << 16) | TCO1_CNT_DEFAULT); /* TCO_MESSAGE1 & TCO_MESSAGE2 */ - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_MESSAGE1), ==, + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_MESSAGE1), ==, (TCO_MESSAGE2_DEFAULT << 8) | TCO_MESSAGE1_DEFAULT); - g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_base + TCO_WDCNT), ==, + g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_bar, TCO_WDCNT), ==, TCO_WDCNT_DEFAULT); - g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_base + SW_IRQ_GEN), ==, + g_assert_cmpint(qpci_io_readb(d.dev, d.tco_io_bar, SW_IRQ_GEN), ==, SW_IRQ_GEN_DEFAULT); - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO_TMR), ==, + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO_TMR), ==, TCO_TMR_DEFAULT); qtest_end(); } @@ -171,23 +171,23 @@ static void test_tco_timeout(void) clock_step(ticks * TCO_TICK_NSEC); /* test first timeout */ - val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS); ret = val & TCO_TIMEOUT ? 1 : 0; g_assert(ret == 1); /* test clearing timeout bit */ val |= TCO_TIMEOUT; - qpci_io_writew(d.dev, d.tco_io_base + TCO1_STS, val); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS); ret = val & TCO_TIMEOUT ? 1 : 0; g_assert(ret == 0); /* test second timeout */ clock_step(ticks * TCO_TICK_NSEC); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS); ret = val & TCO_TIMEOUT ? 1 : 0; g_assert(ret == 1); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS); ret = val & TCO_SECOND_TO_STS ? 
1 : 0; g_assert(ret == 1); @@ -214,13 +214,13 @@ static void test_tco_max_timeout(void) start_tco(&d); clock_step(((ticks & TCO_TMR_MASK) - 1) * TCO_TICK_NSEC); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO_RLD); g_assert_cmpint(val & TCO_RLD_MASK, ==, 1); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS); ret = val & TCO_TIMEOUT ? 1 : 0; g_assert(ret == 0); clock_step(TCO_TICK_NSEC); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS); ret = val & TCO_TIMEOUT ? 1 : 0; g_assert(ret == 1); @@ -358,11 +358,11 @@ static void test_tco_ticks_counter(void) start_tco(&d); do { - rld = qpci_io_readw(d.dev, d.tco_io_base + TCO_RLD) & TCO_RLD_MASK; + rld = qpci_io_readw(d.dev, d.tco_io_bar, TCO_RLD) & TCO_RLD_MASK; g_assert_cmpint(rld, ==, ticks); clock_step(TCO_TICK_NSEC); ticks--; - } while (!(qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS) & TCO_TIMEOUT)); + } while (!(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS) & TCO_TIMEOUT)); stop_tco(&d); qtest_end(); @@ -378,10 +378,10 @@ static void test_tco1_control_bits(void) test_init(&d); val = TCO_LOCK; - qpci_io_writew(d.dev, d.tco_io_base + TCO1_CNT, val); + qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val); val &= ~TCO_LOCK; - qpci_io_writew(d.dev, d.tco_io_base + TCO1_CNT, val); - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO1_CNT), ==, + qpci_io_writew(d.dev, d.tco_io_bar, TCO1_CNT, val); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_CNT), ==, TCO_LOCK); qtest_end(); } @@ -405,13 +405,13 @@ static void test_tco1_status_bits(void) start_tco(&d); clock_step(ticks * TCO_TICK_NSEC); - qpci_io_writeb(d.dev, d.tco_io_base + TCO_DAT_IN, 0); - qpci_io_writeb(d.dev, d.tco_io_base + TCO_DAT_OUT, 0); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS); + qpci_io_writeb(d.dev, d.tco_io_bar, TCO_DAT_IN, 0); + qpci_io_writeb(d.dev, d.tco_io_bar, TCO_DAT_OUT, 0); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS); ret = val & (TCO_TIMEOUT | SW_TCO_SMI | TCO_INT_STS) ? 1 : 0; g_assert(ret == 1); - qpci_io_writew(d.dev, d.tco_io_base + TCO1_STS, val); - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO1_STS), ==, 0); + qpci_io_writew(d.dev, d.tco_io_bar, TCO1_STS, val); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO1_STS), ==, 0); qtest_end(); } @@ -434,11 +434,11 @@ static void test_tco2_status_bits(void) start_tco(&d); clock_step(ticks * TCO_TICK_NSEC * 2); - val = qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS); + val = qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS); ret = val & (TCO_SECOND_TO_STS | TCO_BOOT_STS) ? 1 : 0; g_assert(ret == 1); - qpci_io_writew(d.dev, d.tco_io_base + TCO2_STS, val); - g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_base + TCO2_STS), ==, 0); + qpci_io_writew(d.dev, d.tco_io_bar, TCO2_STS, val); + g_assert_cmpint(qpci_io_readw(d.dev, d.tco_io_bar, TCO2_STS), ==, 0); qtest_end(); } diff --git a/tests/test-aio.c b/tests/test-aio.c index 03aa846970..5be99f8287 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -100,6 +100,7 @@ static void event_ready_cb(EventNotifier *e) typedef struct { QemuMutex start_lock; + EventNotifier notifier; bool thread_acquired; } AcquireTestData; @@ -111,6 +112,11 @@ static void *test_acquire_thread(void *opaque) qemu_mutex_lock(&data->start_lock); qemu_mutex_unlock(&data->start_lock); + /* event_notifier_set might be called either before or after + * the main thread's call to poll(). 
The test case's outcome + * should be the same in either case. + */ + event_notifier_set(&data->notifier); aio_context_acquire(ctx); aio_context_release(ctx); @@ -125,20 +131,19 @@ static void set_event_notifier(AioContext *ctx, EventNotifier *notifier, aio_set_event_notifier(ctx, notifier, false, handler); } -static void dummy_notifier_read(EventNotifier *unused) +static void dummy_notifier_read(EventNotifier *n) { - g_assert(false); /* should never be invoked */ + event_notifier_test_and_clear(n); } static void test_acquire(void) { QemuThread thread; - EventNotifier notifier; AcquireTestData data; /* Dummy event notifier ensures aio_poll() will block */ - event_notifier_init(¬ifier, false); - set_event_notifier(ctx, ¬ifier, dummy_notifier_read); + event_notifier_init(&data.notifier, false); + set_event_notifier(ctx, &data.notifier, dummy_notifier_read); g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ qemu_mutex_init(&data.start_lock); @@ -152,12 +157,13 @@ static void test_acquire(void) /* Block in aio_poll(), let other thread kick us and acquire context */ aio_context_acquire(ctx); qemu_mutex_unlock(&data.start_lock); /* let the thread run */ - g_assert(!aio_poll(ctx, true)); + g_assert(aio_poll(ctx, true)); + g_assert(!data.thread_acquired); aio_context_release(ctx); qemu_thread_join(&thread); - set_event_notifier(ctx, ¬ifier, NULL); - event_notifier_cleanup(¬ifier); + set_event_notifier(ctx, &data.notifier, NULL); + event_notifier_cleanup(&data.notifier); g_assert(data.thread_acquired); } diff --git a/tests/test-arm-mptimer.c b/tests/test-arm-mptimer.c new file mode 100644 index 0000000000..cb8f2df914 --- /dev/null +++ b/tests/test-arm-mptimer.c @@ -0,0 +1,1105 @@ +/* + * QTest testcase for the ARM MPTimer + * + * Copyright (c) 2016 Dmitry Osipenko + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "libqtest.h" + +#define TIMER_BLOCK_SCALE(s) ((((s) & 0xff) + 1) * 10) + +#define TIMER_BLOCK_STEP(scaler, steps_nb) \ + clock_step(TIMER_BLOCK_SCALE(scaler) * (int64_t)(steps_nb) + 1) + +#define TIMER_BASE_PHYS 0x1e000600 + +#define TIMER_LOAD 0x00 +#define TIMER_COUNTER 0x04 +#define TIMER_CONTROL 0x08 +#define TIMER_INTSTAT 0x0C + +#define TIMER_CONTROL_ENABLE (1 << 0) +#define TIMER_CONTROL_PERIODIC (1 << 1) +#define TIMER_CONTROL_IT_ENABLE (1 << 2) +#define TIMER_CONTROL_PRESCALER(p) (((p) & 0xff) << 8) + +#define PERIODIC 1 +#define ONESHOT 0 +#define NOSCALE 0 + +static int nonscaled = NOSCALE; +static int scaled = 122; + +static void timer_load(uint32_t load) +{ + writel(TIMER_BASE_PHYS + TIMER_LOAD, load); +} + +static void timer_start(int periodic, uint32_t scale) +{ + uint32_t ctl = TIMER_CONTROL_ENABLE | TIMER_CONTROL_PRESCALER(scale); + + if (periodic) { + ctl |= TIMER_CONTROL_PERIODIC; + } + + writel(TIMER_BASE_PHYS + TIMER_CONTROL, ctl); +} + +static void timer_stop(void) +{ + writel(TIMER_BASE_PHYS + TIMER_CONTROL, 0); +} + +static void timer_int_clr(void) +{ + writel(TIMER_BASE_PHYS + TIMER_INTSTAT, 1); +} + +static void timer_reset(void) +{ + timer_stop(); + timer_load(0); + timer_int_clr(); +} + +static uint32_t timer_get_and_clr_int_sts(void) +{ + uint32_t int_sts = readl(TIMER_BASE_PHYS + TIMER_INTSTAT); + + if (int_sts) { + timer_int_clr(); + } + + return int_sts; +} + +static uint32_t timer_counter(void) +{ + return readl(TIMER_BASE_PHYS + TIMER_COUNTER); +} + +static void timer_set_counter(uint32_t value) +{ + writel(TIMER_BASE_PHYS + TIMER_COUNTER, value); +} + +static void test_timer_oneshot(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(9999999); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 9999); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + g_assert_cmpuint(timer_counter(), ==, 9990000); + + TIMER_BLOCK_STEP(scaler, 9990000); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + TIMER_BLOCK_STEP(scaler, 9990000); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_pause(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(999999999); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 999); + + g_assert_cmpuint(timer_counter(), ==, 999999000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(scaler, 9000); + + g_assert_cmpuint(timer_counter(), ==, 999990000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_stop(); + + g_assert_cmpuint(timer_counter(), ==, 999990000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(scaler, 90000); + + g_assert_cmpuint(timer_counter(), ==, 999990000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 999990000); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_counter(), ==, 0); + + TIMER_BLOCK_STEP(scaler, 999990000); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + g_assert_cmpuint(timer_counter(), ==, 0); +} + +static void test_timer_reload(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(UINT32_MAX); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 90000); + + 
g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 90000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_load(UINT32_MAX); + + TIMER_BLOCK_STEP(scaler, 90000); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 90000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_periodic(gconstpointer arg) +{ + int scaler = *((int *) arg); + int repeat = 10; + + timer_reset(); + timer_load(100); + timer_start(PERIODIC, scaler); + + while (repeat--) { + clock_step(TIMER_BLOCK_SCALE(scaler) * (101 + repeat) + 1); + + g_assert_cmpuint(timer_counter(), ==, 100 - repeat); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + clock_step(TIMER_BLOCK_SCALE(scaler) * (101 - repeat) - 1); + } +} + +static void test_timer_oneshot_to_periodic(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(10000); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1000); + + g_assert_cmpuint(timer_counter(), ==, 9000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 14001); + + g_assert_cmpuint(timer_counter(), ==, 5000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); +} + +static void test_timer_periodic_to_oneshot(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(99999999); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 999); + + g_assert_cmpuint(timer_counter(), ==, 99999000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 99999009); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); +} + +static void test_timer_prescaler(void) +{ + timer_reset(); + timer_load(9999999); + timer_start(ONESHOT, NOSCALE); + + TIMER_BLOCK_STEP(NOSCALE, 9999998); + + g_assert_cmpuint(timer_counter(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(NOSCALE, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + timer_reset(); + timer_load(9999999); + timer_start(ONESHOT, 0xAB); + + TIMER_BLOCK_STEP(0xAB, 9999998); + + g_assert_cmpuint(timer_counter(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(0xAB, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); +} + +static void test_timer_prescaler_on_the_fly(void) +{ + timer_reset(); + timer_load(9999999); + timer_start(ONESHOT, NOSCALE); + + TIMER_BLOCK_STEP(NOSCALE, 999); + + g_assert_cmpuint(timer_counter(), ==, 9999000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(ONESHOT, 0xAB); + + TIMER_BLOCK_STEP(0xAB, 9000); + + g_assert_cmpuint(timer_counter(), ==, 9990000); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_set_oneshot_counter_to_0(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(UINT32_MAX); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_set_counter(0); + + TIMER_BLOCK_STEP(scaler, 10); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_set_periodic_counter_to_0(gconstpointer arg) +{ + int scaler = *((int *) arg); + + 
timer_reset(); + timer_load(UINT32_MAX); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_set_counter(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - (scaler ? 0 : 1)); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + timer_reset(); + timer_set_counter(UINT32_MAX); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_set_counter(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_noload_oneshot(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_noload_periodic(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_zero_load_oneshot(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, 0); + + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_zero_load_periodic(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, 0); + + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_zero_load_oneshot_to_nonzero(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, 0); + + timer_load(999); + + TIMER_BLOCK_STEP(scaler, 1001); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); +} + +static void 
test_timer_zero_load_periodic_to_nonzero(gconstpointer arg) +{ + int scaler = *((int *) arg); + int i; + + timer_reset(); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, 0); + + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + timer_load(1999999); + + for (i = 1; i < 10; i++) { + TIMER_BLOCK_STEP(scaler, 2000001); + + g_assert_cmpuint(timer_counter(), ==, 1999999 - i); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + } +} + +static void test_timer_nonzero_load_oneshot_to_zero(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, 0); + + timer_load(UINT32_MAX); + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_nonzero_load_periodic_to_zero(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + timer_load(UINT32_MAX); + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_set_periodic_counter_on_the_fly(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(UINT32_MAX / 2); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX / 2 - 100); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_set_counter(UINT32_MAX); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 100); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_enable_and_set_counter(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + timer_set_counter(UINT32_MAX); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 100); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_set_counter_and_enable(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_set_counter(UINT32_MAX); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 100); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_set_counter_disabled(void) +{ + timer_reset(); + timer_set_counter(999999999); + + TIMER_BLOCK_STEP(NOSCALE, 100); + + g_assert_cmpuint(timer_counter(), ==, 999999999); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_load_disabled(void) +{ + timer_reset(); + timer_load(999999999); + + TIMER_BLOCK_STEP(NOSCALE, 100); + + g_assert_cmpuint(timer_counter(), ==, 999999999); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void 
test_timer_oneshot_with_counter_0_on_start(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(999); + timer_set_counter(0); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_periodic_with_counter_0_on_start(gconstpointer arg) +{ + int scaler = *((int *) arg); + int i; + + timer_reset(); + timer_load(UINT32_MAX); + timer_set_counter(0); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + g_assert_cmpuint(timer_counter(), ==, 0); + + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX + (scaler ? 1 : 0) - 100); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX + (scaler ? 1 : 0) - 200); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_reset(); + timer_load(1999999); + timer_set_counter(0); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + for (i = 2 - (!!scaler ? 1 : 0); i < 10; i++) { + TIMER_BLOCK_STEP(scaler, 2000001); + + g_assert_cmpuint(timer_counter(), ==, 1999999 - i); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + } +} + +static void test_periodic_counter(gconstpointer arg) +{ + const int test_load = 10; + int scaler = *((int *) arg); + int test_val; + + timer_reset(); + timer_load(test_load); + timer_start(PERIODIC, scaler); + + clock_step(1); + + for (test_val = 0; test_val <= test_load; test_val++) { + clock_step(TIMER_BLOCK_SCALE(scaler) * test_load); + g_assert_cmpint(timer_counter(), ==, test_val); + } +} + +static void test_timer_set_counter_periodic_with_zero_load(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_start(PERIODIC, scaler); + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + timer_set_counter(999); + + TIMER_BLOCK_STEP(scaler, 999); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_set_oneshot_load_to_0(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(UINT32_MAX); + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 100); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void 
test_timer_set_periodic_load_to_0(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(UINT32_MAX); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, UINT32_MAX - 100); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_load(0); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + g_assert_cmpuint(timer_counter(), ==, 0); +} + +static void test_deferred_trigger(void) +{ + int mode = ONESHOT; + +again: + timer_reset(); + timer_start(mode, 255); + + clock_step(100); + + g_assert_cmpuint(timer_counter(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + timer_reset(); + timer_load(2); + timer_start(mode, 255); + + clock_step(100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + timer_reset(); + timer_load(UINT32_MAX); + timer_start(mode, 255); + + clock_step(100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_set_counter(0); + + clock_step(100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + timer_reset(); + timer_load(UINT32_MAX); + timer_start(mode, 255); + + clock_step(100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_load(0); + + clock_step(100); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + + if (mode == ONESHOT) { + mode = PERIODIC; + goto again; + } +} + +static void test_timer_zero_load_mode_switch(gconstpointer arg) +{ + int scaler = *((int *) arg); + + timer_reset(); + timer_load(0); + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + timer_start(ONESHOT, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(scaler, 1); + + timer_start(PERIODIC, scaler); + + TIMER_BLOCK_STEP(scaler, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, !!scaler); +} + +static void test_timer_zero_load_prescaled_periodic_to_nonscaled_oneshot(void) +{ + timer_reset(); + timer_load(0); + timer_start(PERIODIC, 255); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + timer_start(ONESHOT, NOSCALE); + + TIMER_BLOCK_STEP(NOSCALE, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), 
==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(NOSCALE, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_zero_load_prescaled_oneshot_to_nonscaled_periodic(void) +{ + timer_reset(); + timer_load(0); + timer_start(ONESHOT, 255); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(PERIODIC, NOSCALE); + + TIMER_BLOCK_STEP(NOSCALE, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_zero_load_nonscaled_oneshot_to_prescaled_periodic(void) +{ + timer_reset(); + timer_load(0); + timer_start(ONESHOT, NOSCALE); + + TIMER_BLOCK_STEP(NOSCALE, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(PERIODIC, 255); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +static void test_timer_zero_load_nonscaled_periodic_to_prescaled_oneshot(void) +{ + timer_reset(); + timer_load(0); + timer_start(PERIODIC, NOSCALE); + + TIMER_BLOCK_STEP(NOSCALE, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + timer_start(ONESHOT, 255); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 1); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); + + TIMER_BLOCK_STEP(255, 1); + + g_assert_cmpuint(timer_counter(), ==, 0); + g_assert_cmpuint(timer_get_and_clr_int_sts(), ==, 0); +} + +int main(int argc, char **argv) +{ + int *scaler = &nonscaled; + int ret; + + g_test_init(&argc, &argv, NULL); + + qtest_add_func("mptimer/deferred_trigger", test_deferred_trigger); + qtest_add_func("mptimer/load_disabled", test_timer_load_disabled); + qtest_add_func("mptimer/set_counter_disabled", test_timer_set_counter_disabled); + qtest_add_func("mptimer/zero_load_prescaled_periodic_to_nonscaled_oneshot", + test_timer_zero_load_prescaled_periodic_to_nonscaled_oneshot); + qtest_add_func("mptimer/zero_load_prescaled_oneshot_to_nonscaled_periodic", + test_timer_zero_load_prescaled_oneshot_to_nonscaled_periodic); + qtest_add_func("mptimer/zero_load_nonscaled_oneshot_to_prescaled_periodic", + test_timer_zero_load_nonscaled_oneshot_to_prescaled_periodic); + qtest_add_func("mptimer/zero_load_nonscaled_periodic_to_prescaled_oneshot", + test_timer_zero_load_nonscaled_periodic_to_prescaled_oneshot); + qtest_add_func("mptimer/prescaler", test_timer_prescaler); + qtest_add_func("mptimer/prescaler_on_the_fly", test_timer_prescaler_on_the_fly); + +tests_with_prescaler_arg: + qtest_add_data_func( + g_strdup_printf("mptimer/oneshot scaler=%d", *scaler), + scaler, test_timer_oneshot); + qtest_add_data_func( + g_strdup_printf("mptimer/pause scaler=%d", *scaler), + scaler, test_timer_pause); + qtest_add_data_func( + g_strdup_printf("mptimer/reload scaler=%d", *scaler), + scaler, test_timer_reload); + qtest_add_data_func( + g_strdup_printf("mptimer/periodic scaler=%d", *scaler), + 
scaler, test_timer_periodic); + qtest_add_data_func( + g_strdup_printf("mptimer/oneshot_to_periodic scaler=%d", *scaler), + scaler, test_timer_oneshot_to_periodic); + qtest_add_data_func( + g_strdup_printf("mptimer/periodic_to_oneshot scaler=%d", *scaler), + scaler, test_timer_periodic_to_oneshot); + qtest_add_data_func( + g_strdup_printf("mptimer/set_oneshot_counter_to_0 scaler=%d", *scaler), + scaler, test_timer_set_oneshot_counter_to_0); + qtest_add_data_func( + g_strdup_printf("mptimer/set_periodic_counter_to_0 scaler=%d", *scaler), + scaler, test_timer_set_periodic_counter_to_0); + qtest_add_data_func( + g_strdup_printf("mptimer/noload_oneshot scaler=%d", *scaler), + scaler, test_timer_noload_oneshot); + qtest_add_data_func( + g_strdup_printf("mptimer/noload_periodic scaler=%d", *scaler), + scaler, test_timer_noload_periodic); + qtest_add_data_func( + g_strdup_printf("mptimer/zero_load_oneshot scaler=%d", *scaler), + scaler, test_timer_zero_load_oneshot); + qtest_add_data_func( + g_strdup_printf("mptimer/zero_load_periodic scaler=%d", *scaler), + scaler, test_timer_zero_load_periodic); + qtest_add_data_func( + g_strdup_printf("mptimer/zero_load_oneshot_to_nonzero scaler=%d", *scaler), + scaler, test_timer_zero_load_oneshot_to_nonzero); + qtest_add_data_func( + g_strdup_printf("mptimer/zero_load_periodic_to_nonzero scaler=%d", *scaler), + scaler, test_timer_zero_load_periodic_to_nonzero); + qtest_add_data_func( + g_strdup_printf("mptimer/nonzero_load_oneshot_to_zero scaler=%d", *scaler), + scaler, test_timer_nonzero_load_oneshot_to_zero); + qtest_add_data_func( + g_strdup_printf("mptimer/nonzero_load_periodic_to_zero scaler=%d", *scaler), + scaler, test_timer_nonzero_load_periodic_to_zero); + qtest_add_data_func( + g_strdup_printf("mptimer/set_periodic_counter_on_the_fly scaler=%d", *scaler), + scaler, test_timer_set_periodic_counter_on_the_fly); + qtest_add_data_func( + g_strdup_printf("mptimer/enable_and_set_counter scaler=%d", *scaler), + scaler, test_timer_enable_and_set_counter); + qtest_add_data_func( + g_strdup_printf("mptimer/set_counter_and_enable scaler=%d", *scaler), + scaler, test_timer_set_counter_and_enable); + qtest_add_data_func( + g_strdup_printf("mptimer/oneshot_with_counter_0_on_start scaler=%d", *scaler), + scaler, test_timer_oneshot_with_counter_0_on_start); + qtest_add_data_func( + g_strdup_printf("mptimer/periodic_with_counter_0_on_start scaler=%d", *scaler), + scaler, test_timer_periodic_with_counter_0_on_start); + qtest_add_data_func( + g_strdup_printf("mptimer/periodic_counter scaler=%d", *scaler), + scaler, test_periodic_counter); + qtest_add_data_func( + g_strdup_printf("mptimer/set_counter_periodic_with_zero_load scaler=%d", *scaler), + scaler, test_timer_set_counter_periodic_with_zero_load); + qtest_add_data_func( + g_strdup_printf("mptimer/set_oneshot_load_to_0 scaler=%d", *scaler), + scaler, test_timer_set_oneshot_load_to_0); + qtest_add_data_func( + g_strdup_printf("mptimer/set_periodic_load_to_0 scaler=%d", *scaler), + scaler, test_timer_set_periodic_load_to_0); + qtest_add_data_func( + g_strdup_printf("mptimer/zero_load_mode_switch scaler=%d", *scaler), + scaler, test_timer_zero_load_mode_switch); + + if (scaler == &nonscaled) { + scaler = &scaled; + goto tests_with_prescaler_arg; + } + + qtest_start("-machine vexpress-a9"); + ret = g_test_run(); + qtest_end(); + + return ret; +} diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c index d049cba8a3..b132e39097 100644 --- a/tests/test-blockjob-txn.c +++ b/tests/test-blockjob-txn.c @@ 
-13,7 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/main-loop.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" #include "sysemu/block-backend.h" typedef struct { @@ -24,10 +24,6 @@ typedef struct { int *result; } TestBlockJob; -static const BlockJobDriver test_block_job_driver = { - .instance_size = sizeof(TestBlockJob), -}; - static void test_block_job_complete(BlockJob *job, void *opaque) { BlockDriverState *bs = blk_bs(job->blk); @@ -77,6 +73,11 @@ static void test_block_job_cb(void *opaque, int ret) g_free(data); } +static const BlockJobDriver test_block_job_driver = { + .instance_size = sizeof(TestBlockJob), + .start = test_block_job_run, +}; + /* Create a block job that completes with a given return code after a given * number of event loop iterations. The return code is stored in the given * result pointer. @@ -98,15 +99,15 @@ static BlockJob *test_block_job_start(unsigned int iterations, bs = bdrv_new(); snprintf(job_id, sizeof(job_id), "job%u", counter++); s = block_job_create(job_id, &test_block_job_driver, bs, 0, - test_block_job_cb, data, &error_abort); + BLOCK_JOB_DEFAULT, test_block_job_cb, + data, &error_abort); s->iterations = iterations; s->use_timer = use_timer; s->rc = rc; s->result = result; - s->common.co = qemu_coroutine_create(test_block_job_run, s); data->job = s; data->result = result; - qemu_coroutine_enter(s->common.co); + block_job_start(&s->common); return &s->common; } diff --git a/tests/test-blockjob.c b/tests/test-blockjob.c index 5b0e934a0c..60b78a3342 100644 --- a/tests/test-blockjob.c +++ b/tests/test-blockjob.c @@ -13,7 +13,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/main-loop.h" -#include "block/blockjob.h" +#include "block/blockjob_int.h" #include "sysemu/block-backend.h" static const BlockJobDriver test_block_job_driver = { @@ -31,7 +31,7 @@ static BlockJob *do_test_id(BlockBackend *blk, const char *id, Error *errp = NULL; job = block_job_create(id, &test_block_job_driver, blk_bs(blk), 0, - block_job_cb, NULL, &errp); + BLOCK_JOB_DEFAULT, block_job_cb, NULL, &errp); if (should_succeed) { g_assert_null(errp); g_assert_nonnull(job); diff --git a/tests/test-bufferiszero.c b/tests/test-bufferiszero.c new file mode 100644 index 0000000000..42d194cadf --- /dev/null +++ b/tests/test-bufferiszero.c @@ -0,0 +1,78 @@ +/* + * QEMU buffer_is_zero test + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" + +static char buffer[8 * 1024 * 1024]; + +static void test_1(void) +{ + size_t s, a, o; + + /* Basic positive test. */ + g_assert(buffer_is_zero(buffer, sizeof(buffer))); + + /* Basic negative test. */ + buffer[sizeof(buffer) - 1] = 1; + g_assert(!buffer_is_zero(buffer, sizeof(buffer))); + buffer[sizeof(buffer) - 1] = 0; + + /* Positive tests for size and alignment. 
*/ + for (a = 1; a <= 64; a++) { + for (s = 1; s < 1024; s++) { + buffer[a - 1] = 1; + buffer[a + s] = 1; + g_assert(buffer_is_zero(buffer + a, s)); + buffer[a - 1] = 0; + buffer[a + s] = 0; + } + } + + /* Negative tests for size, alignment, and the offset of the marker. */ + for (a = 1; a <= 64; a++) { + for (s = 1; s < 1024; s++) { + for (o = 0; o < s; ++o) { + buffer[a + o] = 1; + g_assert(!buffer_is_zero(buffer + a, s)); + buffer[a + o] = 0; + } + } + } +} + +static void test_2(void) +{ + if (g_test_perf()) { + test_1(); + } else { + do { + test_1(); + } while (test_buffer_is_zero_next_accel()); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/cutils/bufferiszero", test_2); + + return g_test_run(); +} diff --git a/tests/test-char.c b/tests/test-char.c new file mode 100644 index 0000000000..241685afbb --- /dev/null +++ b/tests/test-char.c @@ -0,0 +1,253 @@ +#include "qemu/osdep.h" + +#include "qemu-common.h" +#include "qemu/config-file.h" +#include "sysemu/char.h" +#include "sysemu/sysemu.h" +#include "qapi/error.h" +#include "qmp-commands.h" + +typedef struct FeHandler { + int read_count; + int last_event; + char read_buf[128]; +} FeHandler; + +static int fe_can_read(void *opaque) +{ + FeHandler *h = opaque; + + return sizeof(h->read_buf) - h->read_count; +} + +static void fe_read(void *opaque, const uint8_t *buf, int size) +{ + FeHandler *h = opaque; + + g_assert_cmpint(size, <=, fe_can_read(opaque)); + + memcpy(h->read_buf + h->read_count, buf, size); + h->read_count += size; +} + +static void fe_event(void *opaque, int event) +{ + FeHandler *h = opaque; + + h->last_event = event; +} + +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS +static void char_stdio_test_subprocess(void) +{ + CharDriverState *chr; + CharBackend be; + int ret; + + chr = qemu_chr_new("label", "stdio"); + g_assert_nonnull(chr); + + qemu_chr_fe_init(&be, chr, &error_abort); + qemu_chr_fe_set_open(&be, true); + ret = qemu_chr_fe_write(&be, (void *)"buf", 4); + g_assert_cmpint(ret, ==, 4); + + qemu_chr_fe_deinit(&be); + qemu_chr_delete(chr); +} + +static void char_stdio_test(void) +{ + g_test_trap_subprocess("/char/stdio/subprocess", 0, 0); + g_test_trap_assert_passed(); + g_test_trap_assert_stdout("buf"); +} +#endif + + +static void char_ringbuf_test(void) +{ + QemuOpts *opts; + CharDriverState *chr; + CharBackend be; + char *data; + int ret; + + opts = qemu_opts_create(qemu_find_opts("chardev"), "ringbuf-label", + 1, &error_abort); + qemu_opt_set(opts, "backend", "ringbuf", &error_abort); + + qemu_opt_set(opts, "size", "5", &error_abort); + chr = qemu_chr_new_from_opts(opts, NULL); + g_assert_null(chr); + qemu_opts_del(opts); + + opts = qemu_opts_create(qemu_find_opts("chardev"), "ringbuf-label", + 1, &error_abort); + qemu_opt_set(opts, "backend", "ringbuf", &error_abort); + qemu_opt_set(opts, "size", "2", &error_abort); + chr = qemu_chr_new_from_opts(opts, &error_abort); + g_assert_nonnull(chr); + qemu_opts_del(opts); + + qemu_chr_fe_init(&be, chr, &error_abort); + ret = qemu_chr_fe_write(&be, (void *)"buff", 4); + g_assert_cmpint(ret, ==, 4); + + data = qmp_ringbuf_read("ringbuf-label", 4, false, 0, &error_abort); + g_assert_cmpstr(data, ==, "ff"); + g_free(data); + + data = qmp_ringbuf_read("ringbuf-label", 4, false, 0, &error_abort); + g_assert_cmpstr(data, ==, ""); + g_free(data); + + qemu_chr_fe_deinit(&be); + qemu_chr_delete(chr); +} + +static void char_mux_test(void) +{ + QemuOpts *opts; + CharDriverState *chr, *base; + char *data; + FeHandler h1 = { 0, }, h2 = { 
0, }; + CharBackend chr_be1, chr_be2; + + opts = qemu_opts_create(qemu_find_opts("chardev"), "mux-label", + 1, &error_abort); + qemu_opt_set(opts, "backend", "ringbuf", &error_abort); + qemu_opt_set(opts, "size", "128", &error_abort); + qemu_opt_set(opts, "mux", "on", &error_abort); + chr = qemu_chr_new_from_opts(opts, &error_abort); + g_assert_nonnull(chr); + qemu_opts_del(opts); + + qemu_chr_fe_init(&chr_be1, chr, &error_abort); + qemu_chr_fe_set_handlers(&chr_be1, + fe_can_read, + fe_read, + fe_event, + &h1, + NULL, true); + + qemu_chr_fe_init(&chr_be2, chr, &error_abort); + qemu_chr_fe_set_handlers(&chr_be2, + fe_can_read, + fe_read, + fe_event, + &h2, + NULL, true); + qemu_chr_fe_take_focus(&chr_be2); + + base = qemu_chr_find("mux-label-base"); + g_assert_cmpint(qemu_chr_be_can_write(base), !=, 0); + + qemu_chr_be_write(base, (void *)"hello", 6); + g_assert_cmpint(h1.read_count, ==, 0); + g_assert_cmpint(h2.read_count, ==, 6); + g_assert_cmpstr(h2.read_buf, ==, "hello"); + h2.read_count = 0; + + /* switch focus */ + qemu_chr_be_write(base, (void *)"\1c", 2); + + qemu_chr_be_write(base, (void *)"hello", 6); + g_assert_cmpint(h2.read_count, ==, 0); + g_assert_cmpint(h1.read_count, ==, 6); + g_assert_cmpstr(h1.read_buf, ==, "hello"); + h1.read_count = 0; + + /* remove first handler */ + qemu_chr_fe_set_handlers(&chr_be1, NULL, NULL, NULL, NULL, NULL, true); + qemu_chr_be_write(base, (void *)"hello", 6); + g_assert_cmpint(h1.read_count, ==, 0); + g_assert_cmpint(h2.read_count, ==, 0); + + qemu_chr_be_write(base, (void *)"\1c", 2); + qemu_chr_be_write(base, (void *)"hello", 6); + g_assert_cmpint(h1.read_count, ==, 0); + g_assert_cmpint(h2.read_count, ==, 6); + g_assert_cmpstr(h2.read_buf, ==, "hello"); + h2.read_count = 0; + + /* print help */ + qemu_chr_be_write(base, (void *)"\1?", 2); + data = qmp_ringbuf_read("mux-label-base", 128, false, 0, &error_abort); + g_assert_cmpint(strlen(data), !=, 0); + g_free(data); + + qemu_chr_fe_deinit(&chr_be1); + qemu_chr_fe_deinit(&chr_be2); + qemu_chr_delete(chr); +} + +static void char_null_test(void) +{ + Error *err = NULL; + CharDriverState *chr; + CharBackend be; + int ret; + + chr = qemu_chr_find("label-null"); + g_assert_null(chr); + + chr = qemu_chr_new("label-null", "null"); + chr = qemu_chr_find("label-null"); + g_assert_nonnull(chr); + + g_assert(qemu_chr_has_feature(chr, + QEMU_CHAR_FEATURE_FD_PASS) == false); + g_assert(qemu_chr_has_feature(chr, + QEMU_CHAR_FEATURE_RECONNECTABLE) == false); + + /* check max avail */ + qemu_chr_fe_init(&be, chr, &error_abort); + qemu_chr_fe_init(&be, chr, &err); + error_free_or_abort(&err); + + /* deinit & reinit */ + qemu_chr_fe_deinit(&be); + qemu_chr_fe_init(&be, chr, &error_abort); + + qemu_chr_fe_set_open(&be, true); + + qemu_chr_fe_set_handlers(&be, + fe_can_read, + fe_read, + fe_event, + NULL, NULL, true); + + ret = qemu_chr_fe_write(&be, (void *)"buf", 4); + g_assert_cmpint(ret, ==, 4); + + qemu_chr_fe_deinit(&be); + qemu_chr_delete(chr); +} + +static void char_invalid_test(void) +{ + CharDriverState *chr; + + chr = qemu_chr_new("label-invalid", "invalid"); + g_assert_null(chr); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + module_call_init(MODULE_INIT_QOM); + qemu_add_opts(&qemu_chardev_opts); + + g_test_add_func("/char/null", char_null_test); + g_test_add_func("/char/invalid", char_invalid_test); + g_test_add_func("/char/ringbuf", char_ringbuf_test); + g_test_add_func("/char/mux", char_mux_test); +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS + 
g_test_add_func("/char/stdio/subprocess", char_stdio_test_subprocess); + g_test_add_func("/char/stdio", char_stdio_test); +#endif + + return g_test_run(); +} diff --git a/tests/test-coroutine.c b/tests/test-coroutine.c index ee5e06d327..abd97c23c1 100644 --- a/tests/test-coroutine.c +++ b/tests/test-coroutine.c @@ -52,6 +52,47 @@ static void test_self(void) qemu_coroutine_enter(coroutine); } +/* + * Check that qemu_coroutine_entered() works + */ + +static void coroutine_fn verify_entered_step_2(void *opaque) +{ + Coroutine *caller = (Coroutine *)opaque; + + g_assert(qemu_coroutine_entered(caller)); + g_assert(qemu_coroutine_entered(qemu_coroutine_self())); + qemu_coroutine_yield(); + + /* Once more to check it still works after yielding */ + g_assert(qemu_coroutine_entered(caller)); + g_assert(qemu_coroutine_entered(qemu_coroutine_self())); + qemu_coroutine_yield(); +} + +static void coroutine_fn verify_entered_step_1(void *opaque) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *coroutine; + + g_assert(qemu_coroutine_entered(self)); + + coroutine = qemu_coroutine_create(verify_entered_step_2, self); + g_assert(!qemu_coroutine_entered(coroutine)); + qemu_coroutine_enter(coroutine); + g_assert(!qemu_coroutine_entered(coroutine)); + qemu_coroutine_enter(coroutine); +} + +static void test_entered(void) +{ + Coroutine *coroutine; + + coroutine = qemu_coroutine_create(verify_entered_step_1, NULL); + g_assert(!qemu_coroutine_entered(coroutine)); + qemu_coroutine_enter(coroutine); +} + /* * Check that coroutines may nest multiple levels */ @@ -139,13 +180,20 @@ static void test_co_queue(void) { Coroutine *c1; Coroutine *c2; + Coroutine tmp; c2 = qemu_coroutine_create(c2_fn, NULL); c1 = qemu_coroutine_create(c1_fn, c2); qemu_coroutine_enter(c1); + + /* c1 shouldn't be used any more now; make sure we segfault if it is */ + tmp = *c1; memset(c1, 0xff, sizeof(Coroutine)); qemu_coroutine_enter(c2); + + /* Must restore the coroutine now to avoid corrupted pool */ + *c1 = tmp; } /* @@ -382,6 +430,7 @@ int main(int argc, char **argv) g_test_add_func("/basic/yield", test_yield); g_test_add_func("/basic/nesting", test_nesting); g_test_add_func("/basic/self", test_self); + g_test_add_func("/basic/entered", test_entered); g_test_add_func("/basic/in_coroutine", test_in_coroutine); g_test_add_func("/basic/order", test_order); if (g_test_perf()) { diff --git a/tests/test-crypto-block.c b/tests/test-crypto-block.c index a38110d3ff..1957a86743 100644 --- a/tests/test-crypto-block.c +++ b/tests/test-crypto-block.c @@ -28,7 +28,7 @@ #include #endif -#if defined(CONFIG_UUID) && (defined(_WIN32) || defined RUSAGE_THREAD) +#if (defined(_WIN32) || defined RUSAGE_THREAD) #define TEST_LUKS #else #undef TEST_LUKS diff --git a/tests/test-crypto-cipher.c b/tests/test-crypto-cipher.c index 1b5130d5f6..5d9e535e2e 100644 --- a/tests/test-crypto-cipher.c +++ b/tests/test-crypto-cipher.c @@ -370,6 +370,72 @@ static QCryptoCipherTestData test_data[] = { "eb4a427d1923ce3ff262735779a418f2" "0a282df920147beabe421ee5319d0568", }, + { + /* Bad config - cast5-128 has 8 byte block size + * which is incompatible with XTS + */ + .path = "/crypto/cipher/cast5-xts-128", + .alg = QCRYPTO_CIPHER_ALG_CAST5_128, + .mode = QCRYPTO_CIPHER_MODE_XTS, + .key = + "27182818284590452353602874713526" + "31415926535897932384626433832795", + }, + { + /* NIST F.5.1 CTR-AES128.Encrypt */ + .path = "/crypto/cipher/aes-ctr-128", + .alg = QCRYPTO_CIPHER_ALG_AES_128, + .mode = QCRYPTO_CIPHER_MODE_CTR, + .key = "2b7e151628aed2a6abf7158809cf4f3c", + .iv 
= "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", + .plaintext = + "6bc1bee22e409f96e93d7e117393172a" + "ae2d8a571e03ac9c9eb76fac45af8e51" + "30c81c46a35ce411e5fbc1191a0a52ef" + "f69f2445df4f9b17ad2b417be66c3710", + .ciphertext = + "874d6191b620e3261bef6864990db6ce" + "9806f66b7970fdff8617187bb9fffdff" + "5ae4df3edbd5d35e5b4f09020db03eab" + "1e031dda2fbe03d1792170a0f3009cee", + }, + { + /* NIST F.5.3 CTR-AES192.Encrypt */ + .path = "/crypto/cipher/aes-ctr-192", + .alg = QCRYPTO_CIPHER_ALG_AES_192, + .mode = QCRYPTO_CIPHER_MODE_CTR, + .key = "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", + .iv = "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", + .plaintext = + "6bc1bee22e409f96e93d7e117393172a" + "ae2d8a571e03ac9c9eb76fac45af8e51" + "30c81c46a35ce411e5fbc1191a0a52ef" + "f69f2445df4f9b17ad2b417be66c3710", + .ciphertext = + "1abc932417521ca24f2b0459fe7e6e0b" + "090339ec0aa6faefd5ccc2c6f4ce8e94" + "1e36b26bd1ebc670d1bd1d665620abf7" + "4f78a7f6d29809585a97daec58c6b050", + }, + { + /* NIST F.5.5 CTR-AES256.Encrypt */ + .path = "/crypto/cipher/aes-ctr-256", + .alg = QCRYPTO_CIPHER_ALG_AES_256, + .mode = QCRYPTO_CIPHER_MODE_CTR, + .key = "603deb1015ca71be2b73aef0857d7781" + "1f352c073b6108d72d9810a30914dff4", + .iv = "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", + .plaintext = + "6bc1bee22e409f96e93d7e117393172a" + "ae2d8a571e03ac9c9eb76fac45af8e51" + "30c81c46a35ce411e5fbc1191a0a52ef" + "f69f2445df4f9b17ad2b417be66c3710", + .ciphertext = + "601ec313775789a5b7a7f504bbf3d228" + "f443e3ca4d62b59aca84e990cacaf5c5" + "2b0930daa23de94ce87017ba2d84988d" + "dfc9c58db67aada613c2dd08457941a6", + } }; @@ -432,15 +498,23 @@ static void test_cipher(const void *opaque) const QCryptoCipherTestData *data = opaque; QCryptoCipher *cipher; - uint8_t *key, *iv, *ciphertext, *plaintext, *outtext; - size_t nkey, niv, nciphertext, nplaintext; - char *outtexthex; + uint8_t *key, *iv = NULL, *ciphertext = NULL, + *plaintext = NULL, *outtext = NULL; + size_t nkey, niv = 0, nciphertext = 0, nplaintext = 0; + char *outtexthex = NULL; size_t ivsize, keysize, blocksize; + Error *err = NULL; nkey = unhex_string(data->key, &key); - niv = unhex_string(data->iv, &iv); - nciphertext = unhex_string(data->ciphertext, &ciphertext); - nplaintext = unhex_string(data->plaintext, &plaintext); + if (data->iv) { + niv = unhex_string(data->iv, &iv); + } + if (data->ciphertext) { + nciphertext = unhex_string(data->ciphertext, &ciphertext); + } + if (data->plaintext) { + nplaintext = unhex_string(data->plaintext, &plaintext); + } g_assert(nciphertext == nplaintext); @@ -449,8 +523,15 @@ static void test_cipher(const void *opaque) cipher = qcrypto_cipher_new( data->alg, data->mode, key, nkey, - &error_abort); - g_assert(cipher != NULL); + &err); + if (data->plaintext) { + g_assert(err == NULL); + g_assert(cipher != NULL); + } else { + error_free_or_abort(&err); + g_assert(cipher == NULL); + goto cleanup; + } keysize = qcrypto_cipher_get_key_len(data->alg); blocksize = qcrypto_cipher_get_block_len(data->alg); @@ -498,6 +579,7 @@ static void test_cipher(const void *opaque) g_assert_cmpstr(outtexthex, ==, data->plaintext); + cleanup: g_free(outtext); g_free(outtexthex); g_free(key); @@ -589,7 +671,7 @@ int main(int argc, char **argv) g_assert(qcrypto_init(NULL) == 0); for (i = 0; i < G_N_ELEMENTS(test_data); i++) { - if (qcrypto_cipher_supports(test_data[i].alg)) { + if (qcrypto_cipher_supports(test_data[i].alg, test_data[i].mode)) { g_test_add_data_func(test_data[i].path, &test_data[i], test_cipher); } } diff --git a/tests/test-crypto-hash.c b/tests/test-crypto-hash.c index 
42fc77a107..214a9f72c3 100644 --- a/tests/test-crypto-hash.c +++ b/tests/test-crypto-hash.c @@ -89,8 +89,6 @@ static void test_hash_alloc(void) { size_t i; - g_assert(qcrypto_init(NULL) == 0); - for (i = 0; i < G_N_ELEMENTS(expected_outputs) ; i++) { uint8_t *result = NULL; size_t resultlen = 0; @@ -123,8 +121,6 @@ static void test_hash_prealloc(void) { size_t i; - g_assert(qcrypto_init(NULL) == 0); - for (i = 0; i < G_N_ELEMENTS(expected_outputs) ; i++) { uint8_t *result; size_t resultlen; @@ -161,8 +157,6 @@ static void test_hash_iov(void) { size_t i; - g_assert(qcrypto_init(NULL) == 0); - for (i = 0; i < G_N_ELEMENTS(expected_outputs) ; i++) { struct iovec iov[3] = { { .iov_base = (char *)INPUT_TEXT1, .iov_len = strlen(INPUT_TEXT1) }, @@ -199,8 +193,6 @@ static void test_hash_digest(void) { size_t i; - g_assert(qcrypto_init(NULL) == 0); - for (i = 0; i < G_N_ELEMENTS(expected_outputs) ; i++) { int ret; char *digest; @@ -230,8 +222,6 @@ static void test_hash_base64(void) { size_t i; - g_assert(qcrypto_init(NULL) == 0); - for (i = 0; i < G_N_ELEMENTS(expected_outputs) ; i++) { int ret; char *digest; @@ -253,6 +243,8 @@ static void test_hash_base64(void) int main(int argc, char **argv) { + g_assert(qcrypto_init(NULL) == 0); + g_test_init(&argc, &argv, NULL); g_test_add_func("/crypto/hash/iov", test_hash_iov); g_test_add_func("/crypto/hash/alloc", test_hash_alloc); diff --git a/tests/test-crypto-pbkdf.c b/tests/test-crypto-pbkdf.c index 8ceceb1827..d937aff6b2 100644 --- a/tests/test-crypto-pbkdf.c +++ b/tests/test-crypto-pbkdf.c @@ -261,7 +261,6 @@ static QCryptoPbkdfTestData test_data[] = { "\xcc\x4a\x5e\x6d\xca\x04\xec\x58", .nout = 32 }, -#if 0 { .path = "/crypto/pbkdf/nonrfc/sha512/iter1200", .hash = QCRYPTO_HASH_ALG_SHA512, @@ -279,6 +278,58 @@ static QCryptoPbkdfTestData test_data[] = { "\x76\x14\x80\xf3\xe3\x7a\x22\xb9", .nout = 32 }, + { + .path = "/crypto/pbkdf/nonrfc/sha224/iter1200", + .hash = QCRYPTO_HASH_ALG_SHA224, + .iterations = 1200, + .key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + .nkey = 129, + .salt = "pass phrase exceeds block size", + .nsalt = 30, + .out = "\x13\x3b\x88\x0c\x0e\x52\xa2\x41" + "\x49\x33\x35\xa6\xc3\x83\xae\x23" + "\xf6\x77\x43\x9e\x5b\x30\x92\x3e" + "\x4a\x3a\xaa\x24\x69\x3c\xed\x20", + .nout = 32 + }, + { + .path = "/crypto/pbkdf/nonrfc/sha384/iter1200", + .hash = QCRYPTO_HASH_ALG_SHA384, + .iterations = 1200, + .key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + .nkey = 129, + .salt = "pass phrase exceeds block size", + .nsalt = 30, + .out = "\xfe\xe3\xe1\x84\xc9\x25\x3e\x10" + "\x47\xc8\x7d\x53\xc6\xa5\xe3\x77" + "\x29\x41\x76\xbd\x4b\xe3\x9b\xac" + "\x05\x6c\x11\xdd\x17\xc5\x93\x80", + .nout = 32 + }, + { + .path = "/crypto/pbkdf/nonrfc/ripemd160/iter1200", + .hash = QCRYPTO_HASH_ALG_RIPEMD160, + .iterations = 1200, + .key = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX", + .nkey = 129, + .salt = "pass phrase exceeds block size", + .nsalt = 30, + .out = "\xd6\xcb\xd8\xa7\xdb\x0c\xa2\x2a" + "\x23\x5e\x47\xaf\xdb\xda\xa8\xef" + "\xe4\x01\x0d\x6f\xb5\x33\xc8\xbd" + "\xce\xbf\x91\x14\x8b\x5c\x48\x41", + .nout = 32 + }, +#if 0 { .path = "/crypto/pbkdf/nonrfc/whirlpool/iter1200", .hash = QCRYPTO_HASH_ALG_WHIRLPOOL, @@ -358,6 +409,7 @@ static void 
test_pbkdf_timing(void) iters = qcrypto_pbkdf2_count_iters(QCRYPTO_HASH_ALG_SHA256, key, sizeof(key), salt, sizeof(salt), + 32, &error_abort); g_assert(iters >= (1 << 15)); diff --git a/tests/test-cutils.c b/tests/test-cutils.c index 64e3e95ce2..20b0f59ba2 100644 --- a/tests/test-cutils.c +++ b/tests/test-cutils.c @@ -378,7 +378,7 @@ static void test_qemu_strtol_hex(void) static void test_qemu_strtol_max(void) { - const char *str = g_strdup_printf("%ld", LONG_MAX); + char *str = g_strdup_printf("%ld", LONG_MAX); char f = 'X'; const char *endptr = &f; long res = 999; @@ -389,6 +389,7 @@ static void test_qemu_strtol_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, LONG_MAX); g_assert(endptr == str + strlen(str)); + g_free(str); } static void test_qemu_strtol_overflow(void) @@ -497,7 +498,7 @@ static void test_qemu_strtol_full_trailing(void) static void test_qemu_strtol_full_max(void) { - const char *str = g_strdup_printf("%ld", LONG_MAX); + char *str = g_strdup_printf("%ld", LONG_MAX); long res; int err; @@ -505,6 +506,7 @@ static void test_qemu_strtol_full_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, LONG_MAX); + g_free(str); } static void test_qemu_strtoul_correct(void) @@ -662,7 +664,7 @@ static void test_qemu_strtoul_hex(void) static void test_qemu_strtoul_max(void) { - const char *str = g_strdup_printf("%lu", ULONG_MAX); + char *str = g_strdup_printf("%lu", ULONG_MAX); char f = 'X'; const char *endptr = &f; unsigned long res = 999; @@ -673,6 +675,7 @@ static void test_qemu_strtoul_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, ULONG_MAX); g_assert(endptr == str + strlen(str)); + g_free(str); } static void test_qemu_strtoul_overflow(void) @@ -776,7 +779,7 @@ static void test_qemu_strtoul_full_trailing(void) static void test_qemu_strtoul_full_max(void) { - const char *str = g_strdup_printf("%lu", ULONG_MAX); + char *str = g_strdup_printf("%lu", ULONG_MAX); unsigned long res = 999; int err; @@ -784,6 +787,7 @@ static void test_qemu_strtoul_full_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, ULONG_MAX); + g_free(str); } static void test_qemu_strtoll_correct(void) @@ -941,7 +945,7 @@ static void test_qemu_strtoll_hex(void) static void test_qemu_strtoll_max(void) { - const char *str = g_strdup_printf("%lld", LLONG_MAX); + char *str = g_strdup_printf("%lld", LLONG_MAX); char f = 'X'; const char *endptr = &f; int64_t res = 999; @@ -952,6 +956,7 @@ static void test_qemu_strtoll_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, LLONG_MAX); g_assert(endptr == str + strlen(str)); + g_free(str); } static void test_qemu_strtoll_overflow(void) @@ -1058,7 +1063,7 @@ static void test_qemu_strtoll_full_trailing(void) static void test_qemu_strtoll_full_max(void) { - const char *str = g_strdup_printf("%lld", LLONG_MAX); + char *str = g_strdup_printf("%lld", LLONG_MAX); int64_t res; int err; @@ -1066,6 +1071,7 @@ static void test_qemu_strtoll_full_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, LLONG_MAX); + g_free(str); } static void test_qemu_strtoull_correct(void) @@ -1223,7 +1229,7 @@ static void test_qemu_strtoull_hex(void) static void test_qemu_strtoull_max(void) { - const char *str = g_strdup_printf("%llu", ULLONG_MAX); + char *str = g_strdup_printf("%llu", ULLONG_MAX); char f = 'X'; const char *endptr = &f; uint64_t res = 999; @@ -1234,6 +1240,7 @@ static void test_qemu_strtoull_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, ULLONG_MAX); g_assert(endptr == str + strlen(str)); + 
g_free(str); } static void test_qemu_strtoull_overflow(void) @@ -1339,7 +1346,7 @@ static void test_qemu_strtoull_full_trailing(void) static void test_qemu_strtoull_full_max(void) { - const char *str = g_strdup_printf("%lld", ULLONG_MAX); + char *str = g_strdup_printf("%lld", ULLONG_MAX); uint64_t res = 999; int err; @@ -1347,6 +1354,7 @@ static void test_qemu_strtoull_full_max(void) g_assert_cmpint(err, ==, 0); g_assert_cmpint(res, ==, ULLONG_MAX); + g_free(str); } static void test_qemu_strtosz_simple(void) diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index c0e9895fb9..9b7495cc32 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -11,6 +11,8 @@ #include "qemu/osdep.h" #include "qemu/hbitmap.h" +#include "qemu/bitmap.h" +#include "block/block.h" #define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6) @@ -20,6 +22,7 @@ typedef struct TestHBitmapData { HBitmap *hb; + HBitmap *meta; unsigned long *bits; size_t size; size_t old_size; @@ -91,6 +94,14 @@ static void hbitmap_test_init(TestHBitmapData *data, } } +static void hbitmap_test_init_meta(TestHBitmapData *data, + uint64_t size, int granularity, + int meta_chunk) +{ + hbitmap_test_init(data, size, granularity); + data->meta = hbitmap_create_meta(data->hb, meta_chunk); +} + static inline size_t hbitmap_test_array_size(size_t bits) { size_t n = DIV_ROUND_UP(bits, BITS_PER_LONG); @@ -133,6 +144,9 @@ static void hbitmap_test_teardown(TestHBitmapData *data, const void *unused) { if (data->hb) { + if (data->meta) { + hbitmap_free_meta(data->hb); + } hbitmap_free(data->hb); data->hb = NULL; } @@ -634,6 +648,249 @@ static void test_hbitmap_truncate_shrink_large(TestHBitmapData *data, hbitmap_test_truncate(data, size, -diff, 0); } +static void hbitmap_check_meta(TestHBitmapData *data, + int64_t start, int count) +{ + int64_t i; + + for (i = 0; i < data->size; i++) { + if (i >= start && i < start + count) { + g_assert(hbitmap_get(data->meta, i)); + } else { + g_assert(!hbitmap_get(data->meta, i)); + } + } +} + +static void hbitmap_test_meta(TestHBitmapData *data, + int64_t start, int count, + int64_t check_start, int check_count) +{ + hbitmap_reset_all(data->hb); + hbitmap_reset_all(data->meta); + + /* Test "unset" -> "unset" will not update meta. 
*/ + hbitmap_reset(data->hb, start, count); + hbitmap_check_meta(data, 0, 0); + + /* Test "unset" -> "set" will update meta */ + hbitmap_set(data->hb, start, count); + hbitmap_check_meta(data, check_start, check_count); + + /* Test "set" -> "set" will not update meta */ + hbitmap_reset_all(data->meta); + hbitmap_set(data->hb, start, count); + hbitmap_check_meta(data, 0, 0); + + /* Test "set" -> "unset" will update meta */ + hbitmap_reset_all(data->meta); + hbitmap_reset(data->hb, start, count); + hbitmap_check_meta(data, check_start, check_count); +} + +static void hbitmap_test_meta_do(TestHBitmapData *data, int chunk_size) +{ + uint64_t size = chunk_size * 100; + hbitmap_test_init_meta(data, size, 0, chunk_size); + + hbitmap_test_meta(data, 0, 1, 0, chunk_size); + hbitmap_test_meta(data, 0, chunk_size, 0, chunk_size); + hbitmap_test_meta(data, chunk_size - 1, 1, 0, chunk_size); + hbitmap_test_meta(data, chunk_size - 1, 2, 0, chunk_size * 2); + hbitmap_test_meta(data, chunk_size - 1, chunk_size + 1, 0, chunk_size * 2); + hbitmap_test_meta(data, chunk_size - 1, chunk_size + 2, 0, chunk_size * 3); + hbitmap_test_meta(data, 7 * chunk_size - 1, chunk_size + 2, + 6 * chunk_size, chunk_size * 3); + hbitmap_test_meta(data, size - 1, 1, size - chunk_size, chunk_size); + hbitmap_test_meta(data, 0, size, 0, size); +} + +static void test_hbitmap_meta_byte(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BITS_PER_BYTE); +} + +static void test_hbitmap_meta_word(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BITS_PER_LONG); +} + +static void test_hbitmap_meta_sector(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_meta_do(data, BDRV_SECTOR_SIZE * BITS_PER_BYTE); +} + +/** + * Create an HBitmap and test set/unset. + */ +static void test_hbitmap_meta_one(TestHBitmapData *data, const void *unused) +{ + int i; + int64_t offsets[] = { + 0, 1, L1 - 1, L1, L1 + 1, L2 - 1, L2, L2 + 1, L3 - 1, L3, L3 + 1 + }; + + hbitmap_test_init_meta(data, L3 * 2, 0, 1); + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + hbitmap_test_meta(data, offsets[i], 1, offsets[i], 1); + hbitmap_test_meta(data, offsets[i], L1, offsets[i], L1); + hbitmap_test_meta(data, offsets[i], L2, offsets[i], L2); + } +} + +static void test_hbitmap_serialize_granularity(TestHBitmapData *data, + const void *unused) +{ + int r; + + hbitmap_test_init(data, L3 * 2, 3); + r = hbitmap_serialization_granularity(data->hb); + g_assert_cmpint(r, ==, 64 << 3); +} + +static void test_hbitmap_meta_zero(TestHBitmapData *data, const void *unused) +{ + hbitmap_test_init_meta(data, 0, 0, 1); + + hbitmap_check_meta(data, 0, 0); +} + +static void hbitmap_test_serialize_range(TestHBitmapData *data, + uint8_t *buf, size_t buf_size, + uint64_t pos, uint64_t count) +{ + size_t i; + unsigned long *el = (unsigned long *)buf; + + assert(hbitmap_granularity(data->hb) == 0); + hbitmap_reset_all(data->hb); + memset(buf, 0, buf_size); + if (count) { + hbitmap_set(data->hb, pos, count); + } + hbitmap_serialize_part(data->hb, buf, 0, data->size); + + /* Serialized buffer is inherently LE, convert it back manually to test */ + for (i = 0; i < buf_size / sizeof(unsigned long); i++) { + el[i] = (BITS_PER_LONG == 32 ? 
le32_to_cpu(el[i]) : le64_to_cpu(el[i])); + } + + for (i = 0; i < data->size; i++) { + int is_set = test_bit(i, (unsigned long *)buf); + if (i >= pos && i < pos + count) { + g_assert(is_set); + } else { + g_assert(!is_set); + } + } + + /* Re-serialize for deserialization testing */ + memset(buf, 0, buf_size); + hbitmap_serialize_part(data->hb, buf, 0, data->size); + hbitmap_reset_all(data->hb); + hbitmap_deserialize_part(data->hb, buf, 0, data->size, true); + + for (i = 0; i < data->size; i++) { + int is_set = hbitmap_get(data->hb, i); + if (i >= pos && i < pos + count) { + g_assert(is_set); + } else { + g_assert(!is_set); + } + } +} + +static void test_hbitmap_serialize_basic(TestHBitmapData *data, + const void *unused) +{ + int i, j; + size_t buf_size; + uint8_t *buf; + uint64_t positions[] = { 0, 1, L1 - 1, L1, L2 - 1, L2, L2 + 1, L3 - 1 }; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + buf_size = hbitmap_serialization_size(data->hb, 0, data->size); + buf = g_malloc0(buf_size); + + for (i = 0; i < num_positions; i++) { + for (j = 0; j < num_positions; j++) { + hbitmap_test_serialize_range(data, buf, buf_size, + positions[i], + MIN(positions[j], L3 - positions[i])); + } + } + + g_free(buf); +} + +static void test_hbitmap_serialize_part(TestHBitmapData *data, + const void *unused) +{ + int i, j, k; + size_t buf_size; + uint8_t *buf; + uint64_t positions[] = { 0, 1, L1 - 1, L1, L2 - 1, L2, L2 + 1, L3 - 1 }; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + buf_size = L2; + buf = g_malloc0(buf_size); + + for (i = 0; i < num_positions; i++) { + hbitmap_set(data->hb, positions[i], 1); + } + + for (i = 0; i < data->size; i += buf_size) { + unsigned long *el = (unsigned long *)buf; + hbitmap_serialize_part(data->hb, buf, i, buf_size); + for (j = 0; j < buf_size / sizeof(unsigned long); j++) { + el[j] = (BITS_PER_LONG == 32 ? 
le32_to_cpu(el[j]) : le64_to_cpu(el[j])); + } + + for (j = 0; j < buf_size; j++) { + bool should_set = false; + for (k = 0; k < num_positions; k++) { + if (positions[k] == j + i) { + should_set = true; + break; + } + } + g_assert_cmpint(should_set, ==, test_bit(j, (unsigned long *)buf)); + } + } + + g_free(buf); +} + +static void test_hbitmap_serialize_zeroes(TestHBitmapData *data, + const void *unused) +{ + int i; + HBitmapIter iter; + int64_t next; + uint64_t min_l1 = MAX(L1, 64); + uint64_t positions[] = { 0, min_l1, L2, L3 - min_l1}; + int num_positions = sizeof(positions) / sizeof(positions[0]); + + hbitmap_test_init(data, L3, 0); + + for (i = 0; i < num_positions; i++) { + hbitmap_set(data->hb, positions[i], L1); + } + + for (i = 0; i < num_positions; i++) { + hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true); + hbitmap_iter_init(&iter, data->hb, 0); + next = hbitmap_iter_next(&iter); + if (i == num_positions - 1) { + g_assert_cmpint(next, ==, -1); + } else { + g_assert_cmpint(next, ==, positions[i + 1]); + } + } +} + static void hbitmap_test_add(const char *testpath, void (*test_func)(TestHBitmapData *data, const void *user_data)) { @@ -683,6 +940,21 @@ int main(int argc, char **argv) test_hbitmap_truncate_grow_large); hbitmap_test_add("/hbitmap/truncate/shrink/large", test_hbitmap_truncate_shrink_large); + + hbitmap_test_add("/hbitmap/meta/zero", test_hbitmap_meta_zero); + hbitmap_test_add("/hbitmap/meta/one", test_hbitmap_meta_one); + hbitmap_test_add("/hbitmap/meta/byte", test_hbitmap_meta_byte); + hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word); + hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector); + + hbitmap_test_add("/hbitmap/serialize/granularity", + test_hbitmap_serialize_granularity); + hbitmap_test_add("/hbitmap/serialize/basic", + test_hbitmap_serialize_basic); + hbitmap_test_add("/hbitmap/serialize/part", + test_hbitmap_serialize_part); + hbitmap_test_add("/hbitmap/serialize/zeroes", + test_hbitmap_serialize_zeroes); g_test_run(); return 0; diff --git a/tests/test-int128.c b/tests/test-int128.c index 4390123bd3..b86a3c76e6 100644 --- a/tests/test-int128.c +++ b/tests/test-int128.c @@ -41,7 +41,7 @@ static Int128 expand(uint32_t x) uint64_t l, h; l = expand16(x & 65535); h = expand16(x >> 16); - return (Int128) {l, h}; + return (Int128) int128_make128(l, h); }; static void test_and(void) @@ -54,8 +54,8 @@ static void test_and(void) Int128 b = expand(tests[j]); Int128 r = expand(tests[i] & tests[j]); Int128 s = int128_and(a, b); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } } @@ -70,8 +70,8 @@ static void test_add(void) Int128 b = expand(tests[j]); Int128 r = expand(tests[i] + tests[j]); Int128 s = int128_add(a, b); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } } @@ -86,8 +86,8 @@ static void test_sub(void) Int128 b = expand(tests[j]); Int128 r = expand(tests[i] - tests[j]); Int128 s = int128_sub(a, b); - g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } } @@ -100,8 +100,8 @@ static void test_neg(void) Int128 a = expand(tests[i]); Int128 r = expand(-tests[i]); Int128 s = int128_neg(a); - 
g_assert_cmpuint(r.lo, ==, s.lo); - g_assert_cmpuint(r.hi, ==, s.hi); + g_assert_cmpuint(int128_getlo(r), ==, int128_getlo(s)); + g_assert_cmpuint(int128_gethi(r), ==, int128_gethi(s)); } } @@ -180,8 +180,8 @@ test_rshift_one(uint32_t x, int n, uint64_t h, uint64_t l) { Int128 a = expand(x); Int128 r = int128_rshift(a, n); - g_assert_cmpuint(r.lo, ==, l); - g_assert_cmpuint(r.hi, ==, h); + g_assert_cmpuint(int128_getlo(r), ==, l); + g_assert_cmpuint(int128_gethi(r), ==, h); } static void test_rshift(void) diff --git a/tests/test-io-channel-socket.c b/tests/test-io-channel-socket.c index f73e063d7d..aa88c3cf45 100644 --- a/tests/test-io-channel-socket.c +++ b/tests/test-io-channel-socket.c @@ -491,6 +491,37 @@ static void test_io_channel_unix_fd_pass(void) } g_free(fdrecv); } + +static void test_io_channel_unix_listen_cleanup(void) +{ + QIOChannelSocket *ioc; + struct sockaddr_un un; + int sock; + +#define TEST_SOCKET "test-io-channel-socket.sock" + + ioc = qio_channel_socket_new(); + + /* Manually bind ioc without calling the qio api to avoid setting + * the LISTEN feature */ + sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", TEST_SOCKET); + unlink(TEST_SOCKET); + bind(sock, (struct sockaddr *)&un, sizeof(un)); + ioc->fd = sock; + ioc->localAddrLen = sizeof(ioc->localAddr); + getsockname(sock, (struct sockaddr *)&ioc->localAddr, + &ioc->localAddrLen); + + g_assert(g_file_test(TEST_SOCKET, G_FILE_TEST_EXISTS)); + object_unref(OBJECT(ioc)); + g_assert(g_file_test(TEST_SOCKET, G_FILE_TEST_EXISTS)); + + unlink(TEST_SOCKET); +} + #endif /* _WIN32 */ @@ -562,6 +593,8 @@ int main(int argc, char **argv) test_io_channel_unix_async); g_test_add_func("/io/channel/socket/unix-fd-pass", test_io_channel_unix_fd_pass); + g_test_add_func("/io/channel/socket/unix-listen-cleanup", + test_io_channel_unix_listen_cleanup); #endif /* _WIN32 */ return g_test_run(); diff --git a/tests/test-io-channel-tls.c b/tests/test-io-channel-tls.c index 3c361a7bef..bd3ae2bf7a 100644 --- a/tests/test-io-channel-tls.c +++ b/tests/test-io-channel-tls.c @@ -27,6 +27,7 @@ #include "io/channel-tls.h" #include "io/channel-socket.h" #include "io-channel-helpers.h" +#include "crypto/init.h" #include "crypto/tlscredsx509.h" #include "qemu/acl.h" #include "qom/object_interfaces.h" @@ -265,6 +266,8 @@ int main(int argc, char **argv) { int ret; + g_assert(qcrypto_init(NULL) == 0); + module_call_init(MODULE_INIT_QOM); g_test_init(&argc, &argv, NULL); setenv("GNUTLS_FORCE_FIPS_MODE", "2", 1); diff --git a/tests/test-iov.c b/tests/test-iov.c index 46ae25efd6..a22d71fd2c 100644 --- a/tests/test-iov.c +++ b/tests/test-iov.c @@ -208,6 +208,9 @@ static void test_io(void) } while(k < j); } } + iov_free(iov, niov); + g_free(buf); + g_free(siov); exit(0); } else { @@ -246,6 +249,10 @@ static void test_io(void) test_iov_bytes(iov, niov, i, j - i); } } + + iov_free(iov, niov); + g_free(buf); + g_free(siov); } #endif } diff --git a/tests/test-qga.c b/tests/test-qga.c index dac8fb8c0a..868b02a40f 100644 --- a/tests/test-qga.c +++ b/tests/test-qga.c @@ -198,6 +198,26 @@ static void test_qga_ping(gconstpointer fix) QDECREF(ret); } +static void test_qga_invalid_args(gconstpointer fix) +{ + const TestFixture *fixture = fix; + QDict *ret, *error; + const gchar *class, *desc; + + ret = qmp_fd(fixture->fd, "{'execute': 'guest-ping', " + "'arguments': {'foo': 42 }}"); + g_assert_nonnull(ret); + + error = qdict_get_qdict(ret, "error"); + class = 
qdict_get_try_str(error, "class"); + desc = qdict_get_try_str(error, "desc"); + + g_assert_cmpstr(class, ==, "GenericError"); + g_assert_cmpstr(desc, ==, "QMP input object member 'foo' is unexpected"); + + QDECREF(ret); +} + static void test_qga_invalid_cmd(gconstpointer fix) { const TestFixture *fixture = fix; @@ -398,6 +418,7 @@ static void test_qga_file_ops(gconstpointer fix) /* check content */ path = g_build_filename(fixture->test_dir, "foo", NULL); f = fopen(path, "r"); + g_free(path); g_assert_nonnull(f); count = fread(tmp, 1, sizeof(tmp), f); g_assert_cmpint(count, ==, sizeof(helloworld)); @@ -700,7 +721,9 @@ static void test_qga_config(gconstpointer data) cwd = g_get_current_dir(); cmd = g_strdup_printf("%s%cqemu-ga -D", cwd, G_DIR_SEPARATOR); + g_free(cwd); g_shell_parse_argv(cmd, NULL, &argv, &error); + g_free(cmd); g_assert_no_error(error); env[0] = g_strdup_printf("QGA_CONF=tests%cdata%ctest-qga-config", @@ -708,6 +731,8 @@ static void test_qga_config(gconstpointer data) env[1] = NULL; g_spawn_sync(NULL, argv, env, 0, NULL, NULL, &out, &err, &status, &error); + g_strfreev(argv); + g_assert_no_error(error); g_assert_cmpstr(err, ==, ""); g_assert_cmpint(status, ==, 0); @@ -812,6 +837,7 @@ static void test_qga_guest_exec(gconstpointer fix) int64_t pid, now, exitcode; gsize len; bool exited; + char *cmd; /* exec 'echo foo bar' */ ret = qmp_fd(fixture->fd, "{'execute': 'guest-exec', 'arguments': {" @@ -826,9 +852,10 @@ static void test_qga_guest_exec(gconstpointer fix) /* wait for completion */ now = g_get_monotonic_time(); + cmd = g_strdup_printf("{'execute': 'guest-exec-status'," + " 'arguments': { 'pid': %" PRId64 " } }", pid); do { - ret = qmp_fd(fixture->fd, "{'execute': 'guest-exec-status'," - " 'arguments': { 'pid': %" PRId64 " } }", pid); + ret = qmp_fd(fixture->fd, cmd); g_assert_nonnull(ret); val = qdict_get_qdict(ret, "return"); exited = qdict_get_bool(val, "exited"); @@ -838,6 +865,7 @@ static void test_qga_guest_exec(gconstpointer fix) } while (!exited && g_get_monotonic_time() < now + 5 * G_TIME_SPAN_SECOND); g_assert(exited); + g_free(cmd); /* check stdout */ exitcode = qdict_get_int(val, "exitcode"); @@ -906,6 +934,7 @@ int main(int argc, char **argv) g_test_add_data_func("/qga/file-write-read", &fix, test_qga_file_write_read); g_test_add_data_func("/qga/get-time", &fix, test_qga_get_time); g_test_add_data_func("/qga/invalid-cmd", &fix, test_qga_invalid_cmd); + g_test_add_data_func("/qga/invalid-args", &fix, test_qga_invalid_args); g_test_add_data_func("/qga/fsfreeze-status", &fix, test_qga_fsfreeze_status); diff --git a/tests/test-qht.c b/tests/test-qht.c index 46a64b6731..9b7423abb6 100644 --- a/tests/test-qht.c +++ b/tests/test-qht.c @@ -6,6 +6,7 @@ */ #include "qemu/osdep.h" #include "qemu/qht.h" +#include "qemu/rcu.h" #define N 5000 @@ -51,6 +52,7 @@ static void check(int a, int b, bool expected) struct qht_stats stats; int i; + rcu_read_lock(); for (i = a; i < b; i++) { void *p; uint32_t hash; @@ -61,6 +63,8 @@ static void check(int a, int b, bool expected) p = qht_lookup(&ht, is_equal, &val, hash); g_assert_true(!!p == expected); } + rcu_read_unlock(); + qht_statistics_init(&ht, &stats); if (stats.used_head_buckets) { g_assert_cmpfloat(qdist_avg(&stats.chain), >=, 1.0); diff --git a/tests/test-qmp-commands.c b/tests/test-qmp-commands.c index 261fd9e313..ff944811e8 100644 --- a/tests/test-qmp-commands.c +++ b/tests/test-qmp-commands.c @@ -4,7 +4,7 @@ #include "test-qmp-commands.h" #include "qapi/qmp/dispatch.h" #include "qemu/module.h" -#include 
"qapi/qmp-input-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "tests/test-qapi-types.h" #include "tests/test-qapi-visit.h" @@ -106,6 +106,7 @@ static void test_dispatch_cmd(void) static void test_dispatch_cmd_failure(void) { QDict *req = qdict_new(); + QDict *args = qdict_new(); QObject *resp; qdict_put_obj(req, "execute", QOBJECT(qstring_from_str("user_def_cmd2"))); @@ -116,6 +117,20 @@ static void test_dispatch_cmd_failure(void) qobject_decref(resp); QDECREF(req); + + /* check that with extra arguments it throws an error */ + req = qdict_new(); + qdict_put(args, "a", qint_from_int(66)); + qdict_put(req, "arguments", args); + + qdict_put_obj(req, "execute", QOBJECT(qstring_from_str("user_def_cmd"))); + + resp = qmp_dispatch(QOBJECT(req)); + assert(resp != NULL); + assert(qdict_haskey(qobject_to_qdict(resp), "error")); + + qobject_decref(resp); + QDECREF(req); } static QObject *test_qmp_dispatch(QDict *req) @@ -229,7 +244,7 @@ static void test_dealloc_partial(void) ud2_dict = qdict_new(); qdict_put_obj(ud2_dict, "string0", QOBJECT(qstring_from_str(text))); - v = qmp_input_visitor_new(QOBJECT(ud2_dict), true); + v = qobject_input_visitor_new(QOBJECT(ud2_dict), true); visit_type_UserDefTwo(v, NULL, &ud2, &err); visit_free(v); QDECREF(ud2_dict); diff --git a/tests/test-qmp-input-strict.c b/tests/test-qobject-input-strict.c similarity index 86% rename from tests/test-qmp-input-strict.c rename to tests/test-qobject-input-strict.c index 814550ac71..4087ea3dd3 100644 --- a/tests/test-qmp-input-strict.c +++ b/tests/test-qobject-input-strict.c @@ -1,5 +1,5 @@ /* - * QMP Input Visitor unit-tests (strict mode). + * QObject Input Visitor unit-tests (strict mode). * * Copyright (C) 2011-2012, 2015 Red Hat Inc. * @@ -15,7 +15,7 @@ #include "qemu-common.h" #include "qapi/error.h" -#include "qapi/qmp-input-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "test-qapi-types.h" #include "test-qapi-visit.h" #include "qapi/qmp/types.h" @@ -53,7 +53,7 @@ static Visitor *validate_test_init_internal(TestInputVisitorData *data, data->obj = qobject_from_jsonv(json_string, ap); g_assert(data->obj); - data->qiv = qmp_input_visitor_new(data->obj, true); + data->qiv = qobject_input_visitor_new(data->obj, true); g_assert(data->qiv); return data->qiv; } @@ -193,6 +193,50 @@ static void test_validate_fail_struct_nested(TestInputVisitorData *data, g_assert(!udp); } +static void test_validate_fail_struct_missing(TestInputVisitorData *data, + const void *unused) +{ + Error *err = NULL; + Visitor *v; + QObject *any; + GenericAlternate *alt; + bool present; + int en; + int64_t i64; + uint32_t u32; + int8_t i8; + char *str; + double dbl; + + v = validate_test_init(data, "{}"); + visit_start_struct(v, NULL, NULL, 0, &error_abort); + visit_start_struct(v, "struct", NULL, 0, &err); + error_free_or_abort(&err); + visit_start_list(v, "list", NULL, 0, &err); + error_free_or_abort(&err); + visit_start_alternate(v, "alternate", &alt, sizeof(*alt), false, &err); + error_free_or_abort(&err); + visit_optional(v, "optional", &present); + g_assert(!present); + visit_type_enum(v, "enum", &en, EnumOne_lookup, &err); + error_free_or_abort(&err); + visit_type_int(v, "i64", &i64, &err); + error_free_or_abort(&err); + visit_type_uint32(v, "u32", &u32, &err); + error_free_or_abort(&err); + visit_type_int8(v, "i8", &i8, &err); + error_free_or_abort(&err); + visit_type_str(v, "i8", &str, &err); + error_free_or_abort(&err); + visit_type_number(v, "dbl", &dbl, &err); + error_free_or_abort(&err); + visit_type_any(v, "any", 
&any, &err); + error_free_or_abort(&err); + visit_type_null(v, "null", &err); + error_free_or_abort(&err); + visit_end_struct(v, NULL); +} + static void test_validate_fail_list(TestInputVisitorData *data, const void *unused) { @@ -316,6 +360,8 @@ int main(int argc, char **argv) &testdata, test_validate_fail_struct); validate_test_add("/visitor/input-strict/fail/struct-nested", &testdata, test_validate_fail_struct_nested); + validate_test_add("/visitor/input-strict/fail/struct-missing", + &testdata, test_validate_fail_struct_missing); validate_test_add("/visitor/input-strict/fail/list", &testdata, test_validate_fail_list); validate_test_add("/visitor/input-strict/fail/union-flat", diff --git a/tests/test-qmp-input-visitor.c b/tests/test-qobject-input-visitor.c similarity index 91% rename from tests/test-qmp-input-visitor.c rename to tests/test-qobject-input-visitor.c index f583dce3ed..945404a9e0 100644 --- a/tests/test-qmp-input-visitor.c +++ b/tests/test-qobject-input-visitor.c @@ -1,5 +1,5 @@ /* - * QMP Input Visitor unit-tests. + * QObject Input Visitor unit-tests. * * Copyright (C) 2011-2016 Red Hat Inc. * @@ -14,7 +14,7 @@ #include "qemu-common.h" #include "qapi/error.h" -#include "qapi/qmp-input-visitor.h" +#include "qapi/qobject-input-visitor.h" #include "test-qapi-types.h" #include "test-qapi-visit.h" #include "qapi/qmp/types.h" @@ -49,7 +49,7 @@ static Visitor *visitor_input_test_init_internal(TestInputVisitorData *data, data->obj = qobject_from_jsonv(json_string, ap); g_assert(data->obj); - data->qiv = qmp_input_visitor_new(data->obj, false); + data->qiv = qobject_input_visitor_new(data->obj, false); g_assert(data->qiv); return data->qiv; } @@ -83,10 +83,11 @@ static Visitor *visitor_input_test_init_raw(TestInputVisitorData *data, static void test_visitor_in_int(TestInputVisitorData *data, const void *unused) { - int64_t res = 0, value = -42; + int64_t res = 0; + int value = -42; Visitor *v; - v = visitor_input_test_init(data, "%" PRId64, value); + v = visitor_input_test_init(data, "%d", value); visit_type_int(v, NULL, &res, &error_abort); g_assert_cmpint(res, ==, value); @@ -747,10 +748,11 @@ static void test_visitor_in_native_list_number(TestInputVisitorData *data, } static void input_visitor_test_add(const char *testpath, - TestInputVisitorData *data, - void (*test_func)(TestInputVisitorData *data, const void *user_data)) + const void *user_data, + void (*test_func)(TestInputVisitorData *data, + const void *user_data)) { - g_test_add(testpath, TestInputVisitorData, data, NULL, test_func, + g_test_add(testpath, TestInputVisitorData, user_data, NULL, test_func, visitor_input_teardown); } @@ -833,77 +835,64 @@ static void test_visitor_in_wrong_type(TestInputVisitorData *data, int main(int argc, char **argv) { - TestInputVisitorData in_visitor_data; - g_test_init(&argc, &argv, NULL); input_visitor_test_add("/visitor/input/int", - &in_visitor_data, test_visitor_in_int); + NULL, test_visitor_in_int); input_visitor_test_add("/visitor/input/int_overflow", - &in_visitor_data, test_visitor_in_int_overflow); + NULL, test_visitor_in_int_overflow); input_visitor_test_add("/visitor/input/bool", - &in_visitor_data, test_visitor_in_bool); + NULL, test_visitor_in_bool); input_visitor_test_add("/visitor/input/number", - &in_visitor_data, test_visitor_in_number); + NULL, test_visitor_in_number); input_visitor_test_add("/visitor/input/string", - &in_visitor_data, test_visitor_in_string); + NULL, test_visitor_in_string); input_visitor_test_add("/visitor/input/enum", - &in_visitor_data, 
test_visitor_in_enum); + NULL, test_visitor_in_enum); input_visitor_test_add("/visitor/input/struct", - &in_visitor_data, test_visitor_in_struct); + NULL, test_visitor_in_struct); input_visitor_test_add("/visitor/input/struct-nested", - &in_visitor_data, test_visitor_in_struct_nested); + NULL, test_visitor_in_struct_nested); input_visitor_test_add("/visitor/input/list", - &in_visitor_data, test_visitor_in_list); + NULL, test_visitor_in_list); input_visitor_test_add("/visitor/input/any", - &in_visitor_data, test_visitor_in_any); + NULL, test_visitor_in_any); input_visitor_test_add("/visitor/input/null", - &in_visitor_data, test_visitor_in_null); + NULL, test_visitor_in_null); input_visitor_test_add("/visitor/input/union-flat", - &in_visitor_data, test_visitor_in_union_flat); + NULL, test_visitor_in_union_flat); input_visitor_test_add("/visitor/input/alternate", - &in_visitor_data, test_visitor_in_alternate); + NULL, test_visitor_in_alternate); input_visitor_test_add("/visitor/input/errors", - &in_visitor_data, test_visitor_in_errors); + NULL, test_visitor_in_errors); input_visitor_test_add("/visitor/input/wrong-type", - &in_visitor_data, test_visitor_in_wrong_type); + NULL, test_visitor_in_wrong_type); input_visitor_test_add("/visitor/input/alternate-number", - &in_visitor_data, test_visitor_in_alternate_number); + NULL, test_visitor_in_alternate_number); input_visitor_test_add("/visitor/input/native_list/int", - &in_visitor_data, - test_visitor_in_native_list_int); + NULL, test_visitor_in_native_list_int); input_visitor_test_add("/visitor/input/native_list/int8", - &in_visitor_data, - test_visitor_in_native_list_int8); + NULL, test_visitor_in_native_list_int8); input_visitor_test_add("/visitor/input/native_list/int16", - &in_visitor_data, - test_visitor_in_native_list_int16); + NULL, test_visitor_in_native_list_int16); input_visitor_test_add("/visitor/input/native_list/int32", - &in_visitor_data, - test_visitor_in_native_list_int32); + NULL, test_visitor_in_native_list_int32); input_visitor_test_add("/visitor/input/native_list/int64", - &in_visitor_data, - test_visitor_in_native_list_int64); + NULL, test_visitor_in_native_list_int64); input_visitor_test_add("/visitor/input/native_list/uint8", - &in_visitor_data, - test_visitor_in_native_list_uint8); + NULL, test_visitor_in_native_list_uint8); input_visitor_test_add("/visitor/input/native_list/uint16", - &in_visitor_data, - test_visitor_in_native_list_uint16); + NULL, test_visitor_in_native_list_uint16); input_visitor_test_add("/visitor/input/native_list/uint32", - &in_visitor_data, - test_visitor_in_native_list_uint32); + NULL, test_visitor_in_native_list_uint32); input_visitor_test_add("/visitor/input/native_list/uint64", - &in_visitor_data, - test_visitor_in_native_list_uint64); + NULL, test_visitor_in_native_list_uint64); input_visitor_test_add("/visitor/input/native_list/bool", - &in_visitor_data, test_visitor_in_native_list_bool); + NULL, test_visitor_in_native_list_bool); input_visitor_test_add("/visitor/input/native_list/str", - &in_visitor_data, - test_visitor_in_native_list_string); + NULL, test_visitor_in_native_list_string); input_visitor_test_add("/visitor/input/native_list/number", - &in_visitor_data, - test_visitor_in_native_list_number); + NULL, test_visitor_in_native_list_number); g_test_run(); diff --git a/tests/test-qmp-output-visitor.c b/tests/test-qobject-output-visitor.c similarity index 99% rename from tests/test-qmp-output-visitor.c rename to tests/test-qobject-output-visitor.c index 513d71f0d1..4e2d79c5d1 100644 --- 
a/tests/test-qmp-output-visitor.c +++ b/tests/test-qobject-output-visitor.c @@ -1,5 +1,5 @@ /* - * QMP Output Visitor unit-tests. + * QObject Output Visitor unit-tests. * * Copyright (C) 2011-2016 Red Hat Inc. * @@ -14,7 +14,7 @@ #include "qemu-common.h" #include "qapi/error.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "test-qapi-types.h" #include "test-qapi-visit.h" #include "qapi/qmp/types.h" @@ -28,7 +28,7 @@ typedef struct TestOutputVisitorData { static void visitor_output_setup(TestOutputVisitorData *data, const void *unused) { - data->ov = qmp_output_visitor_new(&data->obj); + data->ov = qobject_output_visitor_new(&data->obj); g_assert(data->ov); } diff --git a/tests/test-replication.c b/tests/test-replication.c new file mode 100644 index 0000000000..fac2da3f58 --- /dev/null +++ b/tests/test-replication.c @@ -0,0 +1,578 @@ +/* + * Block replication tests + * + * Copyright (c) 2016 FUJITSU LIMITED + * Author: Changlong Xie + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "replication.h" +#include "block/block_int.h" +#include "sysemu/block-backend.h" + +#define IMG_SIZE (64 * 1024 * 1024) + +/* primary */ +#define P_ID "primary-id" +static char p_local_disk[] = "/tmp/p_local_disk.XXXXXX"; + +/* secondary */ +#define S_ID "secondary-id" +#define S_LOCAL_DISK_ID "secondary-local-disk-id" +static char s_local_disk[] = "/tmp/s_local_disk.XXXXXX"; +static char s_active_disk[] = "/tmp/s_active_disk.XXXXXX"; +static char s_hidden_disk[] = "/tmp/s_hidden_disk.XXXXXX"; + +/* FIXME: steal from blockdev.c */ +QemuOptsList qemu_drive_opts = { + .name = "drive", + .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), + .desc = { + { /* end of list */ } + }, +}; + +#define NOT_DONE 0x7fffffff + +static void blk_rw_done(void *opaque, int ret) +{ + *(int *)opaque = ret; +} + +static void test_blk_read(BlockBackend *blk, long pattern, + int64_t pattern_offset, int64_t pattern_count, + int64_t offset, int64_t count, + bool expect_failed) +{ + void *pattern_buf = NULL; + QEMUIOVector qiov; + void *cmp_buf = NULL; + int async_ret = NOT_DONE; + + if (pattern) { + cmp_buf = g_malloc(pattern_count); + memset(cmp_buf, pattern, pattern_count); + } + + pattern_buf = g_malloc(count); + if (pattern) { + memset(pattern_buf, pattern, count); + } else { + memset(pattern_buf, 0x00, count); + } + + qemu_iovec_init(&qiov, 1); + qemu_iovec_add(&qiov, pattern_buf, count); + + blk_aio_preadv(blk, offset, &qiov, 0, blk_rw_done, &async_ret); + while (async_ret == NOT_DONE) { + main_loop_wait(false); + } + + if (expect_failed) { + g_assert(async_ret != 0); + } else { + g_assert(async_ret == 0); + if (pattern) { + g_assert(memcmp(pattern_buf + pattern_offset, + cmp_buf, pattern_count) <= 0); + } + } + + g_free(pattern_buf); + g_free(cmp_buf); + qemu_iovec_destroy(&qiov); +} + +static void test_blk_write(BlockBackend *blk, long pattern, int64_t offset, + int64_t count, bool expect_failed) +{ + void *pattern_buf = NULL; + QEMUIOVector qiov; + int async_ret = NOT_DONE; + + pattern_buf = g_malloc(count); + if (pattern) { + memset(pattern_buf, pattern, count); + } else { + memset(pattern_buf, 0x00, count); + } + + qemu_iovec_init(&qiov, 1); + qemu_iovec_add(&qiov, pattern_buf, count); + + blk_aio_pwritev(blk, offset, &qiov, 0, blk_rw_done, &async_ret); + while (async_ret == NOT_DONE) { + main_loop_wait(false); + } + + if 
(expect_failed) { + g_assert(async_ret != 0); + } else { + g_assert(async_ret == 0); + } + + g_free(pattern_buf); + qemu_iovec_destroy(&qiov); +} + +/* + * Create a uniquely-named empty temporary file. + */ +static void make_temp(char *template) +{ + int fd; + + fd = mkstemp(template); + g_assert(fd >= 0); + close(fd); +} + +static void prepare_imgs(void) +{ + Error *local_err = NULL; + + make_temp(p_local_disk); + make_temp(s_local_disk); + make_temp(s_active_disk); + make_temp(s_hidden_disk); + + /* Primary */ + bdrv_img_create(p_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, + BDRV_O_RDWR, &local_err, true); + g_assert(!local_err); + + /* Secondary */ + bdrv_img_create(s_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, + BDRV_O_RDWR, &local_err, true); + g_assert(!local_err); + bdrv_img_create(s_active_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, + BDRV_O_RDWR, &local_err, true); + g_assert(!local_err); + bdrv_img_create(s_hidden_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE, + BDRV_O_RDWR, &local_err, true); + g_assert(!local_err); +} + +static void cleanup_imgs(void) +{ + /* Primary */ + unlink(p_local_disk); + + /* Secondary */ + unlink(s_local_disk); + unlink(s_active_disk); + unlink(s_hidden_disk); +} + +static BlockBackend *start_primary(void) +{ + BlockBackend *blk; + QemuOpts *opts; + QDict *qdict; + Error *local_err = NULL; + char *cmdline; + + cmdline = g_strdup_printf("driver=replication,mode=primary,node-name=xxx," + "file.driver=qcow2,file.file.filename=%s" + , p_local_disk); + opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); + g_free(cmdline); + + qdict = qemu_opts_to_qdict(opts, NULL); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); + + blk = blk_new_open(NULL, NULL, qdict, BDRV_O_RDWR, &local_err); + g_assert(blk); + g_assert(!local_err); + + monitor_add_blk(blk, P_ID, &local_err); + g_assert(!local_err); + + qemu_opts_del(opts); + + return blk; +} + +static void teardown_primary(void) +{ + BlockBackend *blk; + + /* remove P_ID */ + blk = blk_by_name(P_ID); + assert(blk); + + monitor_remove_blk(blk); + blk_unref(blk); +} + +static void test_primary_read(void) +{ + BlockBackend *blk; + + blk = start_primary(); + + /* read from 0 to IMG_SIZE */ + test_blk_read(blk, 0, 0, IMG_SIZE, 0, IMG_SIZE, true); + + teardown_primary(); +} + +static void test_primary_write(void) +{ + BlockBackend *blk; + + blk = start_primary(); + + /* write from 0 to IMG_SIZE */ + test_blk_write(blk, 0, 0, IMG_SIZE, true); + + teardown_primary(); +} + +static void test_primary_start(void) +{ + BlockBackend *blk = NULL; + Error *local_err = NULL; + + blk = start_primary(); + + replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); + g_assert(!local_err); + + /* read from 0 to IMG_SIZE */ + test_blk_read(blk, 0, 0, IMG_SIZE, 0, IMG_SIZE, true); + + /* write 0x22 from 0 to IMG_SIZE */ + test_blk_write(blk, 0x22, 0, IMG_SIZE, false); + + teardown_primary(); +} + +static void test_primary_stop(void) +{ + Error *local_err = NULL; + bool failover = true; + + start_primary(); + + replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); + g_assert(!local_err); + + replication_stop_all(failover, &local_err); + g_assert(!local_err); + + teardown_primary(); +} + +static void test_primary_do_checkpoint(void) +{ + Error *local_err = NULL; + + start_primary(); + + replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); + g_assert(!local_err); + + replication_do_checkpoint_all(&local_err); + g_assert(!local_err); 
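/*
 * Editor's note, not part of the patch: the test_blk_read()/test_blk_write()
 * helpers above use a simple completion-polling idiom: issue an asynchronous
 * request whose callback (blk_rw_done) stores the result, then pump the main
 * loop until the NOT_DONE sentinel is overwritten.  A minimal sketch of that
 * idiom, using only names that already appear in this test:
 *
 *     int async_ret = NOT_DONE;
 *
 *     blk_aio_preadv(blk, offset, &qiov, 0, blk_rw_done, &async_ret);
 *     while (async_ret == NOT_DONE) {
 *         main_loop_wait(false);
 *     }
 *
 * async_ret then holds 0 on success or a negative error value on failure,
 * which is exactly what the expect_failed branches assert on.
 */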
+ + teardown_primary(); +} + +static void test_primary_get_error_all(void) +{ + Error *local_err = NULL; + + start_primary(); + + replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); + g_assert(!local_err); + + replication_get_error_all(&local_err); + g_assert(!local_err); + + teardown_primary(); +} + +static BlockBackend *start_secondary(void) +{ + QemuOpts *opts; + QDict *qdict; + BlockBackend *blk; + char *cmdline; + Error *local_err = NULL; + + /* add s_local_disk and forge S_LOCAL_DISK_ID */ + cmdline = g_strdup_printf("file.filename=%s,driver=qcow2", s_local_disk); + opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); + g_free(cmdline); + + qdict = qemu_opts_to_qdict(opts, NULL); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); + + blk = blk_new_open(NULL, NULL, qdict, BDRV_O_RDWR, &local_err); + assert(blk); + monitor_add_blk(blk, S_LOCAL_DISK_ID, &local_err); + g_assert(!local_err); + + /* format s_local_disk with pattern "0x11" */ + test_blk_write(blk, 0x11, 0, IMG_SIZE, false); + + qemu_opts_del(opts); + + /* add S_(ACTIVE/HIDDEN)_DISK and forge S_ID */ + cmdline = g_strdup_printf("driver=replication,mode=secondary,top-id=%s," + "file.driver=qcow2,file.file.filename=%s," + "file.backing.driver=qcow2," + "file.backing.file.filename=%s," + "file.backing.backing=%s" + , S_ID, s_active_disk, s_hidden_disk + , S_LOCAL_DISK_ID); + opts = qemu_opts_parse_noisily(&qemu_drive_opts, cmdline, false); + g_free(cmdline); + + qdict = qemu_opts_to_qdict(opts, NULL); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off"); + qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off"); + + blk = blk_new_open(NULL, NULL, qdict, BDRV_O_RDWR, &local_err); + assert(blk); + monitor_add_blk(blk, S_ID, &local_err); + g_assert(!local_err); + + qemu_opts_del(opts); + + return blk; +} + +static void teardown_secondary(void) +{ + /* only need to destroy two BBs */ + BlockBackend *blk; + + /* remove S_LOCAL_DISK_ID */ + blk = blk_by_name(S_LOCAL_DISK_ID); + assert(blk); + + monitor_remove_blk(blk); + blk_unref(blk); + + /* remove S_ID */ + blk = blk_by_name(S_ID); + assert(blk); + + monitor_remove_blk(blk); + blk_unref(blk); +} + +static void test_secondary_read(void) +{ + BlockBackend *blk; + + blk = start_secondary(); + + /* read from 0 to IMG_SIZE */ + test_blk_read(blk, 0, 0, IMG_SIZE, 0, IMG_SIZE, true); + + teardown_secondary(); +} + +static void test_secondary_write(void) +{ + BlockBackend *blk; + + blk = start_secondary(); + + /* write from 0 to IMG_SIZE */ + test_blk_write(blk, 0, 0, IMG_SIZE, true); + + teardown_secondary(); +} + +static void test_secondary_start(void) +{ + BlockBackend *top_blk, *local_blk; + Error *local_err = NULL; + bool failover = true; + + top_blk = start_secondary(); + replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); + g_assert(!local_err); + + /* read from s_local_disk (0, IMG_SIZE) */ + test_blk_read(top_blk, 0x11, 0, IMG_SIZE, 0, IMG_SIZE, false); + + /* write 0x22 to s_local_disk (IMG_SIZE / 2, IMG_SIZE) */ + local_blk = blk_by_name(S_LOCAL_DISK_ID); + test_blk_write(local_blk, 0x22, IMG_SIZE / 2, IMG_SIZE / 2, false); + + /* replication will backup s_local_disk to s_hidden_disk */ + test_blk_read(top_blk, 0x11, IMG_SIZE / 2, + IMG_SIZE / 2, 0, IMG_SIZE, false); + + /* write 0x33 to s_active_disk (0, IMG_SIZE / 2) */ + test_blk_write(top_blk, 0x33, 0, IMG_SIZE / 2, false); + + /* read from s_active_disk (0, IMG_SIZE/2) */ + test_blk_read(top_blk, 0x33, 
0, IMG_SIZE / 2, + 0, IMG_SIZE / 2, false); + + /* unblock top_bs */ + replication_stop_all(failover, &local_err); + g_assert(!local_err); + + teardown_secondary(); +} + + +static void test_secondary_stop(void) +{ + BlockBackend *top_blk, *local_blk; + Error *local_err = NULL; + bool failover = true; + + top_blk = start_secondary(); + replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); + g_assert(!local_err); + + /* write 0x22 to s_local_disk (IMG_SIZE / 2, IMG_SIZE) */ + local_blk = blk_by_name(S_LOCAL_DISK_ID); + test_blk_write(local_blk, 0x22, IMG_SIZE / 2, IMG_SIZE / 2, false); + + /* replication will backup s_local_disk to s_hidden_disk */ + test_blk_read(top_blk, 0x11, IMG_SIZE / 2, + IMG_SIZE / 2, 0, IMG_SIZE, false); + + /* write 0x33 to s_active_disk (0, IMG_SIZE / 2) */ + test_blk_write(top_blk, 0x33, 0, IMG_SIZE / 2, false); + + /* do active commit */ + replication_stop_all(failover, &local_err); + g_assert(!local_err); + + /* read from s_local_disk (0, IMG_SIZE / 2) */ + test_blk_read(top_blk, 0x33, 0, IMG_SIZE / 2, + 0, IMG_SIZE / 2, false); + + + /* read from s_local_disk (IMG_SIZE / 2, IMG_SIZE) */ + test_blk_read(top_blk, 0x22, IMG_SIZE / 2, + IMG_SIZE / 2, 0, IMG_SIZE, false); + + teardown_secondary(); +} + +static void test_secondary_do_checkpoint(void) +{ + BlockBackend *top_blk, *local_blk; + Error *local_err = NULL; + bool failover = true; + + top_blk = start_secondary(); + replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); + g_assert(!local_err); + + /* write 0x22 to s_local_disk (IMG_SIZE / 2, IMG_SIZE) */ + local_blk = blk_by_name(S_LOCAL_DISK_ID); + test_blk_write(local_blk, 0x22, IMG_SIZE / 2, + IMG_SIZE / 2, false); + + /* replication will backup s_local_disk to s_hidden_disk */ + test_blk_read(top_blk, 0x11, IMG_SIZE / 2, + IMG_SIZE / 2, 0, IMG_SIZE, false); + + replication_do_checkpoint_all(&local_err); + g_assert(!local_err); + + /* after checkpoint, read pattern 0x22 from s_local_disk */ + test_blk_read(top_blk, 0x22, IMG_SIZE / 2, + IMG_SIZE / 2, 0, IMG_SIZE, false); + + /* unblock top_bs */ + replication_stop_all(failover, &local_err); + g_assert(!local_err); + + teardown_secondary(); +} + +static void test_secondary_get_error_all(void) +{ + Error *local_err = NULL; + bool failover = true; + + start_secondary(); + replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); + g_assert(!local_err); + + replication_get_error_all(&local_err); + g_assert(!local_err); + + /* unblock top_bs */ + replication_stop_all(failover, &local_err); + g_assert(!local_err); + + teardown_secondary(); +} + +static void sigabrt_handler(int signo) +{ + cleanup_imgs(); +} + +static void setup_sigabrt_handler(void) +{ + struct sigaction sigact; + + sigact = (struct sigaction) { + .sa_handler = sigabrt_handler, + .sa_flags = SA_RESETHAND, + }; + sigemptyset(&sigact.sa_mask); + sigaction(SIGABRT, &sigact, NULL); +} + +int main(int argc, char **argv) +{ + int ret; + qemu_init_main_loop(&error_fatal); + bdrv_init(); + + g_test_init(&argc, &argv, NULL); + setup_sigabrt_handler(); + + prepare_imgs(); + + /* Primary */ + g_test_add_func("/replication/primary/read", test_primary_read); + g_test_add_func("/replication/primary/write", test_primary_write); + g_test_add_func("/replication/primary/start", test_primary_start); + g_test_add_func("/replication/primary/stop", test_primary_stop); + g_test_add_func("/replication/primary/do_checkpoint", + test_primary_do_checkpoint); + g_test_add_func("/replication/primary/get_error_all", + test_primary_get_error_all); + + /* 
Secondary */ + g_test_add_func("/replication/secondary/read", test_secondary_read); + g_test_add_func("/replication/secondary/write", test_secondary_write); + g_test_add_func("/replication/secondary/start", test_secondary_start); + g_test_add_func("/replication/secondary/stop", test_secondary_stop); + g_test_add_func("/replication/secondary/do_checkpoint", + test_secondary_do_checkpoint); + g_test_add_func("/replication/secondary/get_error_all", + test_secondary_get_error_all); + + ret = g_test_run(); + + cleanup_imgs(); + + return ret; +} diff --git a/tests/test-rfifolock.c b/tests/test-rfifolock.c deleted file mode 100644 index 471a81114d..0000000000 --- a/tests/test-rfifolock.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * RFifoLock tests - * - * Copyright Red Hat, Inc. 2013 - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qemu/rfifolock.h" - -static void test_nesting(void) -{ - RFifoLock lock; - - /* Trivial test, ensure the lock is recursive */ - rfifolock_init(&lock, NULL, NULL); - rfifolock_lock(&lock); - rfifolock_lock(&lock); - rfifolock_lock(&lock); - rfifolock_unlock(&lock); - rfifolock_unlock(&lock); - rfifolock_unlock(&lock); - rfifolock_destroy(&lock); -} - -typedef struct { - RFifoLock lock; - int fd[2]; -} CallbackTestData; - -static void rfifolock_cb(void *opaque) -{ - CallbackTestData *data = opaque; - int ret; - char c = 0; - - ret = write(data->fd[1], &c, sizeof(c)); - g_assert(ret == 1); -} - -static void *callback_thread(void *opaque) -{ - CallbackTestData *data = opaque; - - /* The other thread holds the lock so the contention callback will be - * invoked... - */ - rfifolock_lock(&data->lock); - rfifolock_unlock(&data->lock); - return NULL; -} - -static void test_callback(void) -{ - CallbackTestData data; - QemuThread thread; - int ret; - char c; - - rfifolock_init(&data.lock, rfifolock_cb, &data); - ret = qemu_pipe(data.fd); - g_assert(ret == 0); - - /* Hold lock but allow the callback to kick us by writing to the pipe */ - rfifolock_lock(&data.lock); - qemu_thread_create(&thread, "callback_thread", - callback_thread, &data, QEMU_THREAD_JOINABLE); - ret = read(data.fd[0], &c, sizeof(c)); - g_assert(ret == 1); - rfifolock_unlock(&data.lock); - /* If we got here then the callback was invoked, as expected */ - - qemu_thread_join(&thread); - close(data.fd[0]); - close(data.fd[1]); - rfifolock_destroy(&data.lock); -} - -int main(int argc, char **argv) -{ - g_test_init(&argc, &argv, NULL); - g_test_add_func("/nesting", test_nesting); - g_test_add_func("/callback", test_callback); - return g_test_run(); -} diff --git a/tests/test-string-input-visitor.c b/tests/test-string-input-visitor.c index d837ebedad..7f10e2582f 100644 --- a/tests/test-string-input-visitor.c +++ b/tests/test-string-input-visitor.c @@ -4,7 +4,7 @@ * Copyright (C) 2012 Red Hat Inc. * * Authors: - * Paolo Bonzini (based on test-qmp-input-visitor) + * Paolo Bonzini (based on test-qobject-input-visitor) * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. 
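/*
 * Editor's note, not part of the patch: the fuzz-test fix in the next hunk
 * plugs a leak by releasing the intList that visit_type_intList() may have
 * built before the visitor is torn down.  In general, any list produced by a
 * QAPI-generated visit_type_FOOList() call owns its nodes and must be freed
 * with the matching qapi_free_FOOList().  A minimal illustrative sketch; the
 * helper name below is made up:
 */
static void consume_int_list_sketch(Visitor *v)
{
    intList *list = NULL;
    intList *tail;

    /* On error the visitor may leave list == NULL; that is safe to free. */
    visit_type_intList(v, NULL, &list, NULL);
    for (tail = list; tail; tail = tail->next) {
        /* use tail->value here */
    }
    qapi_free_intList(list);    /* releases every node in one call */
}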
@@ -228,6 +228,7 @@ static void test_visitor_in_fuzz(TestInputVisitorData *data, v = visitor_input_test_init(data, buf); visit_type_intList(v, NULL, &ilres, NULL); + qapi_free_intList(ilres); visitor_input_teardown(data, NULL); v = visitor_input_test_init(data, buf); diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c index 444844a15a..e736db3af8 100644 --- a/tests/test-string-output-visitor.c +++ b/tests/test-string-output-visitor.c @@ -4,7 +4,7 @@ * Copyright (C) 2012 Red Hat Inc. * * Authors: - * Paolo Bonzini (based on test-qmp-output-visitor) + * Paolo Bonzini (based on test-qobject-output-visitor) * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. diff --git a/tests/test-uuid.c b/tests/test-uuid.c new file mode 100644 index 0000000000..d3a2791fd4 --- /dev/null +++ b/tests/test-uuid.c @@ -0,0 +1,178 @@ +/* + * QEMU UUID Library + * + * Copyright (c) 2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + * + */ + +#include "qemu/osdep.h" +#include "qemu/uuid.h" + +struct { + const char *uuidstr; + QemuUUID uuid; + bool uuidstr_is_valid; + bool check_unparse; +} uuid_test_data[] = { + { /* Normal */ + "586ece27-7f09-41e0-9e74-e901317e9d42", + { { { + 0x58, 0x6e, 0xce, 0x27, 0x7f, 0x09, 0x41, 0xe0, + 0x9e, 0x74, 0xe9, 0x01, 0x31, 0x7e, 0x9d, 0x42, + } } }, + true, true, + }, { /* NULL */ + "00000000-0000-0000-0000-000000000000", + { }, + true, true, + }, { /* Upper case */ + "0CC6C752-3961-4028-A286-C05CC616D396", + { { { + 0x0c, 0xc6, 0xc7, 0x52, 0x39, 0x61, 0x40, 0x28, + 0xa2, 0x86, 0xc0, 0x5c, 0xc6, 0x16, 0xd3, 0x96, + } } }, + true, false, + }, { /* Mixed case */ + "0CC6C752-3961-4028-a286-c05cc616D396", + { { { + 0x0c, 0xc6, 0xc7, 0x52, 0x39, 0x61, 0x40, 0x28, + 0xa2, 0x86, 0xc0, 0x5c, 0xc6, 0x16, 0xd3, 0x96, + } } }, + true, false, + }, { /* Empty */ + "" + }, { /* Too short */ + "abc", + }, { /* Non-hex */ + "abcdefgh-0000-0000-0000-000000000000", + }, { /* No '-' */ + "0cc6c75239614028a286c05cc616d396", + }, { /* '-' in wrong position */ + "0cc6c-7523961-4028-a286-c05cc616d396", + }, { /* Double '-' */ + "0cc6c752--3961-4028-a286-c05cc616d396", + }, { /* Too long */ + "0000000000000000000000000000000000000000000000", + }, { /* Invalid char in the beginning */ + ")cc6c752-3961-4028-a286-c05cc616d396", + }, { /* Invalid char in the beginning, in extra */ + ")0cc6c752-3961-4028-a286-c05cc616d396", + }, { /* Invalid char in the middle */ + "0cc6c752-39*1-4028-a286-c05cc616d396", + }, { /* Invalid char in the middle, in extra */ + "0cc6c752-39*61-4028-a286-c05cc616d396", + }, { /* Invalid char in the end */ + "0cc6c752-3961-4028-a286-c05cc616d39&", + }, { /* Invalid char in the end, in extra */ + "0cc6c752-3961-4028-a286-c05cc616d396&", + }, { /* Short end and trailing space */ + "0cc6c752-3961-4028-a286-c05cc616d39 ", + }, { /* Leading space and short end */ + " 
0cc6c752-3961-4028-a286-c05cc616d39", + }, +}; + +static inline bool uuid_is_valid(QemuUUID *uuid) +{ + return qemu_uuid_is_null(uuid) || + ((uuid->data[6] & 0xf0) == 0x40 && (uuid->data[8] & 0xc0) == 0x80); +} + +static void test_uuid_generate(void) +{ + QemuUUID uuid; + int i; + + for (i = 0; i < 100; ++i) { + qemu_uuid_generate(&uuid); + g_assert(uuid_is_valid(&uuid)); + } +} + +static void test_uuid_is_null(void) +{ + QemuUUID uuid_null = { }; + QemuUUID uuid_not_null = { { { + 0x58, 0x6e, 0xce, 0x27, 0x7f, 0x09, 0x41, 0xe0, + 0x9e, 0x74, 0xe9, 0x01, 0x31, 0x7e, 0x9d, 0x42 + } } }; + QemuUUID uuid_not_null_2 = { { { 1 } } }; + + g_assert(qemu_uuid_is_null(&uuid_null)); + g_assert_false(qemu_uuid_is_null(&uuid_not_null)); + g_assert_false(qemu_uuid_is_null(&uuid_not_null_2)); +} + +static void test_uuid_parse(void) +{ + int i, r; + + for (i = 0; i < ARRAY_SIZE(uuid_test_data); i++) { + QemuUUID uuid; + bool is_valid = uuid_test_data[i].uuidstr_is_valid; + + r = qemu_uuid_parse(uuid_test_data[i].uuidstr, &uuid); + g_assert_cmpint(!r, ==, is_valid); + if (is_valid) { + g_assert_cmpint(is_valid, ==, uuid_is_valid(&uuid)); + g_assert_cmpmem(&uuid_test_data[i].uuid, sizeof(uuid), + &uuid, sizeof(uuid)); + } + } +} + +static void test_uuid_unparse(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(uuid_test_data); i++) { + char out[37]; + + if (!uuid_test_data[i].check_unparse) { + continue; + } + qemu_uuid_unparse(&uuid_test_data[i].uuid, out); + g_assert_cmpstr(uuid_test_data[i].uuidstr, ==, out); + } +} + +static void test_uuid_unparse_strdup(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(uuid_test_data); i++) { + char *out; + + if (!uuid_test_data[i].check_unparse) { + continue; + } + out = qemu_uuid_unparse_strdup(&uuid_test_data[i].uuid); + g_assert_cmpstr(uuid_test_data[i].uuidstr, ==, out); + g_free(out); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/uuid/generate", test_uuid_generate); + g_test_add_func("/uuid/is_null", test_uuid_is_null); + g_test_add_func("/uuid/parse", test_uuid_parse); + g_test_add_func("/uuid/unparse", test_uuid_unparse); + g_test_add_func("/uuid/unparse_strdup", test_uuid_unparse_strdup); + + return g_test_run(); +} diff --git a/tests/test-visitor-serialization.c b/tests/test-visitor-serialization.c index dba4670762..66b2b1c564 100644 --- a/tests/test-visitor-serialization.c +++ b/tests/test-visitor-serialization.c @@ -20,8 +20,8 @@ #include "qapi/error.h" #include "qapi/qmp/types.h" #include "qapi/qmp/qjson.h" -#include "qapi/qmp-input-visitor.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi/string-input-visitor.h" #include "qapi/string-output-visitor.h" #include "qapi-types.h" @@ -1022,7 +1022,7 @@ static void qmp_serialize(void *native_in, void **datap, { QmpSerializeData *d = g_malloc0(sizeof(*d)); - d->qov = qmp_output_visitor_new(&d->obj); + d->qov = qobject_output_visitor_new(&d->obj); visit(d->qov, &native_in, errp); *datap = d; } @@ -1040,7 +1040,7 @@ static void qmp_deserialize(void **native_out, void *datap, obj = qobject_from_json(qstring_get_str(output_json)); QDECREF(output_json); - d->qiv = qmp_input_visitor_new(obj, true); + d->qiv = qobject_input_visitor_new(obj, true); qobject_decref(obj_orig); qobject_decref(obj); visit(d->qiv, native_out, errp); diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c index 41fd841aed..d2f529b831 100644 --- a/tests/test-vmstate.c +++ b/tests/test-vmstate.c @@ -50,16 
+50,20 @@ static QEMUFile *open_test_file(bool write) { int fd = dup(temp_fd); QIOChannel *ioc; + QEMUFile *f; + lseek(fd, 0, SEEK_SET); if (write) { g_assert_cmpint(ftruncate(fd, 0), ==, 0); } ioc = QIO_CHANNEL(qio_channel_file_new_fd(fd)); if (write) { - return qemu_fopen_channel_output(ioc); + f = qemu_fopen_channel_output(ioc); } else { - return qemu_fopen_channel_input(ioc); + f = qemu_fopen_channel_input(ioc); } + object_unref(OBJECT(ioc)); + return f; } #define SUCCESS(val) \ @@ -79,6 +83,13 @@ static void save_vmstate(const VMStateDescription *desc, void *obj) qemu_fclose(f); } +static void save_buffer(const uint8_t *buf, size_t buf_size) +{ + QEMUFile *fsave = open_test_file(true); + qemu_put_buffer(fsave, buf, buf_size); + qemu_fclose(fsave); +} + static void compare_vmstate(uint8_t *wire, size_t size) { QEMUFile *f = open_test_file(false); @@ -305,15 +316,13 @@ static const VMStateDescription vmstate_versioned = { static void test_load_v1(void) { - QEMUFile *fsave = open_test_file(true); uint8_t buf[] = { 0, 0, 0, 10, /* a */ 0, 0, 0, 30, /* c */ 0, 0, 0, 0, 0, 0, 0, 40, /* d */ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ }; - qemu_put_buffer(fsave, buf, sizeof(buf)); - qemu_fclose(fsave); + save_buffer(buf, sizeof(buf)); QEMUFile *loading = open_test_file(false); TestStruct obj = { .b = 200, .e = 500, .f = 600 }; @@ -330,7 +339,6 @@ static void test_load_v1(void) static void test_load_v2(void) { - QEMUFile *fsave = open_test_file(true); uint8_t buf[] = { 0, 0, 0, 10, /* a */ 0, 0, 0, 20, /* b */ @@ -340,8 +348,7 @@ static void test_load_v2(void) 0, 0, 0, 0, 0, 0, 0, 60, /* f */ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ }; - qemu_put_buffer(fsave, buf, sizeof(buf)); - qemu_fclose(fsave); + save_buffer(buf, sizeof(buf)); QEMUFile *loading = open_test_file(false); TestStruct obj; @@ -419,7 +426,6 @@ static void test_save_skip(void) static void test_load_noskip(void) { - QEMUFile *fsave = open_test_file(true); uint8_t buf[] = { 0, 0, 0, 10, /* a */ 0, 0, 0, 20, /* b */ @@ -429,8 +435,7 @@ static void test_load_noskip(void) 0, 0, 0, 0, 0, 0, 0, 60, /* f */ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ }; - qemu_put_buffer(fsave, buf, sizeof(buf)); - qemu_fclose(fsave); + save_buffer(buf, sizeof(buf)); QEMUFile *loading = open_test_file(false); TestStruct obj = { .skip_c_e = false }; @@ -447,7 +452,6 @@ static void test_load_noskip(void) static void test_load_skip(void) { - QEMUFile *fsave = open_test_file(true); uint8_t buf[] = { 0, 0, 0, 10, /* a */ 0, 0, 0, 20, /* b */ @@ -455,8 +459,7 @@ static void test_load_skip(void) 0, 0, 0, 0, 0, 0, 0, 60, /* f */ QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */ }; - qemu_put_buffer(fsave, buf, sizeof(buf)); - qemu_fclose(fsave); + save_buffer(buf, sizeof(buf)); QEMUFile *loading = open_test_file(false); TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 }; @@ -471,6 +474,76 @@ static void test_load_skip(void) qemu_fclose(loading); } + +typedef struct { + int32_t i; +} TestStructTriv; + +const VMStateDescription vmsd_tst = { + .name = "test/tst", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT32(i, TestStructTriv), + VMSTATE_END_OF_LIST() + } +}; + +#define AR_SIZE 4 + +typedef struct { + TestStructTriv *ar[AR_SIZE]; +} TestArrayOfPtrToStuct; + +const VMStateDescription vmsd_arps = { + .name = "test/arps", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + 
VMSTATE_ARRAY_OF_POINTER_TO_STRUCT(ar, TestArrayOfPtrToStuct, + AR_SIZE, 0, vmsd_tst, TestStructTriv), + VMSTATE_END_OF_LIST() + } +}; +static void test_arr_ptr_str_no0_save(void) +{ + TestStructTriv ar[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; + TestArrayOfPtrToStuct sample = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; + uint8_t wire_sample[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF + }; + + save_vmstate(&vmsd_arps, &sample); + compare_vmstate(wire_sample, sizeof(wire_sample)); +} + +static void test_arr_ptr_str_no0_load(void) +{ + TestStructTriv ar_gt[AR_SIZE] = {{.i = 0}, {.i = 1}, {.i = 2}, {.i = 3} }; + TestStructTriv ar[AR_SIZE] = {}; + TestArrayOfPtrToStuct obj = {.ar = {&ar[0], &ar[1], &ar[2], &ar[3]} }; + int idx; + uint8_t wire_sample[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x02, + 0x00, 0x00, 0x00, 0x03, + QEMU_VM_EOF + }; + + save_buffer(wire_sample, sizeof(wire_sample)); + SUCCESS(load_vmstate_one(&vmsd_arps, &obj, 1, + wire_sample, sizeof(wire_sample))); + for (idx = 0; idx < AR_SIZE; ++idx) { + /* compare the target array ar with the ground truth array ar_gt */ + g_assert_cmpint(ar_gt[idx].i, ==, ar[idx].i); + } +} + int main(int argc, char **argv) { temp_fd = mkstemp(temp_file); @@ -485,6 +558,10 @@ int main(int argc, char **argv) g_test_add_func("/vmstate/field_exists/load/skip", test_load_skip); g_test_add_func("/vmstate/field_exists/save/noskip", test_save_noskip); g_test_add_func("/vmstate/field_exists/save/skip", test_save_skip); + g_test_add_func("/vmstate/array/ptr/str/no0/save", + test_arr_ptr_str_no0_save); + g_test_add_func("/vmstate/array/ptr/str/no0/load", + test_arr_ptr_str_no0_load); g_test_run(); close(temp_fd); diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c new file mode 100644 index 0000000000..79a2e69a28 --- /dev/null +++ b/tests/test-x86-cpuid-compat.c @@ -0,0 +1,233 @@ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/qmp/qlist.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qint.h" +#include "qapi/qmp/qbool.h" +#include "libqtest.h" + +static char *get_cpu0_qom_path(void) +{ + QDict *resp; + QList *ret; + QDict *cpu0; + char *path; + + resp = qmp("{'execute': 'query-cpus', 'arguments': {}}"); + g_assert(qdict_haskey(resp, "return")); + ret = qdict_get_qlist(resp, "return"); + + cpu0 = qobject_to_qdict(qlist_peek(ret)); + path = g_strdup(qdict_get_str(cpu0, "qom_path")); + QDECREF(resp); + return path; +} + +static QObject *qom_get(const char *path, const char *prop) +{ + QDict *resp = qmp("{ 'execute': 'qom-get'," + " 'arguments': { 'path': %s," + " 'property': %s } }", + path, prop); + QObject *ret = qdict_get(resp, "return"); + qobject_incref(ret); + QDECREF(resp); + return ret; +} + +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS +static bool qom_get_bool(const char *path, const char *prop) +{ + QBool *value = qobject_to_qbool(qom_get(path, prop)); + bool b = qbool_get_bool(value); + + QDECREF(value); + return b; +} +#endif + +typedef struct CpuidTestArgs { + const char *cmdline; + const char *property; + int64_t expected_value; +} CpuidTestArgs; + +static void test_cpuid_prop(const void *data) +{ + const CpuidTestArgs *args = data; + char *path; + QInt *value; + + qtest_start(args->cmdline); + path = get_cpu0_qom_path(); + value = qobject_to_qint(qom_get(path, args->property)); + g_assert_cmpint(qint_get_int(value), ==, args->expected_value); + qtest_end(); + + QDECREF(value); + 
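/*
 * Editor's note, not part of the patch: qom_get() above wraps a plain QMP
 * 'qom-get' command, and get_cpu0_qom_path() resolves the first CPU's QOM
 * path via 'query-cpus'.  On the wire, the exchange for one of the CPUID
 * properties checked below looks roughly like this (the concrete path and
 * return value are illustrative only):
 *
 *     -> { "execute": "qom-get",
 *          "arguments": { "path": "/machine/unattached/device[0]",
 *                         "property": "level" } }
 *     <- { "return": 13 }
 */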
g_free(path); +} + +static void add_cpuid_test(const char *name, const char *cmdline, + const char *property, int64_t expected_value) +{ + CpuidTestArgs *args = g_new0(CpuidTestArgs, 1); + args->cmdline = cmdline; + args->property = property; + args->expected_value = expected_value; + qtest_add_data_func(name, args, test_cpuid_prop); +} + +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS +static void test_plus_minus_subprocess(void) +{ + char *path; + + /* Rules: + * 1)"-foo" overrides "+foo" + * 2) "[+-]foo" overrides "foo=..." + * 3) Old feature names with underscores (e.g. "sse4_2") + * should keep working + * + * Note: rules 1 and 2 are planned to be removed soon, and + * should generate a warning. + */ + qtest_start("-cpu pentium,-fpu,+fpu,-mce,mce=on,+cx8,cx8=off,+sse4_1,sse4_2=on"); + path = get_cpu0_qom_path(); + + g_assert_false(qom_get_bool(path, "fpu")); + g_assert_false(qom_get_bool(path, "mce")); + g_assert_true(qom_get_bool(path, "cx8")); + + /* Test both the original and the alias feature names: */ + g_assert_true(qom_get_bool(path, "sse4-1")); + g_assert_true(qom_get_bool(path, "sse4.1")); + + g_assert_true(qom_get_bool(path, "sse4-2")); + g_assert_true(qom_get_bool(path, "sse4.2")); + + qtest_end(); + g_free(path); +} + +static void test_plus_minus(void) +{ + g_test_trap_subprocess("/x86/cpuid/parsing-plus-minus/subprocess", 0, 0); + g_test_trap_assert_passed(); + g_test_trap_assert_stderr("*Ambiguous CPU model string. " + "Don't mix both \"-mce\" and \"mce=on\"*"); + g_test_trap_assert_stderr("*Ambiguous CPU model string. " + "Don't mix both \"+cx8\" and \"cx8=off\"*"); + g_test_trap_assert_stdout(""); +} +#endif + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS + g_test_add_func("/x86/cpuid/parsing-plus-minus/subprocess", + test_plus_minus_subprocess); + g_test_add_func("/x86/cpuid/parsing-plus-minus", test_plus_minus); +#endif + + /* Original level values for CPU models: */ + add_cpuid_test("x86/cpuid/phenom/level", + "-cpu phenom", "level", 5); + add_cpuid_test("x86/cpuid/Conroe/level", + "-cpu Conroe", "level", 10); + add_cpuid_test("x86/cpuid/SandyBridge/level", + "-cpu SandyBridge", "level", 0xd); + add_cpuid_test("x86/cpuid/486/xlevel", + "-cpu 486", "xlevel", 0); + add_cpuid_test("x86/cpuid/core2duo/xlevel", + "-cpu core2duo", "xlevel", 0x80000008); + add_cpuid_test("x86/cpuid/phenom/xlevel", + "-cpu phenom", "xlevel", 0x8000001A); + add_cpuid_test("x86/cpuid/athlon/xlevel", + "-cpu athlon", "xlevel", 0x80000008); + + /* If level is not large enough, it should increase automatically: */ + /* CPUID[6].EAX: */ + add_cpuid_test("x86/cpuid/auto-level/phenom/arat", + "-cpu 486,+arat", "level", 6); + /* CPUID[EAX=7,ECX=0].EBX: */ + add_cpuid_test("x86/cpuid/auto-level/phenom/fsgsbase", + "-cpu phenom,+fsgsbase", "level", 7); + /* CPUID[EAX=7,ECX=0].ECX: */ + add_cpuid_test("x86/cpuid/auto-level/phenom/avx512vbmi", + "-cpu phenom,+avx512vbmi", "level", 7); + /* CPUID[EAX=0xd,ECX=1].EAX: */ + add_cpuid_test("x86/cpuid/auto-level/phenom/xsaveopt", + "-cpu phenom,+xsaveopt", "level", 0xd); + /* CPUID[8000_0001].EDX: */ + add_cpuid_test("x86/cpuid/auto-xlevel/486/3dnow", + "-cpu 486,+3dnow", "xlevel", 0x80000001); + /* CPUID[8000_0001].ECX: */ + add_cpuid_test("x86/cpuid/auto-xlevel/486/sse4a", + "-cpu 486,+sse4a", "xlevel", 0x80000001); + /* CPUID[8000_0007].EDX: */ + add_cpuid_test("x86/cpuid/auto-xlevel/486/invtsc", + "-cpu 486,+invtsc", "xlevel", 0x80000007); + /* CPUID[8000_000A].EDX: */ + 
add_cpuid_test("x86/cpuid/auto-xlevel/486/npt", + "-cpu 486,+npt", "xlevel", 0x8000000A); + /* CPUID[C000_0001].EDX: */ + add_cpuid_test("x86/cpuid/auto-xlevel2/phenom/xstore", + "-cpu phenom,+xstore", "xlevel2", 0xC0000001); + /* SVM needs CPUID[0x8000000A] */ + add_cpuid_test("x86/cpuid/auto-xlevel/athlon/svm", + "-cpu athlon,+svm", "xlevel", 0x8000000A); + + + /* If level is already large enough, it shouldn't change: */ + add_cpuid_test("x86/cpuid/auto-level/SandyBridge/multiple", + "-cpu SandyBridge,+arat,+fsgsbase,+avx512vbmi", + "level", 0xd); + /* If level is explicitly set, it shouldn't change: */ + add_cpuid_test("x86/cpuid/auto-level/486/fixed/0xF", + "-cpu 486,level=0xF,+arat,+fsgsbase,+avx512vbmi,+xsaveopt", + "level", 0xF); + add_cpuid_test("x86/cpuid/auto-level/486/fixed/2", + "-cpu 486,level=2,+arat,+fsgsbase,+avx512vbmi,+xsaveopt", + "level", 2); + add_cpuid_test("x86/cpuid/auto-level/486/fixed/0", + "-cpu 486,level=0,+arat,+fsgsbase,+avx512vbmi,+xsaveopt", + "level", 0); + + /* if xlevel is already large enough, it shouldn't change: */ + add_cpuid_test("x86/cpuid/auto-xlevel/phenom/3dnow", + "-cpu phenom,+3dnow,+sse4a,+invtsc,+npt,+svm", + "xlevel", 0x8000001A); + /* If xlevel is explicitly set, it shouldn't change: */ + add_cpuid_test("x86/cpuid/auto-xlevel/486/fixed/80000002", + "-cpu 486,xlevel=0x80000002,+3dnow,+sse4a,+invtsc,+npt,+svm", + "xlevel", 0x80000002); + add_cpuid_test("x86/cpuid/auto-xlevel/486/fixed/8000001A", + "-cpu 486,xlevel=0x8000001A,+3dnow,+sse4a,+invtsc,+npt,+svm", + "xlevel", 0x8000001A); + add_cpuid_test("x86/cpuid/auto-xlevel/phenom/fixed/0", + "-cpu 486,xlevel=0,+3dnow,+sse4a,+invtsc,+npt,+svm", + "xlevel", 0); + + /* if xlevel2 is already large enough, it shouldn't change: */ + add_cpuid_test("x86/cpuid/auto-xlevel2/486/fixed", + "-cpu 486,xlevel2=0xC0000002,+xstore", + "xlevel2", 0xC0000002); + + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + + add_cpuid_test("x86/cpuid/auto-level/pc-2.7", + "-machine pc-i440fx-2.7 -cpu 486,+arat,+avx512vbmi,+xsaveopt", + "level", 1); + add_cpuid_test("x86/cpuid/auto-xlevel/pc-2.7", + "-machine pc-i440fx-2.7 -cpu 486,+3dnow,+sse4a,+invtsc,+npt,+svm", + "xlevel", 0); + add_cpuid_test("x86/cpuid/auto-xlevel2/pc-2.7", + "-machine pc-i440fx-2.7 -cpu 486,+xstore", + "xlevel2", 0); + + return g_test_run(); +} diff --git a/tests/usb-hcd-ehci-test.c b/tests/usb-hcd-ehci-test.c index eb247ad453..57af8a034e 100644 --- a/tests/usb-hcd-ehci-test.c +++ b/tests/usb-hcd-ehci-test.c @@ -38,8 +38,7 @@ static void uhci_port_update(struct qhc *hc, int port, static void ehci_port_test(struct qhc *hc, int port, uint32_t expect) { - void *addr = hc->base + 0x64 + 4 * port; - uint32_t value = qpci_io_readl(hc->dev, addr); + uint32_t value = qpci_io_readl(hc->dev, hc->bar, 0x64 + 4 * port); uint16_t mask = ~(PORTSC_CSC | PORTSC_PEDC | PORTSC_OCC); #if 0 @@ -56,7 +55,7 @@ static void pci_init(void) if (pcibus) { return; } - pcibus = qpci_init_pc(); + pcibus = qpci_init_pc(NULL); g_assert(pcibus != NULL); qusb_pci_init_one(pcibus, &uhci1, QPCI_DEVFN(0x1d, 0), 4); @@ -91,7 +90,7 @@ static void pci_ehci_port_1(void) static void pci_ehci_config(void) { /* hands over all ports from companion uhci to ehci */ - qpci_io_writew(ehci1.dev, ehci1.base + 0x60, 1); + qpci_io_writew(ehci1.dev, ehci1.bar, 0x60, 1); } static void pci_uhci_port_2(void) diff --git a/tests/usb-hcd-uhci-test.c b/tests/usb-hcd-uhci-test.c index 5cd59ad91f..e956b9ccb7 100644 --- a/tests/usb-hcd-uhci-test.c +++ 
b/tests/usb-hcd-uhci-test.c @@ -9,9 +9,13 @@ #include "qemu/osdep.h" #include "libqtest.h" +#include "libqos/libqos.h" #include "libqos/usb.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" #include "hw/usb/uhci-regs.h" +static QOSState *qs; static void test_uhci_init(void) { @@ -19,13 +23,10 @@ static void test_uhci_init(void) static void test_port(int port) { - QPCIBus *pcibus; struct qhc uhci; g_assert(port > 0); - pcibus = qpci_init_pc(); - g_assert(pcibus != NULL); - qusb_pci_init_one(pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4); + qusb_pci_init_one(qs->pcibus, &uhci, QPCI_DEVFN(0x1d, 0), 4); uhci_port_test(&uhci, port - 1, UHCI_PORT_CCS); } @@ -75,6 +76,10 @@ static void test_usb_storage_hotplug(void) int main(int argc, char **argv) { + const char *arch = qtest_get_arch(); + const char *cmd = "-device piix3-usb-uhci,id=uhci,addr=1d.0" + " -drive id=drive0,if=none,file=/dev/null,format=raw" + " -device usb-tablet,bus=uhci.0,port=1"; int ret; g_test_init(&argc, &argv, NULL); @@ -84,11 +89,17 @@ int main(int argc, char **argv) qtest_add_func("/uhci/pci/hotplug", test_uhci_hotplug); qtest_add_func("/uhci/pci/hotplug/usb-storage", test_usb_storage_hotplug); - qtest_start("-device piix3-usb-uhci,id=uhci,addr=1d.0" - " -drive id=drive0,if=none,file=/dev/null,format=raw" - " -device usb-tablet,bus=uhci.0,port=1"); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qs = qtest_pc_boot(cmd); + } else if (strcmp(arch, "ppc64") == 0) { + qs = qtest_spapr_boot(cmd); + } else { + g_printerr("usb-hcd-uhci-test tests are only " + "available on x86 or ppc64\n"); + exit(EXIT_FAILURE); + } ret = g_test_run(); - qtest_end(); + qtest_shutdown(qs); return ret; } diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index 27b10c19b8..96bf00eefa 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -11,13 +11,23 @@ #include "qemu/osdep.h" #include "libqtest.h" +#include "qapi/error.h" #include "qemu/option.h" #include "qemu/range.h" #include "qemu/sockets.h" #include "sysemu/char.h" #include "sysemu/sysemu.h" +#include "libqos/libqos.h" +#include "libqos/pci-pc.h" +#include "libqos/virtio-pci.h" +#include "qapi/error.h" + +#include "libqos/malloc-pc.h" +#include "hw/virtio/virtio-net.h" #include +#include +#include #include /* GLIB version compatibility flags */ @@ -29,14 +39,13 @@ #define HAVE_MONOTONIC_TIME #endif -#define QEMU_CMD_ACCEL " -machine accel=tcg" #define QEMU_CMD_MEM " -m %d -object memory-backend-file,id=mem,size=%dM,"\ "mem-path=%s,share=on -numa node,memdev=mem" #define QEMU_CMD_CHR " -chardev socket,id=%s,path=%s%s" #define QEMU_CMD_NETDEV " -netdev vhost-user,id=net0,chardev=%s,vhostforce" -#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0,romfile=./pc-bios/pxe-virtio.rom" +#define QEMU_CMD_NET " -device virtio-net-pci,netdev=net0" -#define QEMU_CMD QEMU_CMD_ACCEL QEMU_CMD_MEM QEMU_CMD_CHR \ +#define QEMU_CMD QEMU_CMD_MEM QEMU_CMD_CHR \ QEMU_CMD_NETDEV QEMU_CMD_NET #define HUGETLBFS_MAGIC 0x958458f6 @@ -46,6 +55,7 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_USER_PROTOCOL_F_MQ 0 #define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 #define VHOST_LOG_PAGE 0x1000 @@ -68,6 +78,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ERR = 14, VHOST_USER_GET_PROTOCOL_FEATURES = 15, VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_MAX } VhostUserRequest; @@ -119,11 +130,18 @@ static VhostUserMsg m __attribute__ ((unused)); 
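/*
 * Editor's note, not part of the patch: every *_GET_* request handled by the
 * fake vhost-user backend in chr_read() below is answered with the same
 * pattern: reuse the request header, set the reply flag, fill a u64 payload
 * and write header plus payload back over the chardev.  A minimal sketch of
 * that pattern; the helper itself is hypothetical:
 */
static void vhost_user_reply_u64_sketch(CharBackend *chr, VhostUserMsg *msg,
                                        uint64_t value)
{
    msg->flags |= VHOST_USER_REPLY_MASK;
    msg->size = sizeof(msg->payload.u64);
    msg->payload.u64 = value;
    qemu_chr_fe_write_all(chr, (uint8_t *)msg,
                          VHOST_USER_HDR_SIZE + msg->size);
}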
#define VHOST_USER_VERSION (0x1) /*****************************************************************************/ +enum { + TEST_FLAGS_OK, + TEST_FLAGS_DISCONNECT, + TEST_FLAGS_BAD, + TEST_FLAGS_END, +}; + typedef struct TestServer { gchar *socket_path; gchar *mig_path; gchar *chr_name; - CharDriverState *chr; + CharBackend chr; int fds_num; int fds[VHOST_MEMORY_MAX_NREGIONS]; VhostUserMemory memory; @@ -131,11 +149,38 @@ typedef struct TestServer { CompatGCond data_cond; int log_fd; uint64_t rings; + bool test_fail; + int test_flags; + int queues; } TestServer; static const char *tmpfs; static const char *root; +static void init_virtio_dev(TestServer *s) +{ + QPCIBus *bus; + QVirtioPCIDevice *dev; + uint32_t features; + + bus = qpci_init_pc(NULL); + g_assert_nonnull(bus); + + dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET); + g_assert_nonnull(dev); + + qvirtio_pci_device_enable(dev); + qvirtio_reset(&dev->vdev); + qvirtio_set_acknowledge(&dev->vdev); + qvirtio_set_driver(&dev->vdev); + + features = qvirtio_get_features(&dev->vdev); + features = features & VIRTIO_NET_F_MAC; + qvirtio_set_features(&dev->vdev, features); + + qvirtio_set_driver_ok(&dev->vdev); +} + static void wait_for_fds(TestServer *s) { gint64 end_time; @@ -216,11 +261,17 @@ static int chr_can_read(void *opaque) static void chr_read(void *opaque, const uint8_t *buf, int size) { TestServer *s = opaque; - CharDriverState *chr = s->chr; + CharBackend *chr = &s->chr; VhostUserMsg msg; uint8_t *p = (uint8_t *) &msg; int fd; + if (s->test_fail) { + qemu_chr_fe_disconnect(chr); + /* now switch to non-failure */ + s->test_fail = false; + } + if (size != VHOST_USER_HDR_SIZE) { g_test_message("Wrong message size received %d\n", size); return; @@ -246,6 +297,13 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) msg.size = sizeof(m.payload.u64); msg.payload.u64 = 0x1ULL << VHOST_F_LOG_ALL | 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + if (s->queues > 1) { + msg.payload.u64 |= 0x1ULL << VIRTIO_NET_F_MQ; + } + if (s->test_flags >= TEST_FLAGS_BAD) { + msg.payload.u64 = 0; + s->test_flags = TEST_FLAGS_END; + } p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); break; @@ -253,6 +311,10 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) case VHOST_USER_SET_FEATURES: g_assert_cmpint(msg.payload.u64 & (0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES), !=, 0ULL); + if (s->test_flags == TEST_FLAGS_DISCONNECT) { + qemu_chr_fe_disconnect(chr); + s->test_flags = TEST_FLAGS_BAD; + } break; case VHOST_USER_GET_PROTOCOL_FEATURES: @@ -260,6 +322,9 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) msg.flags |= VHOST_USER_REPLY_MASK; msg.size = sizeof(m.payload.u64); msg.payload.u64 = 1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD; + if (s->queues > 1) { + msg.payload.u64 |= 1 << VHOST_USER_PROTOCOL_F_MQ; + } p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); break; @@ -272,14 +337,15 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) p = (uint8_t *) &msg; qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); - assert(msg.payload.state.index < 2); + assert(msg.payload.state.index < s->queues * 2); s->rings &= ~(0x1ULL << msg.payload.state.index); break; case VHOST_USER_SET_MEM_TABLE: /* received the mem table */ memcpy(&s->memory, &msg.payload.memory, sizeof(msg.payload.memory)); - s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, G_N_ELEMENTS(s->fds)); + s->fds_num = qemu_chr_fe_get_msgfds(chr, s->fds, + G_N_ELEMENTS(s->fds)); 
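Every GET_* case in chr_read above repeats the same three steps before writing the reply back over the chardev. A condensed sketch of that reply path, using the VhostUserMsg layout and CharBackend helpers already present in this file; reply_u64 itself is a hypothetical refactoring, not part of the patch:

    /* Send a single-u64 reply for the request currently held in *msg. */
    static void reply_u64(CharBackend *chr, VhostUserMsg *msg, uint64_t value)
    {
        msg->flags |= VHOST_USER_REPLY_MASK;
        msg->size = sizeof(msg->payload.u64);
        msg->payload.u64 = value;
        qemu_chr_fe_write_all(chr, (uint8_t *)msg,
                              VHOST_USER_HDR_SIZE + msg->size);
    }

With the CharDriverState front end replaced by a CharBackend, the same qemu_chr_fe_* calls keep working; only the handle that the test server passes around changes.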
/* signal the test that it can continue */ g_cond_signal(&s->data_cond); @@ -312,10 +378,18 @@ static void chr_read(void *opaque, const uint8_t *buf, int size) break; case VHOST_USER_SET_VRING_BASE: - assert(msg.payload.state.index < 2); + assert(msg.payload.state.index < s->queues * 2); s->rings |= 0x1ULL << msg.payload.state.index; break; + case VHOST_USER_GET_QUEUE_NUM: + msg.flags |= VHOST_USER_REPLY_MASK; + msg.size = sizeof(m.payload.u64); + msg.payload.u64 = s->queues; + p = (uint8_t *) &msg; + qemu_chr_fe_write_all(chr, p, VHOST_USER_HDR_SIZE + msg.size); + break; + default: break; } @@ -362,19 +436,33 @@ static TestServer *test_server_new(const gchar *name) g_cond_init(&server->data_cond); server->log_fd = -1; + server->queues = 1; return server; } +static void chr_event(void *opaque, int event) +{ + TestServer *s = opaque; + + if (s->test_flags == TEST_FLAGS_END && + event == CHR_EVENT_CLOSED) { + s->test_flags = TEST_FLAGS_OK; + } +} + static void test_server_create_chr(TestServer *server, const gchar *opt) { gchar *chr_path; + CharDriverState *chr; chr_path = g_strdup_printf("unix:%s%s", server->socket_path, opt); - server->chr = qemu_chr_new(server->chr_name, chr_path, NULL); + chr = qemu_chr_new(server->chr_name, chr_path); g_free(chr_path); - qemu_chr_add_handlers(server->chr, chr_can_read, chr_read, NULL, server); + qemu_chr_fe_init(&server->chr, chr, &error_abort); + qemu_chr_fe_set_handlers(&server->chr, chr_can_read, chr_read, + chr_event, server, NULL, true); } static void test_server_listen(TestServer *server) @@ -398,8 +486,10 @@ static inline void test_server_connect(TestServer *server) static gboolean _test_server_free(TestServer *server) { int i; + CharDriverState *chr = qemu_chr_fe_get_driver(&server->chr); - qemu_chr_delete(server->chr); + qemu_chr_fe_deinit(&server->chr); + qemu_chr_delete(chr); for (i = 0; i < server->fds_num; i++) { close(server->fds[i]); @@ -548,6 +638,7 @@ static void test_migrate(void) from = qtest_start(cmd); g_free(cmd); + init_virtio_dev(s); wait_for_fds(s); size = get_log_size(s); g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8)); @@ -612,7 +703,6 @@ static void test_migrate(void) global_qtest = global; } -#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS static void wait_for_rings_started(TestServer *s, size_t count) { gint64 end_time; @@ -630,12 +720,13 @@ static void wait_for_rings_started(TestServer *s, size_t count) g_mutex_unlock(&s->data_mutex); } +#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS static gboolean reconnect_cb(gpointer user_data) { TestServer *s = user_data; - qemu_chr_disconnect(s->chr); + qemu_chr_fe_disconnect(&s->chr); return FALSE; } @@ -662,6 +753,7 @@ static void test_reconnect_subprocess(void) qtest_start(cmd); g_free(cmd); + init_virtio_dev(s); wait_for_fds(s); wait_for_rings_started(s, 2); @@ -685,8 +777,143 @@ static void test_reconnect(void) g_test_trap_assert_passed(); g_free(path); } + +static void test_connect_fail_subprocess(void) +{ + TestServer *s = test_server_new("connect-fail"); + char *cmd; + + s->test_fail = true; + g_thread_new("connect", connect_thread, s); + cmd = GET_QEMU_CMDE(s, 2, ",server", ""); + qtest_start(cmd); + g_free(cmd); + + init_virtio_dev(s); + wait_for_fds(s); + wait_for_rings_started(s, 2); + + qtest_end(); + test_server_free(s); +} + +static void test_connect_fail(void) +{ + gchar *path = g_strdup_printf("/%s/vhost-user/connect-fail/subprocess", + qtest_get_arch()); + g_test_trap_subprocess(path, 0, 0); + g_test_trap_assert_passed(); + g_free(path); +} + +static void 
test_flags_mismatch_subprocess(void) +{ + TestServer *s = test_server_new("flags-mismatch"); + char *cmd; + + s->test_flags = TEST_FLAGS_DISCONNECT; + g_thread_new("connect", connect_thread, s); + cmd = GET_QEMU_CMDE(s, 2, ",server", ""); + qtest_start(cmd); + g_free(cmd); + + init_virtio_dev(s); + wait_for_fds(s); + wait_for_rings_started(s, 2); + + qtest_end(); + test_server_free(s); +} + +static void test_flags_mismatch(void) +{ + gchar *path = g_strdup_printf("/%s/vhost-user/flags-mismatch/subprocess", + qtest_get_arch()); + g_test_trap_subprocess(path, 0, 0); + g_test_trap_assert_passed(); + g_free(path); +} + #endif +static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot) +{ + QVirtioPCIDevice *dev; + + dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET); + g_assert(dev != NULL); + g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET); + + qvirtio_pci_device_enable(dev); + qvirtio_reset(&dev->vdev); + qvirtio_set_acknowledge(&dev->vdev); + qvirtio_set_driver(&dev->vdev); + + return dev; +} + +static void driver_init(QVirtioDevice *dev) +{ + uint32_t features; + + features = qvirtio_get_features(dev); + features = features & ~(QVIRTIO_F_BAD_FEATURE | + (1u << VIRTIO_RING_F_INDIRECT_DESC) | + (1u << VIRTIO_RING_F_EVENT_IDX)); + qvirtio_set_features(dev, features); + + qvirtio_set_driver_ok(dev); +} + +#define PCI_SLOT 0x04 + +static void test_multiqueue(void) +{ + const int queues = 2; + TestServer *s = test_server_new("mq"); + QVirtioPCIDevice *dev; + QPCIBus *bus; + QVirtQueuePCI *vq[queues * 2]; + QGuestAllocator *alloc; + char *cmd; + int i; + + s->queues = queues; + test_server_listen(s); + + cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d " + "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d", + 512, 512, root, s->chr_name, + s->socket_path, "", s->chr_name, + queues, queues * 2 + 2); + qtest_start(cmd); + g_free(cmd); + + bus = qpci_init_pc(NULL); + dev = virtio_net_pci_init(bus, PCI_SLOT); + + alloc = pc_alloc_init(); + for (i = 0; i < queues * 2; i++) { + vq[i] = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, alloc, i); + } + + driver_init(&dev->vdev); + wait_for_rings_started(s, queues * 2); + + /* End test */ + for (i = 0; i < queues * 2; i++) { + qvirtqueue_cleanup(dev->vdev.bus, &vq[i]->vq, alloc); + } + pc_alloc_uninit(alloc); + qvirtio_pci_device_disable(dev); + g_free(dev->pdev); + g_free(dev); + qpci_free_pc(bus); + qtest_end(); + + test_server_free(s); +} + int main(int argc, char **argv) { QTestState *s = NULL; @@ -728,13 +955,21 @@ int main(int argc, char **argv) s = qtest_start(qemu_cmd); g_free(qemu_cmd); + init_virtio_dev(server); qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem); qtest_add_func("/vhost-user/migrate", test_migrate); + qtest_add_func("/vhost-user/multiqueue", test_multiqueue); #ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS qtest_add_func("/vhost-user/reconnect/subprocess", test_reconnect_subprocess); qtest_add_func("/vhost-user/reconnect", test_reconnect); + qtest_add_func("/vhost-user/connect-fail/subprocess", + test_connect_fail_subprocess); + qtest_add_func("/vhost-user/connect-fail", test_connect_fail); + qtest_add_func("/vhost-user/flags-mismatch/subprocess", + test_flags_mismatch_subprocess); + qtest_add_func("/vhost-user/flags-mismatch", test_flags_mismatch); #endif ret = g_test_run(); diff --git a/tests/virtio-9p-test.c b/tests/virtio-9p-test.c index 1e39335a79..9c4f6cb406 100644 --- a/tests/virtio-9p-test.c +++ b/tests/virtio-9p-test.c @@ -10,34 +10,118 @@ #include 
"qemu/osdep.h" #include "libqtest.h" #include "qemu-common.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" +#include "libqos/virtio.h" +#include "libqos/virtio-pci.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_pci.h" + +static const char mount_tag[] = "qtest"; +static char *test_share; + + +static QOSState *qvirtio_9p_start(void) +{ + const char *arch = qtest_get_arch(); + const char *cmd = "-fsdev local,id=fsdev0,security_model=none,path=%s " + "-device virtio-9p-pci,fsdev=fsdev0,mount_tag=%s"; + + test_share = g_strdup("/tmp/qtest.XXXXXX"); + g_assert_nonnull(mkdtemp(test_share)); + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + return qtest_pc_boot(cmd, test_share, mount_tag); + } + if (strcmp(arch, "ppc64") == 0) { + return qtest_spapr_boot(cmd, test_share, mount_tag); + } + + g_printerr("virtio-9p tests are only available on x86 or ppc64\n"); + exit(EXIT_FAILURE); +} + +static void qvirtio_9p_stop(QOSState *qs) +{ + qtest_shutdown(qs); + rmdir(test_share); + g_free(test_share); +} -/* Tests only initialization so far. TODO: Replace with functional tests */ static void pci_nop(void) { + QOSState *qs; + + qs = qvirtio_9p_start(); + qvirtio_9p_stop(qs); } -static char test_share[] = "/tmp/qtest.XXXXXX"; +typedef struct { + QVirtioDevice *dev; + QOSState *qs; + QVirtQueue *vq; +} QVirtIO9P; -int main(int argc, char **argv) +static QVirtIO9P *qvirtio_9p_pci_init(QOSState *qs) { - char *args; - int ret; + QVirtIO9P *v9p; + QVirtioPCIDevice *dev; - g_test_init(&argc, &argv, NULL); - qtest_add_func("/virtio/9p/pci/nop", pci_nop); + v9p = g_new0(QVirtIO9P, 1); - g_assert(mkdtemp(test_share)); + v9p->qs = qs; + dev = qvirtio_pci_device_find(v9p->qs->pcibus, VIRTIO_ID_9P); + g_assert_nonnull(dev); + g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_9P); + v9p->dev = (QVirtioDevice *) dev; - args = g_strdup_printf("-fsdev local,id=fsdev0,security_model=none,path=%s " - "-device virtio-9p-pci,fsdev=fsdev0,mount_tag=qtest", - test_share); - qtest_start(args); - g_free(args); + qvirtio_pci_device_enable(dev); + qvirtio_reset(v9p->dev); + qvirtio_set_acknowledge(v9p->dev); + qvirtio_set_driver(v9p->dev); - ret = g_test_run(); + v9p->vq = qvirtqueue_setup(v9p->dev, v9p->qs->alloc, 0); + return v9p; +} - qtest_end(); - rmdir(test_share); +static void qvirtio_9p_pci_free(QVirtIO9P *v9p) +{ + qvirtqueue_cleanup(v9p->dev->bus, v9p->vq, v9p->qs->alloc); + qvirtio_pci_device_disable(container_of(v9p->dev, QVirtioPCIDevice, vdev)); + g_free(v9p->dev); + g_free(v9p); +} + +static void pci_basic_config(void) +{ + QVirtIO9P *v9p; + size_t tag_len; + char *tag; + int i; + QOSState *qs; + + qs = qvirtio_9p_start(); + v9p = qvirtio_9p_pci_init(qs); + + tag_len = qvirtio_config_readw(v9p->dev, 0); + g_assert_cmpint(tag_len, ==, strlen(mount_tag)); + + tag = g_malloc(tag_len); + for (i = 0; i < tag_len; i++) { + tag[i] = qvirtio_config_readb(v9p->dev, i + 2); + } + g_assert_cmpmem(tag, tag_len, mount_tag, tag_len); + g_free(tag); + + qvirtio_9p_pci_free(v9p); + qvirtio_9p_stop(qs); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + qtest_add_func("/virtio/9p/pci/nop", pci_nop); + qtest_add_func("/virtio/9p/pci/basic/configuration", pci_basic_config); - return ret; + return g_test_run(); } diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c index 811cf756c8..0e32e416dd 100644 --- a/tests/virtio-blk-test.c +++ b/tests/virtio-blk-test.c @@ -10,12 +10,11 @@ #include "qemu/osdep.h" #include 
"libqtest.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" #include "libqos/virtio.h" #include "libqos/virtio-pci.h" #include "libqos/virtio-mmio.h" -#include "libqos/pci-pc.h" -#include "libqos/malloc.h" -#include "libqos/malloc-pc.h" #include "libqos/malloc-generic.h" #include "qemu/bswap.h" #include "standard-headers/linux/virtio_ids.h" @@ -58,24 +57,29 @@ static char *drive_create(void) return tmp_path; } -static QPCIBus *pci_test_start(void) +static QOSState *pci_test_start(void) { - char *cmdline; + QOSState *qs; + const char *arch = qtest_get_arch(); char *tmp_path; + const char *cmd = "-drive if=none,id=drive0,file=%s,format=raw " + "-drive if=none,id=drive1,file=/dev/null,format=raw " + "-device virtio-blk-pci,id=drv0,drive=drive0," + "addr=%x.%x"; tmp_path = drive_create(); - cmdline = g_strdup_printf("-drive if=none,id=drive0,file=%s,format=raw " - "-drive if=none,id=drive1,file=/dev/null,format=raw " - "-device virtio-blk-pci,id=drv0,drive=drive0," - "addr=%x.%x", - tmp_path, PCI_SLOT, PCI_FN); - qtest_start(cmdline); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qs = qtest_pc_boot(cmd, tmp_path, PCI_SLOT, PCI_FN); + } else if (strcmp(arch, "ppc64") == 0) { + qs = qtest_spapr_boot(cmd, tmp_path, PCI_SLOT, PCI_FN); + } else { + g_printerr("virtio-blk tests are only available on x86 or ppc64\n"); + exit(EXIT_FAILURE); + } unlink(tmp_path); g_free(tmp_path); - g_free(cmdline); - - return qpci_init_pc(); + return qs; } static void arm_test_start(void) @@ -110,30 +114,30 @@ static QVirtioPCIDevice *virtio_blk_pci_init(QPCIBus *bus, int slot) g_assert_cmphex(dev->pdev->devfn, ==, ((slot << 3) | PCI_FN)); qvirtio_pci_device_enable(dev); - qvirtio_reset(&qvirtio_pci, &dev->vdev); - qvirtio_set_acknowledge(&qvirtio_pci, &dev->vdev); - qvirtio_set_driver(&qvirtio_pci, &dev->vdev); + qvirtio_reset(&dev->vdev); + qvirtio_set_acknowledge(&dev->vdev); + qvirtio_set_driver(&dev->vdev); return dev; } -static inline void virtio_blk_fix_request(QVirtioBlkReq *req) +static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req) { #ifdef HOST_WORDS_BIGENDIAN - bool host_endian = true; + const bool host_is_big_endian = true; #else - bool host_endian = false; + const bool host_is_big_endian = false; #endif - if (qtest_big_endian() != host_endian) { + if (qvirtio_is_big_endian(d) != host_is_big_endian) { req->type = bswap32(req->type); req->ioprio = bswap32(req->ioprio); req->sector = bswap64(req->sector); } } -static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioBlkReq *req, - uint64_t data_size) +static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d, + QVirtioBlkReq *req, uint64_t data_size) { uint64_t addr; uint8_t status = 0xFF; @@ -141,7 +145,7 @@ static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioBlkReq *req, g_assert_cmpuint(data_size % 512, ==, 0); addr = guest_alloc(alloc, sizeof(*req) + data_size); - virtio_blk_fix_request(req); + virtio_blk_fix_request(d, req); memwrite(addr, req, 16); memwrite(addr + 16, req->data, data_size); @@ -150,8 +154,8 @@ static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioBlkReq *req, return addr; } -static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, - QGuestAllocator *alloc, QVirtQueue *vq, uint64_t device_specific) +static void test_basic(QVirtioDevice *dev, QGuestAllocator *alloc, + QVirtQueue *vq) { QVirtioBlkReq req; uint64_t req_addr; @@ -161,18 +165,18 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, 
uint8_t status; char *data; - capacity = qvirtio_config_readq(bus, dev, device_specific); + capacity = qvirtio_config_readq(dev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(bus, dev); + features = qvirtio_get_features(dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | (1u << VIRTIO_RING_F_EVENT_IDX) | (1u << VIRTIO_BLK_F_SCSI)); - qvirtio_set_features(bus, dev, features); + qvirtio_set_features(dev, features); - qvirtio_set_driver_ok(bus, dev); + qvirtio_set_driver_ok(dev); /* Write and read with 3 descriptor layout */ /* Write request */ @@ -182,7 +186,7 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(alloc, dev, &req, 512); g_free(req.data); @@ -190,9 +194,9 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, qvirtqueue_add(vq, req_addr + 16, 512, false, true); qvirtqueue_add(vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -204,7 +208,7 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, req.sector = 0; req.data = g_malloc0(512); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(alloc, dev, &req, 512); g_free(req.data); @@ -212,9 +216,9 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, qvirtqueue_add(vq, req_addr + 16, 512, true, true); qvirtqueue_add(vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -234,15 +238,15 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(alloc, dev, &req, 512); g_free(req.data); free_head = qvirtqueue_add(vq, req_addr, 528, false, true); qvirtqueue_add(vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -254,16 +258,16 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, req.sector = 1; req.data = g_malloc0(512); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(alloc, dev, &req, 512); g_free(req.data); free_head = qvirtqueue_add(vq, req_addr, 16, false, true); qvirtqueue_add(vq, req_addr + 16, 513, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -279,42 +283,30 @@ static void test_basic(const QVirtioBus *bus, QVirtioDevice *dev, static void pci_basic(void) { QVirtioPCIDevice *dev; - QPCIBus *bus; + QOSState *qs; QVirtQueuePCI *vqpci; 
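Throughout these hunks the qvirtio_* helpers lose their explicit QVirtioBus argument and operate on the QVirtioDevice alone. A minimal sketch of the resulting bring-up sequence, assuming libqos/virtio.h and the virtio-blk feature bits already used in this file; init_blk_dev is a hypothetical name:

    /* Negotiate features and move the device to DRIVER_OK with the
     * bus-less qvirtio API. */
    static void init_blk_dev(QVirtioDevice *dev)
    {
        uint32_t features;

        qvirtio_reset(dev);
        qvirtio_set_acknowledge(dev);
        qvirtio_set_driver(dev);

        features = qvirtio_get_features(dev);
        features &= ~(QVIRTIO_F_BAD_FEATURE |
                      (1u << VIRTIO_RING_F_INDIRECT_DESC) |
                      (1u << VIRTIO_RING_F_EVENT_IDX) |
                      (1u << VIRTIO_BLK_F_SCSI));
        qvirtio_set_features(dev, features);

        qvirtio_set_driver_ok(dev);
    }

The bus reference now lives inside the device itself, which is also why the updated cleanup calls pass dev->vdev.bus to qvirtqueue_cleanup().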
- QGuestAllocator *alloc; - void *addr; - bus = pci_test_start(); - dev = virtio_blk_pci_init(bus, PCI_SLOT); + qs = pci_test_start(); + dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT); - alloc = pc_alloc_init(); - vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 0); + vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, qs->alloc, 0); - /* MSI-X is not enabled */ - addr = dev->addr + VIRTIO_PCI_CONFIG_OFF(false); - - test_basic(&qvirtio_pci, &dev->vdev, alloc, &vqpci->vq, - (uint64_t)(uintptr_t)addr); + test_basic(&dev->vdev, qs->alloc, &vqpci->vq); /* End test */ - qvirtqueue_cleanup(&qvirtio_pci, &vqpci->vq, alloc); - pc_alloc_uninit(alloc); + qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qvirtio_pci_device_disable(dev); g_free(dev); - qpci_free_pc(bus); - test_end(); + qtest_shutdown(qs); } static void pci_indirect(void) { QVirtioPCIDevice *dev; - QPCIBus *bus; QVirtQueuePCI *vqpci; - QGuestAllocator *alloc; + QOSState *qs; QVirtioBlkReq req; QVRingIndirectDesc *indirect; - void *addr; uint64_t req_addr; uint64_t capacity; uint32_t features; @@ -322,28 +314,22 @@ static void pci_indirect(void) uint8_t status; char *data; - bus = pci_test_start(); - - dev = virtio_blk_pci_init(bus, PCI_SLOT); + qs = pci_test_start(); - /* MSI-X is not enabled */ - addr = dev->addr + VIRTIO_PCI_CONFIG_OFF(false); + dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT); - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - (uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); + features = qvirtio_get_features(&dev->vdev); g_assert_cmphex(features & (1u << VIRTIO_RING_F_INDIRECT_DESC), !=, 0); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_EVENT_IDX) | (1u << VIRTIO_BLK_F_SCSI)); - qvirtio_set_features(&qvirtio_pci, &dev->vdev, features); + qvirtio_set_features(&dev->vdev, features); - alloc = pc_alloc_init(); - vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 0); - qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev); + vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, qs->alloc, 0); + qvirtio_set_driver_ok(&dev->vdev); /* Write request */ req.type = VIRTIO_BLK_T_OUT; @@ -352,23 +338,23 @@ static void pci_indirect(void) req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); g_free(req.data); - indirect = qvring_indirect_desc_setup(&dev->vdev, alloc, 2); + indirect = qvring_indirect_desc_setup(&dev->vdev, qs->alloc, 2); qvring_indirect_desc_add(indirect, req_addr, 528, false); qvring_indirect_desc_add(indirect, req_addr + 528, 1, true); free_head = qvirtqueue_add_indirect(&vqpci->vq, indirect); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, + qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); g_free(indirect); - guest_free(alloc, req_addr); + guest_free(qs->alloc, req_addr); /* Read request */ req.type = VIRTIO_BLK_T_IN; @@ -377,17 +363,17 @@ static void pci_indirect(void) req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); 
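The requests built in these tests always use the same guest-memory layout: a 16-byte request header, the 512-byte data buffer at offset 16, and a one-byte status at offset 528, which is why the assertions read req_addr + 528. A hedged sketch of one write round-trip over that layout, reusing only helpers this file already provides; do_write_512 is a hypothetical wrapper:

    /* Submit one 512-byte write as header/data/status descriptors and
     * check the status byte the device fills in. */
    static void do_write_512(QVirtioDevice *dev, QGuestAllocator *alloc,
                             QVirtQueue *vq, uint64_t sector, const char *text)
    {
        QVirtioBlkReq req = {
            .type = VIRTIO_BLK_T_OUT,
            .ioprio = 1,
            .sector = sector,
            .data = g_malloc0(512),
        };
        uint64_t req_addr;
        uint32_t free_head;

        strcpy(req.data, text);
        req_addr = virtio_blk_request(alloc, dev, &req, 512);
        g_free(req.data);

        free_head = qvirtqueue_add(vq, req_addr, 16, false, true);   /* header */
        qvirtqueue_add(vq, req_addr + 16, 512, false, true);         /* data   */
        qvirtqueue_add(vq, req_addr + 528, 1, true, false);          /* status */
        qvirtqueue_kick(dev, vq, free_head);

        qvirtio_wait_queue_isr(dev, vq, QVIRTIO_BLK_TIMEOUT_US);
        g_assert_cmpint(readb(req_addr + 528), ==, 0);
        guest_free(alloc, req_addr);
    }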
g_free(req.data); - indirect = qvring_indirect_desc_setup(&dev->vdev, alloc, 2); + indirect = qvring_indirect_desc_setup(&dev->vdev, qs->alloc, 2); qvring_indirect_desc_add(indirect, req_addr, 16, false); qvring_indirect_desc_add(indirect, req_addr + 16, 513, true); free_head = qvirtqueue_add_indirect(&vqpci->vq, indirect); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, + qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); @@ -398,61 +384,51 @@ static void pci_indirect(void) g_free(data); g_free(indirect); - guest_free(alloc, req_addr); + guest_free(qs->alloc, req_addr); /* End test */ - qvirtqueue_cleanup(&qvirtio_pci, &vqpci->vq, alloc); - pc_alloc_uninit(alloc); + qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qvirtio_pci_device_disable(dev); g_free(dev); - qpci_free_pc(bus); - test_end(); + qtest_shutdown(qs); } static void pci_config(void) { QVirtioPCIDevice *dev; - QPCIBus *bus; + QOSState *qs; int n_size = TEST_IMAGE_SIZE / 2; - void *addr; uint64_t capacity; - bus = pci_test_start(); + qs = pci_test_start(); - dev = virtio_blk_pci_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT); - /* MSI-X is not enabled */ - addr = dev->addr + VIRTIO_PCI_CONFIG_OFF(false); - - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - (uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev); + qvirtio_set_driver_ok(&dev->vdev); qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " " 'size': %d } }", n_size); - qvirtio_wait_config_isr(&qvirtio_pci, &dev->vdev, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US); - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - (uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, n_size / 512); qvirtio_pci_device_disable(dev); g_free(dev); - qpci_free_pc(bus); - test_end(); + + qtest_shutdown(qs); } static void pci_msix(void) { QVirtioPCIDevice *dev; - QPCIBus *bus; + QOSState *qs; QVirtQueuePCI *vqpci; - QGuestAllocator *alloc; QVirtioBlkReq req; int n_size = TEST_IMAGE_SIZE / 2; - void *addr; uint64_t req_addr; uint64_t capacity; uint32_t features; @@ -460,41 +436,34 @@ static void pci_msix(void) uint8_t status; char *data; - bus = pci_test_start(); - alloc = pc_alloc_init(); + qs = pci_test_start(); - dev = virtio_blk_pci_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT); qpci_msix_enable(dev->pdev); - qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0); + qvirtio_pci_set_msix_configuration_vector(dev, qs->alloc, 0); - /* MSI-X is enabled */ - addr = dev->addr + VIRTIO_PCI_CONFIG_OFF(true); - - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - (uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); + features = qvirtio_get_features(&dev->vdev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | (1u << VIRTIO_RING_F_EVENT_IDX) | (1u << VIRTIO_BLK_F_SCSI)); - qvirtio_set_features(&qvirtio_pci, &dev->vdev, features); + qvirtio_set_features(&dev->vdev, features); - vqpci 
= (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 0); - qvirtqueue_pci_msix_setup(dev, vqpci, alloc, 1); + vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, qs->alloc, 0); + qvirtqueue_pci_msix_setup(dev, vqpci, qs->alloc, 1); - qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev); + qvirtio_set_driver_ok(&dev->vdev); qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " " 'size': %d } }", n_size); - qvirtio_wait_config_isr(&qvirtio_pci, &dev->vdev, QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_config_isr(&dev->vdev, QVIRTIO_BLK_TIMEOUT_US); - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - (uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, n_size / 512); /* Write request */ @@ -504,22 +473,22 @@ static void pci_msix(void) req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); g_free(req.data); free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, + qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); g_assert_cmpint(status, ==, 0); - guest_free(alloc, req_addr); + guest_free(qs->alloc, req_addr); /* Read request */ req.type = VIRTIO_BLK_T_IN; @@ -527,7 +496,7 @@ static void pci_msix(void) req.sector = 0; req.data = g_malloc0(512); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); g_free(req.data); @@ -535,10 +504,10 @@ static void pci_msix(void) qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, true, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, + qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); @@ -549,26 +518,22 @@ static void pci_msix(void) g_assert_cmpstr(data, ==, "TEST"); g_free(data); - guest_free(alloc, req_addr); + guest_free(qs->alloc, req_addr); /* End test */ - qvirtqueue_cleanup(&qvirtio_pci, &vqpci->vq, alloc); - pc_alloc_uninit(alloc); + qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); g_free(dev); - qpci_free_pc(bus); - test_end(); + qtest_shutdown(qs); } static void pci_idx(void) { QVirtioPCIDevice *dev; - QPCIBus *bus; + QOSState *qs; QVirtQueuePCI *vqpci; - QGuestAllocator *alloc; QVirtioBlkReq req; - void *addr; uint64_t req_addr; uint64_t capacity; uint32_t features; @@ -576,33 +541,27 @@ static void pci_idx(void) uint8_t status; char *data; - bus = pci_test_start(); - alloc = pc_alloc_init(); + qs = pci_test_start(); - dev = virtio_blk_pci_init(bus, PCI_SLOT); + dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT); qpci_msix_enable(dev->pdev); - qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0); + qvirtio_pci_set_msix_configuration_vector(dev, qs->alloc, 0); - /* MSI-X is enabled */ - addr = dev->addr + VIRTIO_PCI_CONFIG_OFF(true); - - capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, - 
(uint64_t)(uintptr_t)addr); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512); - features = qvirtio_get_features(&qvirtio_pci, &dev->vdev); + features = qvirtio_get_features(&dev->vdev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | (1u << VIRTIO_F_NOTIFY_ON_EMPTY) | (1u << VIRTIO_BLK_F_SCSI)); - qvirtio_set_features(&qvirtio_pci, &dev->vdev, features); + qvirtio_set_features(&dev->vdev, features); - vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 0); - qvirtqueue_pci_msix_setup(dev, vqpci, alloc, 1); + vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, qs->alloc, 0); + qvirtqueue_pci_msix_setup(dev, vqpci, qs->alloc, 1); - qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev); + qvirtio_set_driver_ok(&dev->vdev); /* Write request */ req.type = VIRTIO_BLK_T_OUT; @@ -611,17 +570,16 @@ static void pci_idx(void) req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); g_free(req.data); free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, - QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); /* Write request */ req.type = VIRTIO_BLK_T_OUT; @@ -630,7 +588,7 @@ static void pci_idx(void) req.data = g_malloc0(512); strcpy(req.data, "TEST"); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); g_free(req.data); @@ -639,15 +597,15 @@ static void pci_idx(void) free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); /* No notification expected */ - status = qvirtio_wait_status_byte_no_isr(&qvirtio_pci, &dev->vdev, + status = qvirtio_wait_status_byte_no_isr(&dev->vdev, &vqpci->vq, req_addr + 528, QVIRTIO_BLK_TIMEOUT_US); g_assert_cmpint(status, ==, 0); - guest_free(alloc, req_addr); + guest_free(qs->alloc, req_addr); /* Read request */ req.type = VIRTIO_BLK_T_IN; @@ -655,7 +613,7 @@ static void pci_idx(void) req.sector = 1; req.data = g_malloc0(512); - req_addr = virtio_blk_request(alloc, &req, 512); + req_addr = virtio_blk_request(qs->alloc, &dev->vdev, &req, 512); g_free(req.data); @@ -663,9 +621,9 @@ static void pci_idx(void) qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, true, true); qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false); - qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head); + qvirtqueue_kick(&dev->vdev, &vqpci->vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq, + qvirtio_wait_queue_isr(&dev->vdev, &vqpci->vq, QVIRTIO_BLK_TIMEOUT_US); status = readb(req_addr + 528); @@ -676,38 +634,38 @@ static void pci_idx(void) g_assert_cmpstr(data, ==, "TEST"); g_free(data); - guest_free(alloc, req_addr); + guest_free(qs->alloc, req_addr); /* End test */ - qvirtqueue_cleanup(&qvirtio_pci, &vqpci->vq, alloc); - pc_alloc_uninit(alloc); + 
qvirtqueue_cleanup(dev->vdev.bus, &vqpci->vq, qs->alloc); qpci_msix_disable(dev->pdev); qvirtio_pci_device_disable(dev); g_free(dev); - qpci_free_pc(bus); - test_end(); + qtest_shutdown(qs); } static void pci_hotplug(void) { - QPCIBus *bus; QVirtioPCIDevice *dev; + QOSState *qs; + const char *arch = qtest_get_arch(); - bus = pci_test_start(); + qs = pci_test_start(); /* plug secondary disk */ qpci_plug_device_test("virtio-blk-pci", "drv1", PCI_SLOT_HP, "'drive': 'drive1'"); - dev = virtio_blk_pci_init(bus, PCI_SLOT_HP); + dev = virtio_blk_pci_init(qs->pcibus, PCI_SLOT_HP); g_assert(dev); qvirtio_pci_device_disable(dev); g_free(dev); /* unplug secondary disk */ - qpci_unplug_acpi_device_test("drv1", PCI_SLOT_HP); - qpci_free_pc(bus); - test_end(); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qpci_unplug_acpi_device_test("drv1", PCI_SLOT_HP); + } + qtest_shutdown(qs); } static void mmio_basic(void) @@ -724,30 +682,27 @@ static void mmio_basic(void) g_assert(dev != NULL); g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK); - qvirtio_reset(&qvirtio_mmio, &dev->vdev); - qvirtio_set_acknowledge(&qvirtio_mmio, &dev->vdev); - qvirtio_set_driver(&qvirtio_mmio, &dev->vdev); + qvirtio_reset(&dev->vdev); + qvirtio_set_acknowledge(&dev->vdev); + qvirtio_set_driver(&dev->vdev); alloc = generic_alloc_init(MMIO_RAM_ADDR, MMIO_RAM_SIZE, MMIO_PAGE_SIZE); - vq = qvirtqueue_setup(&qvirtio_mmio, &dev->vdev, alloc, 0); + vq = qvirtqueue_setup(&dev->vdev, alloc, 0); - test_basic(&qvirtio_mmio, &dev->vdev, alloc, vq, - QVIRTIO_MMIO_DEVICE_SPECIFIC); + test_basic(&dev->vdev, alloc, vq); qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', " " 'size': %d } }", n_size); - qvirtio_wait_queue_isr(&qvirtio_mmio, &dev->vdev, vq, - QVIRTIO_BLK_TIMEOUT_US); + qvirtio_wait_queue_isr(&dev->vdev, vq, QVIRTIO_BLK_TIMEOUT_US); - capacity = qvirtio_config_readq(&qvirtio_mmio, &dev->vdev, - QVIRTIO_MMIO_DEVICE_SPECIFIC); + capacity = qvirtio_config_readq(&dev->vdev, 0); g_assert_cmpint(capacity, ==, n_size / 512); /* End test */ - qvirtqueue_cleanup(&qvirtio_mmio, vq, alloc); - generic_alloc_uninit(alloc); + qvirtqueue_cleanup(dev->vdev.bus, vq, alloc); g_free(dev); + generic_alloc_uninit(alloc); test_end(); } @@ -757,12 +712,15 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); - if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0 || + strcmp(arch, "ppc64") == 0) { qtest_add_func("/virtio/blk/pci/basic", pci_basic); qtest_add_func("/virtio/blk/pci/indirect", pci_indirect); qtest_add_func("/virtio/blk/pci/config", pci_config); - qtest_add_func("/virtio/blk/pci/msix", pci_msix); - qtest_add_func("/virtio/blk/pci/idx", pci_idx); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qtest_add_func("/virtio/blk/pci/msix", pci_msix); + qtest_add_func("/virtio/blk/pci/idx", pci_idx); + } qtest_add_func("/virtio/blk/pci/hotplug", pci_hotplug); } else if (strcmp(arch, "arm") == 0) { qtest_add_func("/virtio/blk/mmio/basic", mmio_basic); diff --git a/tests/virtio-net-test.c b/tests/virtio-net-test.c index 361506faf5..8f94360480 100644 --- a/tests/virtio-net-test.c +++ b/tests/virtio-net-test.c @@ -12,12 +12,10 @@ #include "qemu-common.h" #include "qemu/sockets.h" #include "qemu/iov.h" -#include "libqos/pci-pc.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" #include "libqos/virtio.h" #include "libqos/virtio-pci.h" -#include "libqos/malloc.h" -#include 
"libqos/malloc-pc.h" -#include "libqos/malloc-generic.h" #include "qemu/bswap.h" #include "hw/virtio/virtio-net.h" #include "standard-headers/linux/virtio_ids.h" @@ -46,39 +44,43 @@ static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot) g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET); qvirtio_pci_device_enable(dev); - qvirtio_reset(&qvirtio_pci, &dev->vdev); - qvirtio_set_acknowledge(&qvirtio_pci, &dev->vdev); - qvirtio_set_driver(&qvirtio_pci, &dev->vdev); + qvirtio_reset(&dev->vdev); + qvirtio_set_acknowledge(&dev->vdev); + qvirtio_set_driver(&dev->vdev); return dev; } -static QPCIBus *pci_test_start(int socket) +static QOSState *pci_test_start(int socket) { - char *cmdline; - - cmdline = g_strdup_printf("-netdev socket,fd=%d,id=hs0 -device " - "virtio-net-pci,netdev=hs0", socket); - qtest_start(cmdline); - g_free(cmdline); - - return qpci_init_pc(); + const char *arch = qtest_get_arch(); + const char *cmd = "-netdev socket,fd=%d,id=hs0 -device " + "virtio-net-pci,netdev=hs0"; + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + return qtest_pc_boot(cmd, socket); + } + if (strcmp(arch, "ppc64") == 0) { + return qtest_spapr_boot(cmd, socket); + } + g_printerr("virtio-net tests are only available on x86 or ppc64\n"); + exit(EXIT_FAILURE); } -static void driver_init(const QVirtioBus *bus, QVirtioDevice *dev) +static void driver_init(QVirtioDevice *dev) { uint32_t features; - features = qvirtio_get_features(bus, dev); + features = qvirtio_get_features(dev); features = features & ~(QVIRTIO_F_BAD_FEATURE | (1u << VIRTIO_RING_F_INDIRECT_DESC) | (1u << VIRTIO_RING_F_EVENT_IDX)); - qvirtio_set_features(bus, dev, features); + qvirtio_set_features(dev, features); - qvirtio_set_driver_ok(bus, dev); + qvirtio_set_driver_ok(dev); } -static void rx_test(const QVirtioBus *bus, QVirtioDevice *dev, +static void rx_test(QVirtioDevice *dev, QGuestAllocator *alloc, QVirtQueue *vq, int socket) { @@ -101,19 +103,19 @@ static void rx_test(const QVirtioBus *bus, QVirtioDevice *dev, req_addr = guest_alloc(alloc, 64); free_head = qvirtqueue_add(vq, req_addr, 64, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); ret = iov_send(socket, iov, 2, 0, sizeof(len) + sizeof(test)); g_assert_cmpint(ret, ==, sizeof(test) + sizeof(len)); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_NET_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_NET_TIMEOUT_US); memread(req_addr + VNET_HDR_SIZE, buffer, sizeof(test)); g_assert_cmpstr(buffer, ==, "TEST"); guest_free(alloc, req_addr); } -static void tx_test(const QVirtioBus *bus, QVirtioDevice *dev, +static void tx_test(QVirtioDevice *dev, QGuestAllocator *alloc, QVirtQueue *vq, int socket) { @@ -127,9 +129,9 @@ static void tx_test(const QVirtioBus *bus, QVirtioDevice *dev, memwrite(req_addr + VNET_HDR_SIZE, "TEST", 4); free_head = qvirtqueue_add(vq, req_addr, 64, false, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_NET_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_NET_TIMEOUT_US); guest_free(alloc, req_addr); ret = qemu_recv(socket, &len, sizeof(len), 0); @@ -140,7 +142,7 @@ static void tx_test(const QVirtioBus *bus, QVirtioDevice *dev, g_assert_cmpstr(buffer, ==, "TEST"); } -static void rx_stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, +static void rx_stop_cont_test(QVirtioDevice *dev, QGuestAllocator *alloc, QVirtQueue *vq, int socket) { @@ -164,7 +166,7 @@ static void 
rx_stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, req_addr = guest_alloc(alloc, 64); free_head = qvirtqueue_add(vq, req_addr, 64, true, false); - qvirtqueue_kick(bus, dev, vq, free_head); + qvirtqueue_kick(dev, vq, free_head); rsp = qmp("{ 'execute' : 'stop'}"); QDECREF(rsp); @@ -180,36 +182,34 @@ static void rx_stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, rsp = qmp("{ 'execute' : 'cont'}"); QDECREF(rsp); - qvirtio_wait_queue_isr(bus, dev, vq, QVIRTIO_NET_TIMEOUT_US); + qvirtio_wait_queue_isr(dev, vq, QVIRTIO_NET_TIMEOUT_US); memread(req_addr + VNET_HDR_SIZE, buffer, sizeof(test)); g_assert_cmpstr(buffer, ==, "TEST"); guest_free(alloc, req_addr); } -static void send_recv_test(const QVirtioBus *bus, QVirtioDevice *dev, +static void send_recv_test(QVirtioDevice *dev, QGuestAllocator *alloc, QVirtQueue *rvq, QVirtQueue *tvq, int socket) { - rx_test(bus, dev, alloc, rvq, socket); - tx_test(bus, dev, alloc, tvq, socket); + rx_test(dev, alloc, rvq, socket); + tx_test(dev, alloc, tvq, socket); } -static void stop_cont_test(const QVirtioBus *bus, QVirtioDevice *dev, +static void stop_cont_test(QVirtioDevice *dev, QGuestAllocator *alloc, QVirtQueue *rvq, QVirtQueue *tvq, int socket) { - rx_stop_cont_test(bus, dev, alloc, rvq, socket); + rx_stop_cont_test(dev, alloc, rvq, socket); } static void pci_basic(gconstpointer data) { QVirtioPCIDevice *dev; - QPCIBus *bus; + QOSState *qs; QVirtQueuePCI *tx, *rx; - QGuestAllocator *alloc; - void (*func) (const QVirtioBus *bus, - QVirtioDevice *dev, + void (*func) (QVirtioDevice *dev, QGuestAllocator *alloc, QVirtQueue *rvq, QVirtQueue *tvq, @@ -219,37 +219,37 @@ static void pci_basic(gconstpointer data) ret = socketpair(PF_UNIX, SOCK_STREAM, 0, sv); g_assert_cmpint(ret, !=, -1); - bus = pci_test_start(sv[1]); - dev = virtio_net_pci_init(bus, PCI_SLOT); + qs = pci_test_start(sv[1]); + dev = virtio_net_pci_init(qs->pcibus, PCI_SLOT); - alloc = pc_alloc_init(); - rx = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 0); - tx = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev, - alloc, 1); + rx = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, qs->alloc, 0); + tx = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, qs->alloc, 1); - driver_init(&qvirtio_pci, &dev->vdev); - func(&qvirtio_pci, &dev->vdev, alloc, &rx->vq, &tx->vq, sv[0]); + driver_init(&dev->vdev); + func(&dev->vdev, qs->alloc, &rx->vq, &tx->vq, sv[0]); /* End test */ close(sv[0]); - qvirtqueue_cleanup(&qvirtio_pci, &tx->vq, alloc); - qvirtqueue_cleanup(&qvirtio_pci, &rx->vq, alloc); - pc_alloc_uninit(alloc); + qvirtqueue_cleanup(dev->vdev.bus, &tx->vq, qs->alloc); + qvirtqueue_cleanup(dev->vdev.bus, &rx->vq, qs->alloc); qvirtio_pci_device_disable(dev); g_free(dev->pdev); g_free(dev); - qpci_free_pc(bus); - test_end(); + qtest_shutdown(qs); } #endif static void hotplug(void) { + const char *arch = qtest_get_arch(); + qtest_start("-device virtio-net-pci"); qpci_plug_device_test("virtio-net-pci", "net1", PCI_SLOT_HP, NULL); - qpci_unplug_acpi_device_test("net1", PCI_SLOT_HP); + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qpci_unplug_acpi_device_test("net1", PCI_SLOT_HP); + } test_end(); } diff --git a/tests/virtio-rng-test.c b/tests/virtio-rng-test.c index e1b26401f9..dcecf77463 100644 --- a/tests/virtio-rng-test.c +++ b/tests/virtio-rng-test.c @@ -20,8 +20,13 @@ static void pci_nop(void) static void hotplug(void) { + const char *arch = qtest_get_arch(); + qpci_plug_device_test("virtio-rng-pci", "rng1", PCI_SLOT_HP, NULL); - 
qpci_unplug_acpi_device_test("rng1", PCI_SLOT_HP); + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qpci_unplug_acpi_device_test("rng1", PCI_SLOT_HP); + } } int main(int argc, char **argv) diff --git a/tests/virtio-scsi-test.c b/tests/virtio-scsi-test.c index f1489e68a0..69220ef07b 100644 --- a/tests/virtio-scsi-test.c +++ b/tests/virtio-scsi-test.c @@ -11,12 +11,10 @@ #include "qemu/osdep.h" #include "libqtest.h" #include "block/scsi.h" +#include "libqos/libqos-pc.h" +#include "libqos/libqos-spapr.h" #include "libqos/virtio.h" #include "libqos/virtio-pci.h" -#include "libqos/pci-pc.h" -#include "libqos/malloc.h" -#include "libqos/malloc-pc.h" -#include "libqos/malloc-generic.h" #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_pci.h" #include "standard-headers/linux/virtio_scsi.h" @@ -29,28 +27,32 @@ typedef struct { QVirtioDevice *dev; - QGuestAllocator *alloc; - QPCIBus *bus; + QOSState *qs; int num_queues; QVirtQueue *vq[MAX_NUM_QUEUES + 2]; } QVirtIOSCSI; -static void qvirtio_scsi_start(const char *extra_opts) +static QOSState *qvirtio_scsi_start(const char *extra_opts) { - char *cmdline; - - cmdline = g_strdup_printf( - "-drive id=drv0,if=none,file=/dev/null,format=raw " - "-device virtio-scsi-pci,id=vs0 " - "-device scsi-hd,bus=vs0.0,drive=drv0 %s", - extra_opts ? : ""); - qtest_start(cmdline); - g_free(cmdline); + const char *arch = qtest_get_arch(); + const char *cmd = "-drive id=drv0,if=none,file=/dev/null,format=raw " + "-device virtio-scsi-pci,id=vs0 " + "-device scsi-hd,bus=vs0.0,drive=drv0 %s"; + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + return qtest_pc_boot(cmd, extra_opts ? : ""); + } + if (strcmp(arch, "ppc64") == 0) { + return qtest_spapr_boot(cmd, extra_opts ? 
: ""); + } + + g_printerr("virtio-scsi tests are only available on x86 or ppc64\n"); + exit(EXIT_FAILURE); } -static void qvirtio_scsi_stop(void) +static void qvirtio_scsi_stop(QOSState *qs) { - qtest_end(); + qtest_shutdown(qs); } static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs) @@ -58,12 +60,12 @@ static void qvirtio_scsi_pci_free(QVirtIOSCSI *vs) int i; for (i = 0; i < vs->num_queues + 2; i++) { - qvirtqueue_cleanup(&qvirtio_pci, vs->vq[i], vs->alloc); + qvirtqueue_cleanup(vs->dev->bus, vs->vq[i], vs->qs->alloc); } - pc_alloc_uninit(vs->alloc); qvirtio_pci_device_disable(container_of(vs->dev, QVirtioPCIDevice, vdev)); g_free(vs->dev); - qpci_free_pc(vs->bus); + qvirtio_scsi_stop(vs->qs); + g_free(vs); } static uint64_t qvirtio_scsi_alloc(QVirtIOSCSI *vs, size_t alloc_size, @@ -71,7 +73,7 @@ static uint64_t qvirtio_scsi_alloc(QVirtIOSCSI *vs, size_t alloc_size, { uint64_t addr; - addr = guest_alloc(vs->alloc, alloc_size); + addr = guest_alloc(vs->qs->alloc, alloc_size); if (data) { memwrite(addr, data, alloc_size); } @@ -118,8 +120,8 @@ static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb, qvirtqueue_add(vq, data_in_addr, data_in_len, true, false); } - qvirtqueue_kick(&qvirtio_pci, vs->dev, vq, free_head); - qvirtio_wait_queue_isr(&qvirtio_pci, vs->dev, vq, QVIRTIO_SCSI_TIMEOUT_US); + qvirtqueue_kick(vs->dev, vq, free_head); + qvirtio_wait_queue_isr(vs->dev, vq, QVIRTIO_SCSI_TIMEOUT_US); response = readb(resp_addr + offsetof(struct virtio_scsi_cmd_resp, response)); @@ -128,10 +130,10 @@ static uint8_t virtio_scsi_do_command(QVirtIOSCSI *vs, const uint8_t *cdb, memread(resp_addr, resp_out, sizeof(*resp_out)); } - guest_free(vs->alloc, req_addr); - guest_free(vs->alloc, resp_addr); - guest_free(vs->alloc, data_in_addr); - guest_free(vs->alloc, data_out_addr); + guest_free(vs->qs->alloc, req_addr); + guest_free(vs->qs->alloc, resp_addr); + guest_free(vs->qs->alloc, data_in_addr); + guest_free(vs->qs->alloc, data_out_addr); return response; } @@ -141,31 +143,29 @@ static QVirtIOSCSI *qvirtio_scsi_pci_init(int slot) QVirtIOSCSI *vs; QVirtioPCIDevice *dev; struct virtio_scsi_cmd_resp resp; - void *addr; int i; vs = g_new0(QVirtIOSCSI, 1); - vs->alloc = pc_alloc_init(); - vs->bus = qpci_init_pc(); - dev = qvirtio_pci_device_find(vs->bus, VIRTIO_ID_SCSI); + vs->qs = qvirtio_scsi_start("-drive file=blkdebug::null-co://," + "if=none,id=dr1,format=raw,file.align=4k " + "-device scsi-disk,drive=dr1,lun=0,scsi-id=1"); + dev = qvirtio_pci_device_find(vs->qs->pcibus, VIRTIO_ID_SCSI); vs->dev = (QVirtioDevice *)dev; g_assert(dev != NULL); g_assert_cmphex(vs->dev->device_type, ==, VIRTIO_ID_SCSI); qvirtio_pci_device_enable(dev); - qvirtio_reset(&qvirtio_pci, vs->dev); - qvirtio_set_acknowledge(&qvirtio_pci, vs->dev); - qvirtio_set_driver(&qvirtio_pci, vs->dev); + qvirtio_reset(vs->dev); + qvirtio_set_acknowledge(vs->dev); + qvirtio_set_driver(vs->dev); - addr = dev->addr + VIRTIO_PCI_CONFIG_OFF(false); - vs->num_queues = qvirtio_config_readl(&qvirtio_pci, vs->dev, - (uint64_t)(uintptr_t)addr); + vs->num_queues = qvirtio_config_readl(vs->dev, 0); g_assert_cmpint(vs->num_queues, <, MAX_NUM_QUEUES); for (i = 0; i < vs->num_queues + 2; i++) { - vs->vq[i] = qvirtqueue_setup(&qvirtio_pci, vs->dev, vs->alloc, i); + vs->vq[i] = qvirtqueue_setup(vs->dev, vs->qs->alloc, i); } /* Clear the POWER ON OCCURRED unit attention */ @@ -184,15 +184,18 @@ static QVirtIOSCSI *qvirtio_scsi_pci_init(int slot) /* Tests only initialization so far. 
TODO: Replace with functional tests */ static void pci_nop(void) { - qvirtio_scsi_start(NULL); - qvirtio_scsi_stop(); + QOSState *qs; + + qs = qvirtio_scsi_start(NULL); + qvirtio_scsi_stop(qs); } static void hotplug(void) { QDict *response; + QOSState *qs; - qvirtio_scsi_start("-drive id=drv1,if=none,file=/dev/null,format=raw"); + qs = qvirtio_scsi_start("-drive id=drv1,if=none,file=/dev/null,format=raw"); response = qmp("{\"execute\": \"device_add\"," " \"arguments\": {" " \"driver\": \"scsi-hd\"," @@ -214,7 +217,7 @@ static void hotplug(void) g_assert(qdict_haskey(response, "event")); g_assert(!strcmp(qdict_get_str(response, "event"), "DEVICE_DELETED")); QDECREF(response); - qvirtio_scsi_stop(); + qvirtio_scsi_stop(qs); } /* Test WRITE SAME with the lba not aligned */ @@ -230,9 +233,6 @@ static void test_unaligned_write_same(void) 0x41, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x33, 0x00, 0x00 }; - qvirtio_scsi_start("-drive file=blkdebug::null-co://,if=none,id=dr1" - ",format=raw,file.align=4k " - "-device scsi-disk,drive=dr1,lun=0,scsi-id=1"); vs = qvirtio_scsi_pci_init(PCI_SLOT); g_assert_cmphex(0, ==, @@ -242,7 +242,6 @@ static void test_unaligned_write_same(void) virtio_scsi_do_command(vs, write_same_cdb_2, NULL, 0, buf2, 512, NULL)); qvirtio_scsi_pci_free(vs); - qvirtio_scsi_stop(); } int main(int argc, char **argv) diff --git a/trace-events b/trace-events index 616cc52378..f74e1d3d22 100644 --- a/trace-events +++ b/trace-events @@ -37,10 +37,6 @@ cpu_out(unsigned int addr, char size, unsigned int val) "addr %#x(%c) value %u" # balloon.c # Since requests are raised via monitor, not many tracepoints are needed. balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu" -virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s gpa: %"PRIx64 -virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d" -virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d" -virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d" # vl.c vm_state_notify(int running, int reason) "running %d reason %d" @@ -83,22 +79,8 @@ xen_map_cache(uint64_t phys_addr) "want %#"PRIx64 xen_remap_bucket(uint64_t index) "index %#"PRIx64 xen_map_cache_return(void* ptr) "%p" -# qemu-coroutine.c -qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p" -qemu_coroutine_yield(void *from, void *to) "from %p to %p" -qemu_coroutine_terminate(void *co) "self %p" - -# qemu-coroutine-lock.c -qemu_co_queue_run_restart(void *co) "co %p" -qemu_co_queue_next(void *nxt) "next %p" -qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p" -qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p" -qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p" -qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p" - # monitor.c handle_qmp_command(void *mon, const char *cmd_name) "mon %p cmd_name \"%s\"" -monitor_protocol_emitter(void *mon) "mon %p" monitor_protocol_event_handler(uint32_t event, void *qdict) "event=%d data=%p" monitor_protocol_event_emit(uint32_t event, void *data) "event=%d data=%p" monitor_protocol_event_queue(uint32_t event, void *qdict, uint64_t rate) "event=%d data=%p rate=%" PRId64 @@ -139,9 +121,26 @@ memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t va memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 
%#"PRIx64" value %#"PRIx64" size %u" memory_region_tb_read(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned size) "cpu %d addr %#"PRIx64" value %#"PRIx64" size %u" +memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" +memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr %#"PRIx64" value %#"PRIx64" size %u" ### Guest events, keep at bottom + +## vCPU + +# Hot-plug a new virtual (guest) CPU +# +# Mode: user, softmmu +# Targets: all +vcpu guest_cpu_enter(void) + +# Reset the state of a virtual (guest) CPU +# +# Mode: user, softmmu +# Targets: all +vcpu guest_cpu_reset(void) + # @vaddr: Access' virtual address. # @info : Access' information (see below). # @@ -158,6 +157,7 @@ memory_region_tb_write(int cpu_index, uint64_t addr, uint64_t value, unsigned si # bool store : 1; /* wheter it's a store operation */ # }; # +# Mode: user, softmmu # Targets: TCG(all) disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d" @@ -166,6 +166,7 @@ disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x # # Start executing a guest system call in syscall emulation mode. # +# Mode: user # Targets: TCG(all) disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" arg7=0x%016"PRIx64" arg8=0x%016"PRIx64 @@ -174,5 +175,6 @@ disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint # # Finish executing a guest system call in syscall emulation mode. 
# +# Mode: user # Targets: TCG(all) disable vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" ret=0x%016"PRIx64 diff --git a/trace/Makefile.objs b/trace/Makefile.objs index 4d91b3b833..1e1ce7479d 100644 --- a/trace/Makefile.objs +++ b/trace/Makefile.objs @@ -22,7 +22,7 @@ $(obj)/generated-ust-provider.h-timestamp: $(BUILD_DIR)/trace-events-all $(trace $(call quiet-command,$(TRACETOOL) \ --format=ust-events-h \ --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-ust.c: $(obj)/generated-ust.c-timestamp $(BUILD_DIR)/config-host.mak @cmp $< $@ >/dev/null 2>&1 || cp $< $@ @@ -30,34 +30,13 @@ $(obj)/generated-ust.c-timestamp: $(BUILD_DIR)/trace-events-all $(tracetool-y) $(call quiet-command,$(TRACETOOL) \ --format=ust-events-c \ --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") -$(obj)/generated-events.h: $(obj)/generated-ust-provider.h -$(obj)/generated-events.c: $(obj)/generated-ust.c +$(obj)/generated-tracers.h: $(obj)/generated-ust-provider.h +$(obj)/generated-tracers.c: $(obj)/generated-ust.c endif -###################################################################### -# Auto-generated event descriptions - -$(obj)/generated-events.h: $(obj)/generated-events.h-timestamp - @cmp $< $@ >/dev/null 2>&1 || cp $< $@ -$(obj)/generated-events.h-timestamp: $(BUILD_DIR)/trace-events-all $(tracetool-y) - $(call quiet-command,$(TRACETOOL) \ - --format=events-h \ - --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") - -$(obj)/generated-events.c: $(obj)/generated-events.c-timestamp $(BUILD_DIR)/config-host.mak - @cmp $< $@ >/dev/null 2>&1 || cp $< $@ -$(obj)/generated-events.c-timestamp: $(BUILD_DIR)/trace-events-all $(tracetool-y) - $(call quiet-command,$(TRACETOOL) \ - --format=events-c \ - --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") - -util-obj-y += generated-events.o - ###################################################################### # Auto-generated tracing routines @@ -71,7 +50,7 @@ $(obj)/generated-tracers.h-timestamp: $(BUILD_DIR)/trace-events-all $(BUILD_DIR) $(call quiet-command,$(TRACETOOL) \ --format=h \ --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") ############################## # non-DTrace @@ -82,7 +61,7 @@ $(obj)/generated-tracers.c-timestamp: $(BUILD_DIR)/trace-events-all $(BUILD_DIR) $(call quiet-command,$(TRACETOOL) \ --format=c \ --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-tracers.o: $(obj)/generated-tracers.c $(obj)/generated-tracers.h @@ -100,10 +79,10 @@ $(obj)/generated-tracers-dtrace.dtrace-timestamp: $(BUILD_DIR)/trace-events-all $(call quiet-command,$(TRACETOOL) \ --format=d \ --backends=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-tracers-dtrace.h: $(obj)/generated-tracers-dtrace.dtrace - $(call quiet-command,dtrace -o $@ -h -s $<, " GEN $@") + $(call quiet-command,dtrace -o $@ -h -s $<,"GEN","$@") $(obj)/generated-tracers-dtrace.o: $(obj)/generated-tracers-dtrace.dtrace @@ -119,7 +98,7 @@ $(obj)/generated-helpers-wrappers.h-timestamp: $(BUILD_DIR)/trace-events-all $(B $(call quiet-command,$(TRACETOOL) \ --format=tcg-helper-wrapper-h \ 
--backend=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-helpers.h: $(obj)/generated-helpers.h-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ @@ -127,7 +106,7 @@ $(obj)/generated-helpers.h-timestamp: $(BUILD_DIR)/trace-events-all $(BUILD_DIR) $(call quiet-command,$(TRACETOOL) \ --format=tcg-helper-h \ --backend=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-helpers.c: $(obj)/generated-helpers.c-timestamp @cmp $< $@ >/dev/null 2>&1 || cp $< $@ @@ -135,7 +114,7 @@ $(obj)/generated-helpers.c-timestamp: $(BUILD_DIR)/trace-events-all $(BUILD_DIR) $(call quiet-command,$(TRACETOOL) \ --format=tcg-helper-c \ --backend=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") $(obj)/generated-helpers.o: $(obj)/generated-helpers.c @@ -148,13 +127,14 @@ $(obj)/generated-tcg-tracers.h-timestamp: $(BUILD_DIR)/trace-events-all $(BUILD_ $(call quiet-command,$(TRACETOOL) \ --format=tcg-h \ --backend=$(TRACE_BACKENDS) \ - < $< > $@," GEN $(patsubst %-timestamp,%,$@)") + $< > $@,"GEN","$(patsubst %-timestamp,%,$@)") ###################################################################### # Backend code -util-obj-$(CONFIG_TRACE_SIMPLE) += simple.o generated-tracers.o +util-obj-y += generated-tracers.o +util-obj-$(CONFIG_TRACE_SIMPLE) += simple.o util-obj-$(CONFIG_TRACE_FTRACE) += ftrace.o util-obj-$(CONFIG_TRACE_UST) += generated-ust.o util-obj-y += control.o diff --git a/trace/control-internal.h b/trace/control-internal.h index a4e5f4aa06..a9d395a587 100644 --- a/trace/control-internal.h +++ b/trace/control-internal.h @@ -15,42 +15,29 @@ #include "qom/cpu.h" -extern TraceEvent trace_events[]; -extern uint16_t trace_events_dstate[]; extern int trace_events_enabled_count; -static inline TraceEventID trace_event_count(void) -{ - return TRACE_EVENT_COUNT; -} - -static inline TraceEvent *trace_event_id(TraceEventID id) -{ - assert(id < trace_event_count()); - return &trace_events[id]; -} - static inline bool trace_event_is_pattern(const char *str) { assert(str != NULL); return strchr(str, '*') != NULL; } -static inline TraceEventID trace_event_get_id(TraceEvent *ev) +static inline uint32_t trace_event_get_id(TraceEvent *ev) { assert(ev != NULL); return ev->id; } -static inline TraceEventVCPUID trace_event_get_vcpu_id(TraceEvent *ev) +static inline uint32_t trace_event_get_vcpu_id(TraceEvent *ev) { return ev->vcpu_id; } static inline bool trace_event_is_vcpu(TraceEvent *ev) { - return ev->vcpu_id != TRACE_VCPU_EVENT_COUNT; + return ev->vcpu_id != TRACE_VCPU_EVENT_NONE; } static inline const char * trace_event_get_name(TraceEvent *ev) @@ -65,26 +52,22 @@ static inline bool trace_event_get_state_static(TraceEvent *ev) return ev->sstate; } -static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id) -{ - /* it's on fast path, avoid consistency checks (asserts) */ - return unlikely(trace_events_enabled_count) && trace_events_dstate[id]; -} +/* it's on fast path, avoid consistency checks (asserts) */ +#define trace_event_get_state_dynamic_by_id(id) \ + (unlikely(trace_events_enabled_count) && _ ## id ## _DSTATE) static inline bool trace_event_get_state_dynamic(TraceEvent *ev) { - TraceEventID id; - assert(trace_event_get_state_static(ev)); - id = trace_event_get_id(ev); - return trace_event_get_state_dynamic_by_id(id); + return unlikely(trace_events_enabled_count) && 
*ev->dstate; } -static inline bool trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu, - TraceEventVCPUID id) +static inline bool +trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu, + uint32_t vcpu_id) { /* it's on fast path, avoid consistency checks (asserts) */ if (unlikely(trace_events_enabled_count)) { - return test_bit(id, vcpu->trace_dstate); + return test_bit(vcpu_id, vcpu->trace_dstate); } else { return false; } @@ -93,10 +76,13 @@ static inline bool trace_event_get_vcpu_state_dynamic_by_vcpu_id(CPUState *vcpu, static inline bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev) { - TraceEventVCPUID id; + uint32_t vcpu_id; assert(trace_event_is_vcpu(ev)); - id = trace_event_get_vcpu_id(ev); - return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, id); + vcpu_id = trace_event_get_vcpu_id(ev); + return trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id); } + +void trace_event_register_group(TraceEvent **events); + #endif /* TRACE__CONTROL_INTERNAL_H */ diff --git a/trace/control-target.c b/trace/control-target.c index 74c029accc..7ebf6e0bcb 100644 --- a/trace/control-target.c +++ b/trace/control-target.c @@ -9,10 +9,31 @@ #include "qemu/osdep.h" #include "cpu.h" +#include "trace.h" #include "trace/control.h" #include "translate-all.h" +void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state) +{ + bool state_pre; + assert(trace_event_get_state_static(ev)); + /* + * We ignore the "vcpu" property here, since no vCPUs have been created + * yet. Then dstate can only be 1 or 0. + */ + state_pre = *ev->dstate; + if (state_pre != state) { + if (state) { + trace_events_enabled_count++; + *ev->dstate = 1; + } else { + trace_events_enabled_count--; + *ev->dstate = 0; + } + } +} + void trace_event_set_state_dynamic(TraceEvent *ev, bool state) { CPUState *vcpu; @@ -22,32 +43,76 @@ void trace_event_set_state_dynamic(TraceEvent *ev, bool state) trace_event_set_vcpu_state_dynamic(vcpu, ev, state); } } else { - TraceEventID id = trace_event_get_id(ev); - trace_events_enabled_count += state - trace_events_dstate[id]; - trace_events_dstate[id] = state; + /* Without the "vcpu" property, dstate can only be 1 or 0 */ + bool state_pre = *ev->dstate; + if (state_pre != state) { + if (state) { + trace_events_enabled_count++; + *ev->dstate = 1; + } else { + trace_events_enabled_count--; + *ev->dstate = 0; + } + } } } void trace_event_set_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev, bool state) { - TraceEventID id; - TraceEventVCPUID vcpu_id; + uint32_t vcpu_id; bool state_pre; assert(trace_event_get_state_static(ev)); assert(trace_event_is_vcpu(ev)); - id = trace_event_get_id(ev); vcpu_id = trace_event_get_vcpu_id(ev); state_pre = test_bit(vcpu_id, vcpu->trace_dstate); if (state_pre != state) { if (state) { trace_events_enabled_count++; set_bit(vcpu_id, vcpu->trace_dstate); - trace_events_dstate[id]++; + (*ev->dstate)++; } else { trace_events_enabled_count--; clear_bit(vcpu_id, vcpu->trace_dstate); - trace_events_dstate[id]--; + (*ev->dstate)--; + } + } +} + +static bool adding_first_cpu(void) +{ + CPUState *cpu; + size_t count = 0; + CPU_FOREACH(cpu) { + count++; + if (count > 1) { + return false; + } + } + return true; +} + +void trace_init_vcpu(CPUState *vcpu) +{ + TraceEventIter iter; + TraceEvent *ev; + trace_event_iter_init(&iter, NULL); + while ((ev = trace_event_iter_next(&iter)) != NULL) { + if (trace_event_is_vcpu(ev) && + trace_event_get_state_static(ev) && + trace_event_get_state_dynamic(ev)) { + if (adding_first_cpu()) { + /* 
check preconditions */ + assert(*ev->dstate == 1); + /* disable early-init state ... */ + *ev->dstate = 0; + trace_events_enabled_count--; + /* ... and properly re-enable */ + trace_event_set_vcpu_state_dynamic(vcpu, ev, true); + } else { + trace_event_set_vcpu_state_dynamic(vcpu, ev, true); + } } } + trace_guest_cpu_enter(vcpu); } diff --git a/trace/control.c b/trace/control.c index d173c09f44..1a7bee6ddc 100644 --- a/trace/control.c +++ b/trace/control.c @@ -19,20 +19,24 @@ #ifdef CONFIG_TRACE_LOG #include "qemu/log.h" #endif +#ifdef CONFIG_TRACE_SYSLOG +#include +#endif #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/config-file.h" #include "monitor/monitor.h" int trace_events_enabled_count; -/* - * Interpretation depends on wether the event has the 'vcpu' property: - * - false: Boolean value indicating whether the event is active. - * - true : Integral counting the number of vCPUs that have this event enabled. - */ -uint16_t trace_events_dstate[TRACE_EVENT_COUNT]; -/* Marks events for late vCPU state init */ -static bool trace_events_dstate_init[TRACE_EVENT_COUNT]; + +typedef struct TraceEventGroup { + TraceEvent **events; +} TraceEventGroup; + +static TraceEventGroup *event_groups; +static size_t nevent_groups; +static uint32_t next_id; +static uint32_t next_vcpu_id; QemuOptsList qemu_trace_opts = { .name = "trace", @@ -55,13 +59,29 @@ QemuOptsList qemu_trace_opts = { }; +void trace_event_register_group(TraceEvent **events) +{ + size_t i; + for (i = 0; events[i] != NULL; i++) { + events[i]->id = next_id++; + if (events[i]->vcpu_id != TRACE_VCPU_EVENT_NONE) { + events[i]->vcpu_id = next_vcpu_id++; + } + } + event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1); + event_groups[nevent_groups].events = events; + nevent_groups++; +} + + TraceEvent *trace_event_name(const char *name) { assert(name != NULL); - TraceEventID i; - for (i = 0; i < trace_event_count(); i++) { - TraceEvent *ev = trace_event_id(i); + TraceEventIter iter; + TraceEvent *ev; + trace_event_iter_init(&iter, NULL); + while ((ev = trace_event_iter_next(&iter)) != NULL) { if (strcmp(trace_event_get_name(ev), name) == 0) { return ev; } @@ -100,25 +120,29 @@ static bool pattern_glob(const char *pat, const char *ev) } } -TraceEvent *trace_event_pattern(const char *pat, TraceEvent *ev) -{ - assert(pat != NULL); - TraceEventID i; - - if (ev == NULL) { - i = -1; - } else { - i = trace_event_get_id(ev); - } - i++; +void trace_event_iter_init(TraceEventIter *iter, const char *pattern) +{ + iter->event = 0; + iter->group = 0; + iter->pattern = pattern; +} - while (i < trace_event_count()) { - TraceEvent *res = trace_event_id(i); - if (pattern_glob(pat, trace_event_get_name(res))) { - return res; +TraceEvent *trace_event_iter_next(TraceEventIter *iter) +{ + while (iter->group < nevent_groups && + event_groups[iter->group].events[iter->event] != NULL) { + TraceEvent *ev = event_groups[iter->group].events[iter->event]; + iter->event++; + if (event_groups[iter->group].events[iter->event] == NULL) { + iter->event = 0; + iter->group++; + } + if (!iter->pattern || + pattern_glob(iter->pattern, + trace_event_get_name(ev))) { + return ev; } - i++; } return NULL; @@ -126,10 +150,11 @@ TraceEvent *trace_event_pattern(const char *pat, TraceEvent *ev) void trace_list_events(void) { - int i; - for (i = 0; i < trace_event_count(); i++) { - TraceEvent *res = trace_event_id(i); - fprintf(stderr, "%s\n", trace_event_get_name(res)); + TraceEventIter iter; + TraceEvent *ev; + trace_event_iter_init(&iter, NULL); 
+ while ((ev = trace_event_iter_next(&iter)) != NULL) { + fprintf(stderr, "%s\n", trace_event_get_name(ev)); } } @@ -137,32 +162,32 @@ static void do_trace_enable_events(const char *line_buf) { const bool enable = ('-' != line_buf[0]); const char *line_ptr = enable ? line_buf : line_buf + 1; + TraceEventIter iter; + TraceEvent *ev; + bool is_pattern = trace_event_is_pattern(line_ptr); - if (trace_event_is_pattern(line_ptr)) { - TraceEvent *ev = NULL; - while ((ev = trace_event_pattern(line_ptr, ev)) != NULL) { - if (trace_event_get_state_static(ev)) { - /* start tracing */ - trace_event_set_state_dynamic(ev, enable); - /* mark for late vCPU init */ - trace_events_dstate_init[ev->id] = true; + trace_event_iter_init(&iter, line_ptr); + while ((ev = trace_event_iter_next(&iter)) != NULL) { + if (!trace_event_get_state_static(ev)) { + if (!is_pattern) { + error_report("WARNING: trace event '%s' is not traceable", + line_ptr); + return; } + continue; } - } else { - TraceEvent *ev = trace_event_name(line_ptr); - if (ev == NULL) { - error_report("WARNING: trace event '%s' does not exist", - line_ptr); - } else if (!trace_event_get_state_static(ev)) { - error_report("WARNING: trace event '%s' is not traceable", - line_ptr); - } else { - /* start tracing */ - trace_event_set_state_dynamic(ev, enable); - /* mark for late vCPU init */ - trace_events_dstate_init[ev->id] = true; + + /* start tracing */ + trace_event_set_state_dynamic(ev, enable); + if (!is_pattern) { + return; } } + + if (!is_pattern) { + error_report("WARNING: trace event '%s' does not exist", + line_ptr); + } } void trace_enable_events(const char *line_buf) @@ -250,6 +275,10 @@ bool trace_init_backends(void) } #endif +#ifdef CONFIG_TRACE_SYSLOG + openlog(NULL, LOG_PID, LOG_DAEMON); +#endif + return true; } @@ -271,14 +300,7 @@ char *trace_opt_parse(const char *optarg) return trace_file; } -void trace_init_vcpu_events(void) +uint32_t trace_get_vcpu_event_count(void) { - TraceEvent *ev = NULL; - while ((ev = trace_event_pattern("*", ev)) != NULL) { - if (trace_event_is_vcpu(ev) && - trace_event_get_state_static(ev) && - trace_events_dstate_init[ev->id]) { - trace_event_set_state_dynamic(ev, true); - } - } + return next_vcpu_id; } diff --git a/trace/control.h b/trace/control.h index 0413b28769..ccaeac8552 100644 --- a/trace/control.h +++ b/trace/control.h @@ -11,35 +11,37 @@ #define TRACE__CONTROL_H #include "qemu-common.h" -#include "trace/generated-events.h" +#include "event-internal.h" + +typedef struct TraceEventIter { + size_t event; + size_t group; + const char *pattern; +} TraceEventIter; /** - * TraceEventID: - * - * Unique tracing event identifier. - * - * These are named as 'TRACE_${EVENT_NAME}'. + * trace_event_iter_init: + * @iter: the event iterator struct + * @pattern: optional pattern to filter events on name * - * See also: "trace/generated-events.h" + * Initialize the event iterator struct @iter, + * optionally using @pattern to filter out events + * with non-matching names. */ -enum TraceEventID; +void trace_event_iter_init(TraceEventIter *iter, const char *pattern); /** - * trace_event_id: - * @id: Event identifier. - * - * Get an event by its identifier. + * trace_event_iter_next: + * @iter: the event iterator struct * - * This routine has a constant cost, as opposed to trace_event_name and - * trace_event_pattern. - * - * Pre-conditions: The identifier is valid. - * - * Returns: pointer to #TraceEvent. + * Get the next event, if any. When this returns NULL, + * the iterator should no longer be used. 
* + * Returns: the next event, or NULL if no more events exist */ -static TraceEvent *trace_event_id(TraceEventID id); +TraceEvent *trace_event_iter_next(TraceEventIter *iter); + /** * trace_event_name: @@ -51,17 +53,6 @@ static TraceEvent *trace_event_id(TraceEventID id); */ TraceEvent *trace_event_name(const char *name); -/** - * trace_event_pattern: - * @pat: Event name pattern. - * @ev: Event to start searching from (not included). - * - * Get all events with a given name pattern. - * - * Returns: pointer to #TraceEvent or NULL if not found. - */ -TraceEvent *trace_event_pattern(const char *pat, TraceEvent *ev); - /** * trace_event_is_pattern: * @@ -69,31 +60,23 @@ TraceEvent *trace_event_pattern(const char *pat, TraceEvent *ev); */ static bool trace_event_is_pattern(const char *str); -/** - * trace_event_count: - * - * Return the number of events. - */ -static TraceEventID trace_event_count(void); - - /** * trace_event_get_id: * * Get the identifier of an event. */ -static TraceEventID trace_event_get_id(TraceEvent *ev); +static uint32_t trace_event_get_id(TraceEvent *ev); /** * trace_event_get_vcpu_id: * * Get the per-vCPU identifier of an event. * - * Special value #TRACE_VCPU_EVENT_COUNT means the event is not vCPU-specific + * Special value #TRACE_VCPU_EVENT_NONE means the event is not vCPU-specific * (does not have the "vcpu" property). */ -static TraceEventVCPUID trace_event_get_vcpu_id(TraceEvent *ev); +static uint32_t trace_event_get_vcpu_id(TraceEvent *ev); /** * trace_event_is_vcpu: @@ -111,14 +94,12 @@ static const char * trace_event_get_name(TraceEvent *ev); /** * trace_event_get_state: - * @id: Event identifier. + * @id: Event identifier name. * * Get the tracing state of an event (both static and dynamic). * * If the event has the disabled property, the check will have no performance * impact. - * - * As a down side, you must always use an immediate #TraceEventID value. */ #define trace_event_get_state(id) \ ((id ##_ENABLED) && trace_event_get_state_dynamic_by_id(id)) @@ -126,19 +107,18 @@ static const char * trace_event_get_name(TraceEvent *ev); /** * trace_event_get_vcpu_state: * @vcpu: Target vCPU. - * @id: Event identifier (TraceEventID). - * @vcpu_id: Per-vCPU event identifier (TraceEventVCPUID). + * @id: Event identifier name. * * Get the tracing state of an event (both static and dynamic) for the given * vCPU. * * If the event has the disabled property, the check will have no performance * impact. - * - * As a down side, you must always use an immediate #TraceEventID value. */ -#define trace_event_get_vcpu_state(vcpu, id, vcpu_id) \ - ((id ##_ENABLED) && trace_event_get_vcpu_state_dynamic_by_vcpu_id(vcpu, vcpu_id)) +#define trace_event_get_vcpu_state(vcpu, id) \ + ((id ##_ENABLED) && \ + trace_event_get_vcpu_state_dynamic_by_vcpu_id( \ + vcpu, _ ## id ## _EVENT.vcpu_id)) /** * trace_event_get_state_static: @@ -167,31 +147,6 @@ static bool trace_event_get_state_dynamic(TraceEvent *ev); */ static bool trace_event_get_vcpu_state_dynamic(CPUState *vcpu, TraceEvent *ev); -/** - * trace_event_set_state: - * - * Set the tracing state of an event (only if possible). - */ -#define trace_event_set_state(id, state) \ - do { \ - if ((id ##_ENABLED)) { \ - TraceEvent *_e = trace_event_id(id); \ - trace_event_set_state_dynamic(_e, state); \ - } \ - } while (0) - -/** - * trace_event_set_vcpu_state: - * - * Set the tracing state of an event for the given vCPU (only if not disabled). 
- */ -#define trace_event_set_vcpu_state(vcpu, id, state) \ - do { \ - if ((id ##_ENABLED)) { \ - TraceEvent *_e = trace_event_id(id); \ - trace_event_set_vcpu_state_dynamic(vcpu, _e, state); \ - } \ - } while (0) /** * trace_event_set_state_dynamic: @@ -238,6 +193,14 @@ bool trace_init_backends(void); */ void trace_init_file(const char *file); +/** + * trace_init_vcpu: + * @vcpu: Added vCPU. + * + * Set initial dynamic event state for a hot-plugged vCPU. + */ +void trace_init_vcpu(CPUState *vcpu); + /** * trace_list_events: * @@ -270,12 +233,11 @@ extern QemuOptsList qemu_trace_opts; char *trace_opt_parse(const char *optarg); /** - * trace_init_vcpu_events: + * trace_get_vcpu_event_count: * - * Re-synchronize initial event state with vCPUs (which can be created after - * trace_init_events()). + * Return the number of known vcpu-specific events */ -void trace_init_vcpu_events(void); +uint32_t trace_get_vcpu_event_count(void); #include "trace/control-internal.h" diff --git a/trace/event-internal.h b/trace/event-internal.h index 5d8fa97ca5..f63500b37e 100644 --- a/trace/event-internal.h +++ b/trace/event-internal.h @@ -10,8 +10,11 @@ #ifndef TRACE__EVENT_INTERNAL_H #define TRACE__EVENT_INTERNAL_H -#include "trace/generated-events.h" - +/* + * Special value for TraceEvent.vcpu_id field to indicate + * that the event is not VCPU specific + */ +#define TRACE_VCPU_EVENT_NONE ((uint32_t)-1) /** * TraceEvent: @@ -19,15 +22,23 @@ * @vcpu_id: Unique per-vCPU event identifier. * @name: Event name. * @sstate: Static tracing state. + * @dstate: Dynamic tracing state + * + * Interpretation of @dstate depends on whether the event has the 'vcpu' + * property: + * - false: Boolean value indicating whether the event is active. + * - true : Integral counting the number of vCPUs that have this event enabled. * * Opaque generic description of a tracing event. 
*/ typedef struct TraceEvent { - TraceEventID id; - TraceEventVCPUID vcpu_id; + uint32_t id; + uint32_t vcpu_id; const char * name; const bool sstate; + uint16_t *dstate; } TraceEvent; +void trace_event_set_state_dynamic_init(TraceEvent *ev, bool state); #endif /* TRACE__EVENT_INTERNAL_H */ diff --git a/trace/ftrace.c b/trace/ftrace.c index e953922f5b..3588bb0eb4 100644 --- a/trace/ftrace.c +++ b/trace/ftrace.c @@ -51,6 +51,12 @@ bool ftrace_init(void) snprintf(path, PATH_MAX, "%s/tracing/tracing_on", debugfs); trace_fd = open(path, O_WRONLY); if (trace_fd < 0) { + if (errno == EACCES) { + trace_marker_fd = open("/dev/null", O_WRONLY); + if (trace_marker_fd != -1) { + return true; + } + } perror("Could not open ftrace 'tracing_on' file"); return false; } else { diff --git a/trace/qmp.c b/trace/qmp.c index 11d2564e7b..ac777d154f 100644 --- a/trace/qmp.c +++ b/trace/qmp.c @@ -52,8 +52,10 @@ static bool check_events(bool has_vcpu, bool ignore_unavailable, bool is_pattern return true; } else { /* error for unavailable events */ - TraceEvent *ev = NULL; - while ((ev = trace_event_pattern(name, ev)) != NULL) { + TraceEventIter iter; + TraceEvent *ev; + trace_event_iter_init(&iter, name); + while ((ev = trace_event_iter_next(&iter)) != NULL) { if (!ignore_unavailable && !trace_event_get_state_static(ev)) { error_setg(errp, "event \"%s\" is disabled", trace_event_get_name(ev)); return false; @@ -69,6 +71,7 @@ TraceEventInfoList *qmp_trace_event_get_state(const char *name, { Error *err = NULL; TraceEventInfoList *events = NULL; + TraceEventIter iter; TraceEvent *ev; bool is_pattern = trace_event_is_pattern(name); CPUState *cpu; @@ -86,8 +89,8 @@ TraceEventInfoList *qmp_trace_event_get_state(const char *name, } /* Get states (all errors checked above) */ - ev = NULL; - while ((ev = trace_event_pattern(name, ev)) != NULL) { + trace_event_iter_init(&iter, name); + while ((ev = trace_event_iter_next(&iter)) != NULL) { TraceEventInfoList *elem; bool is_vcpu = trace_event_is_vcpu(ev); if (has_vcpu && !is_vcpu) { @@ -132,6 +135,7 @@ void qmp_trace_event_set_state(const char *name, bool enable, Error **errp) { Error *err = NULL; + TraceEventIter iter; TraceEvent *ev; bool is_pattern = trace_event_is_pattern(name); CPUState *cpu; @@ -150,8 +154,8 @@ void qmp_trace_event_set_state(const char *name, bool enable, } /* Apply changes (all errors checked above) */ - ev = NULL; - while ((ev = trace_event_pattern(name, ev)) != NULL) { + trace_event_iter_init(&iter, name); + while ((ev = trace_event_iter_next(&iter)) != NULL) { if (!trace_event_get_state_static(ev) || (has_vcpu && !trace_event_is_vcpu(ev))) { continue; diff --git a/trace/simple.c b/trace/simple.c index 2f09dafcbc..b263622fa9 100644 --- a/trace/simple.c +++ b/trace/simple.c @@ -17,14 +17,14 @@ #include "trace/control.h" #include "trace/simple.h" -/** Trace file header event ID */ -#define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */ +/** Trace file header event ID, picked to avoid conflict with real event IDs */ +#define HEADER_EVENT_ID (~(uint64_t)0) /** Trace file magic number */ #define HEADER_MAGIC 0xf2b177cb0aa429b4ULL /** Trace file version number, bump if format changes */ -#define HEADER_VERSION 3 +#define HEADER_VERSION 4 /** Records were dropped event ID */ #define DROPPED_EVENT_ID (~(uint64_t)0 - 1) @@ -56,9 +56,12 @@ static uint32_t trace_pid; static FILE *trace_fp; static char *trace_file_name; +#define TRACE_RECORD_TYPE_MAPPING 0 +#define TRACE_RECORD_TYPE_EVENT 1 + /* * Trace buffer entry */ typedef struct 
{ - uint64_t event; /* TraceEventID */ + uint64_t event; /* event ID value */ uint64_t timestamp_ns; uint32_t length; /* in bytes */ uint32_t pid; @@ -160,6 +163,7 @@ static gpointer writeout_thread(gpointer opaque) unsigned int idx = 0; int dropped_count; size_t unused __attribute__ ((unused)); + uint64_t type = TRACE_RECORD_TYPE_EVENT; for (;;) { wait_for_trace_records_available(); @@ -174,10 +178,12 @@ static gpointer writeout_thread(gpointer opaque) } while (!g_atomic_int_compare_and_exchange(&dropped_events, dropped_count, 0)); dropped.rec.arguments[0] = dropped_count; + unused = fwrite(&type, sizeof(type), 1, trace_fp); unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); } while (get_trace_record(idx, &recordptr)) { + unused = fwrite(&type, sizeof(type), 1, trace_fp); unused = fwrite(recordptr, recordptr->length, 1, trace_fp); writeout_idx += recordptr->length; free(recordptr); /* don't use g_free, can deadlock when traced */ @@ -202,7 +208,7 @@ void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); } -int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize) +int trace_record_start(TraceBufferRecord *rec, uint32_t event, size_t datasize) { unsigned int idx, rec_off, old_idx, new_idx; uint32_t rec_len = sizeof(TraceRecord) + datasize; @@ -273,6 +279,28 @@ void trace_record_finish(TraceBufferRecord *rec) } } +static int st_write_event_mapping(void) +{ + uint64_t type = TRACE_RECORD_TYPE_MAPPING; + TraceEventIter iter; + TraceEvent *ev; + + trace_event_iter_init(&iter, NULL); + while ((ev = trace_event_iter_next(&iter)) != NULL) { + uint64_t id = trace_event_get_id(ev); + const char *name = trace_event_get_name(ev); + uint32_t len = strlen(name); + if (fwrite(&type, sizeof(type), 1, trace_fp) != 1 || + fwrite(&id, sizeof(id), 1, trace_fp) != 1 || + fwrite(&len, sizeof(len), 1, trace_fp) != 1 || + fwrite(name, len, 1, trace_fp) != 1) { + return -1; + } + } + + return 0; +} + void st_set_trace_file_enabled(bool enable) { if (enable == !!trace_fp) { @@ -297,7 +325,8 @@ void st_set_trace_file_enabled(bool enable) return; } - if (fwrite(&header, sizeof header, 1, trace_fp) != 1) { + if (fwrite(&header, sizeof header, 1, trace_fp) != 1 || + st_write_event_mapping() < 0) { fclose(trace_fp); trace_fp = NULL; return; diff --git a/trace/simple.h b/trace/simple.h index 1e7de45575..9931808c05 100644 --- a/trace/simple.h +++ b/trace/simple.h @@ -11,10 +11,6 @@ #ifndef TRACE_SIMPLE_H #define TRACE_SIMPLE_H - -#include "trace/generated-events.h" - - void st_print_trace_file_status(FILE *stream, fprintf_function stream_printf); void st_set_trace_file_enabled(bool enable); void st_set_trace_file(const char *file); @@ -33,7 +29,7 @@ typedef struct { * * @arglen number of bytes required for arguments */ -int trace_record_start(TraceBufferRecord *rec, TraceEventID id, size_t arglen); +int trace_record_start(TraceBufferRecord *rec, uint32_t id, size_t arglen); /** * Append a 64-bit argument to a trace record diff --git a/translate-all.c b/translate-all.c index 0dd6466e07..3dd9214904 100644 --- a/translate-all.c +++ b/translate-all.c @@ -31,6 +31,7 @@ #include "tcg.h" #if defined(CONFIG_USER_ONLY) #include "qemu.h" +#include "exec/exec-all.h" #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) #include #if __FreeBSD_version >= 700104 @@ -56,16 +57,39 @@ #include "qemu/timer.h" #include "exec/log.h" -//#define DEBUG_TB_INVALIDATE -//#define DEBUG_FLUSH +/* #define 
DEBUG_TB_INVALIDATE */ +/* #define DEBUG_TB_FLUSH */ +/* #define DEBUG_LOCKING */ /* make various TB consistency checks */ -//#define DEBUG_TB_CHECK +/* #define DEBUG_TB_CHECK */ #if !defined(CONFIG_USER_ONLY) /* TB consistency checks only implemented for usermode emulation. */ #undef DEBUG_TB_CHECK #endif +/* Access to the various translations structures need to be serialised via locks + * for consistency. This is automatic for SoftMMU based system + * emulation due to its single threaded nature. In user-mode emulation + * access to the memory related structures are protected with the + * mmap_lock. + */ +#ifdef DEBUG_LOCKING +#define DEBUG_MEM_LOCKS 1 +#else +#define DEBUG_MEM_LOCKS 0 +#endif + +#ifdef CONFIG_SOFTMMU +#define assert_memory_lock() do { /* nothing */ } while (0) +#else +#define assert_memory_lock() do { \ + if (DEBUG_MEM_LOCKS) { \ + g_assert(have_mmap_lock()); \ + } \ + } while (0) +#endif + #define SMC_BITMAP_USE_THRESHOLD 10 typedef struct PageDesc { @@ -97,34 +121,54 @@ typedef struct PageDesc { #define V_L2_BITS 10 #define V_L2_SIZE (1 << V_L2_BITS) -/* The bits remaining after N lower levels of page tables. */ -#define V_L1_BITS_REM \ - ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS) - -#if V_L1_BITS_REM < 4 -#define V_L1_BITS (V_L1_BITS_REM + V_L2_BITS) -#else -#define V_L1_BITS V_L1_BITS_REM -#endif - -#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS) - -#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) - uintptr_t qemu_host_page_size; intptr_t qemu_host_page_mask; -/* The bottom level has pointers to PageDesc */ -static void *l1_map[V_L1_SIZE]; +/* + * L1 Mapping properties + */ +static int v_l1_size; +static int v_l1_shift; +static int v_l2_levels; + +/* The bottom level has pointers to PageDesc, and is indexed by + * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size. + */ +#define V_L1_MIN_BITS 4 +#define V_L1_MAX_BITS (V_L2_BITS + 3) +#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS) + +static void *l1_map[V_L1_MAX_SIZE]; /* code generation context */ TCGContext tcg_ctx; +bool parallel_cpus; /* translation block context */ #ifdef CONFIG_USER_ONLY __thread int have_tb_lock; #endif +static void page_table_config_init(void) +{ + uint32_t v_l1_bits; + + assert(TARGET_PAGE_BITS); + /* The bits remaining after N lower levels of page tables. */ + v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS; + if (v_l1_bits < V_L1_MIN_BITS) { + v_l1_bits += V_L2_BITS; + } + + v_l1_size = 1 << v_l1_bits; + v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits; + v_l2_levels = v_l1_shift / V_L2_BITS - 1; + + assert(v_l1_bits <= V_L1_MAX_BITS); + assert(v_l1_shift % V_L2_BITS == 0); + assert(v_l2_levels >= 0); +} + void tb_lock(void) { #ifdef CONFIG_USER_ONLY @@ -153,6 +197,23 @@ void tb_lock_reset(void) #endif } +#ifdef DEBUG_LOCKING +#define DEBUG_TB_LOCKS 1 +#else +#define DEBUG_TB_LOCKS 0 +#endif + +#ifdef CONFIG_SOFTMMU +#define assert_tb_lock() do { /* nothing */ } while (0) +#else +#define assert_tb_lock() do { \ + if (DEBUG_TB_LOCKS) { \ + g_assert(have_tb_lock); \ + } \ + } while (0) +#endif + + static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); void cpu_gen_init(void) @@ -247,7 +308,9 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) return p - block; } -/* The cpu state corresponding to 'searched_pc' is restored. */ +/* The cpu state corresponding to 'searched_pc' is restored. + * Called with tb_lock held. 
+ */ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc) { @@ -260,6 +323,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, int64_t ti = profile_getclock(); #endif + searched_pc -= GETPC_ADJ; + if (searched_pc < host_pc) { return -1; } @@ -298,7 +363,9 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; + bool r = false; + tb_lock(); tb = tb_find_pc(retaddr); if (tb) { cpu_restore_state_from_tb(cpu, tb, retaddr); @@ -307,9 +374,11 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr) tb_phys_invalidate(tb, -1); tb_free(tb); } - return true; + r = true; } - return false; + tb_unlock(); + + return r; } void page_size_init(void) @@ -330,6 +399,8 @@ void page_size_init(void) static void page_init(void) { page_size_init(); + page_table_config_init(); + #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) { #ifdef HAVE_KINFO_GETVMMAP @@ -397,6 +468,7 @@ static void page_init(void) } /* If alloc=1: + * Called with tb_lock held for system emulation. * Called with mmap_lock held for user-mode emulation. */ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) @@ -405,11 +477,15 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) void **lp; int i; + if (alloc) { + assert_memory_lock(); + } + /* Level 1. Always allocated. */ - lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1)); + lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1)); /* Level 2..N-1. */ - for (i = V_L1_SHIFT / V_L2_BITS - 1; i > 0; i--) { + for (i = v_l2_levels; i > 0; i--) { void **p = atomic_rcu_read(lp); if (p == NULL) { @@ -761,23 +837,33 @@ bool tcg_enabled(void) return tcg_ctx.code_gen_buffer != NULL; } -/* Allocate a new translation block. Flush the translation buffer if - too many translation blocks or too much generated code. */ +/* + * Allocate a new translation block. Flush the translation buffer if + * too many translation blocks or too much generated code. + * + * Called with tb_lock held. + */ static TranslationBlock *tb_alloc(target_ulong pc) { TranslationBlock *tb; + assert_tb_lock(); + if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) { return NULL; } tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++]; tb->pc = pc; tb->cflags = 0; + tb->invalid = false; return tb; } +/* Called with tb_lock held. */ void tb_free(TranslationBlock *tb) { + assert_tb_lock(); + /* In practice this is mostly used for single use temporary TB Ignore the hard cases and just back up if this TB happens to be the last one generated. */ @@ -823,21 +909,26 @@ static void page_flush_tb_1(int level, void **lp) static void page_flush_tb(void) { - int i; + int i, l1_sz = v_l1_size; - for (i = 0; i < V_L1_SIZE; i++) { - page_flush_tb_1(V_L1_SHIFT / V_L2_BITS - 1, l1_map + i); + for (i = 0; i < l1_sz; i++) { + page_flush_tb_1(v_l2_levels, l1_map + i); } } /* flush all the translation blocks */ -/* XXX: tb_flush is currently not thread safe */ -void tb_flush(CPUState *cpu) +static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count) { - if (!tcg_enabled()) { - return; + tb_lock(); + + /* If it is already been done on request of another CPU, + * just retry. 
+ */ + if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_count.host_int) { + goto done; } -#if defined(DEBUG_FLUSH) + +#if defined(DEBUG_TB_FLUSH) printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer), tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ? @@ -848,20 +939,36 @@ void tb_flush(CPUState *cpu) > tcg_ctx.code_gen_buffer_size) { cpu_abort(cpu, "Internal error: code buffer overflow\n"); } - tcg_ctx.tb_ctx.nb_tbs = 0; CPU_FOREACH(cpu) { - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); - cpu->tb_flushed = true; + int i; + + for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) { + atomic_set(&cpu->tb_jmp_cache[i], NULL); + } } + tcg_ctx.tb_ctx.nb_tbs = 0; qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE); page_flush_tb(); tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; /* XXX: flush processor icache at this point if cache flush is expensive */ - tcg_ctx.tb_ctx.tb_flush_count++; + atomic_mb_set(&tcg_ctx.tb_ctx.tb_flush_count, + tcg_ctx.tb_ctx.tb_flush_count + 1); + +done: + tb_unlock(); +} + +void tb_flush(CPUState *cpu) +{ + if (tcg_enabled()) { + unsigned tb_flush_count = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count); + async_safe_run_on_cpu(cpu, do_tb_flush, + RUN_ON_CPU_HOST_INT(tb_flush_count)); + } } #ifdef DEBUG_TB_CHECK @@ -878,6 +985,10 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp) } } +/* verify that all the pages have correct rights for code + * + * Called with tb_lock held. + */ static void tb_invalidate_check(target_ulong address) { address &= TARGET_PAGE_MASK; @@ -982,7 +1093,10 @@ static inline void tb_jmp_unlink(TranslationBlock *tb) } } -/* invalidate one TB */ +/* invalidate one TB + * + * Called with tb_lock held. + */ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) { CPUState *cpu; @@ -990,6 +1104,10 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) uint32_t h; tb_page_addr_t phys_pc; + assert_tb_lock(); + + atomic_set(&tb->invalid, true); + /* remove the TB from the hash list */ phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); h = tb_hash_func(phys_pc, tb->pc, tb->flags); @@ -1010,8 +1128,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); CPU_FOREACH(cpu) { - if (cpu->tb_jmp_cache[h] == tb) { - cpu->tb_jmp_cache[h] = NULL; + if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) { + atomic_set(&cpu->tb_jmp_cache[h], NULL); } } @@ -1045,7 +1163,7 @@ static void build_page_bitmap(PageDesc *p) tb_end = tb_start + tb->size; if (tb_end > TARGET_PAGE_SIZE) { tb_end = TARGET_PAGE_SIZE; - } + } } else { tb_start = 0; tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); @@ -1068,6 +1186,8 @@ static inline void tb_alloc_page(TranslationBlock *tb, bool page_already_protected; #endif + assert_memory_lock(); + tb->page_addr[n] = page_addr; p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); tb->page_next[n] = p->first_tb; @@ -1124,9 +1244,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, { uint32_t h; - /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags); - qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); + assert_memory_lock(); /* add in the page list */ tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); @@ -1136,6 +1254,10 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb->page_addr[1] = -1; } + /* add in the hash table */ + h = tb_hash_func(phys_pc, 
tb->pc, tb->flags); + qht_insert(&tcg_ctx.tb_ctx.htable, tb, h); + #ifdef DEBUG_TB_CHECK tb_page_check(); #endif @@ -1155,6 +1277,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #ifdef CONFIG_PROFILER int64_t ti; #endif + assert_memory_lock(); phys_pc = get_page_addr_code(env, pc); if (use_icount && !(cflags & CF_IGNORE_ICOUNT)) { @@ -1166,9 +1289,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, buffer_overflow: /* flush must be done */ tb_flush(cpu); - /* cannot fail at this point */ - tb = tb_alloc(pc); - assert(tb != NULL); + mmap_unlock(); + cpu_loop_exit(cpu); } gen_code_buf = tcg_ctx.code_gen_ptr; @@ -1233,10 +1355,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && qemu_log_in_addr_range(tb->pc)) { + qemu_log_lock(); qemu_log("OUT: [size=%d]\n", gen_code_size); log_disas(tb->tc_ptr, gen_code_size); qemu_log("\n"); qemu_log_flush(); + qemu_log_unlock(); } #endif @@ -1280,9 +1404,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, * access: the virtual CPU will exit the current TB if code is modified inside * this TB. * - * Called with mmap_lock held for user-mode emulation + * Called with mmap_lock held for user-mode emulation, grabs tb_lock + * Called with tb_lock held for system-mode emulation */ -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) +static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end) { while (start < end) { tb_invalidate_phys_page_range(start, end, 0); @@ -1291,6 +1416,21 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) } } +#ifdef CONFIG_SOFTMMU +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) +{ + assert_tb_lock(); + tb_invalidate_phys_range_1(start, end); +} +#else +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) +{ + assert_memory_lock(); + tb_lock(); + tb_invalidate_phys_range_1(start, end); + tb_unlock(); +} +#endif /* * Invalidate all TBs which intersect with the target physical address range * [start;end[. NOTE: start and end must refer to the *same* physical page. @@ -1298,7 +1438,8 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) * access: the virtual CPU will exit the current TB if code is modified inside * this TB. * - * Called with mmap_lock held for user-mode emulation + * Called with tb_lock/mmap_lock held for user-mode emulation + * Called with tb_lock held for system-mode emulation */ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access) @@ -1320,6 +1461,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, uint32_t current_flags = 0; #endif /* TARGET_HAS_PRECISE_SMC */ + assert_memory_lock(); + assert_tb_lock(); + p = page_find(start >> TARGET_PAGE_BITS); if (!p) { return; @@ -1395,7 +1539,10 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, } #ifdef CONFIG_SOFTMMU -/* len must be <= 8 and start must be a multiple of len */ +/* len must be <= 8 and start must be a multiple of len. + * Called via softmmu_template.h when code areas are written to with + * tb_lock held. 
+ */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) { PageDesc *p; @@ -1409,13 +1556,17 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) (intptr_t)cpu_single_env->segs[R_CS].base); } #endif + assert_memory_lock(); + p = page_find(start >> TARGET_PAGE_BITS); if (!p) { return; } if (!p->code_bitmap && ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { - /* build code bitmap */ + /* build code bitmap. FIXME: writes should be protected by + * tb_lock, reads by tb_lock or RCU. + */ build_page_bitmap(p); } if (p->code_bitmap) { @@ -1454,11 +1605,15 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) uint32_t current_flags = 0; #endif + assert_memory_lock(); + addr &= TARGET_PAGE_MASK; p = page_find(addr >> TARGET_PAGE_BITS); if (!p) { return false; } + + tb_lock(); tb = p->first_tb; #ifdef TARGET_HAS_PRECISE_SMC if (tb && pc != 0) { @@ -1496,9 +1651,13 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc) modifying the memory. It will ensure that it cannot modify itself */ tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1); + /* tb_lock will be reset after cpu_loop_exit_noexc longjmps + * back into the cpu_exec loop. */ return true; } #endif + tb_unlock(); + return false; } #endif @@ -1551,11 +1710,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) return; } ram_addr = memory_region_get_ram_addr(mr) + addr; + tb_lock(); tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); + tb_unlock(); rcu_read_unlock(); } #endif /* !defined(CONFIG_USER_ONLY) */ +/* Called with tb_lock held. */ void tb_check_watchpoint(CPUState *cpu) { TranslationBlock *tb; @@ -1592,6 +1754,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) target_ulong pc, cs_base; uint32_t flags; + tb_lock(); tb = tb_find_pc(retaddr); if (!tb) { cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", @@ -1643,11 +1806,16 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) /* FIXME: In theory this could raise an exception. In practice we have already translated the block once so it's probably ok. */ tb_gen_code(cpu, pc, cs_base, flags, cflags); + /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not - the first in the TB) then we end up generating a whole new TB and - repeating the fault, which is horribly inefficient. - Better would be to execute just this insn uncached, or generate a - second new TB. */ + * the first in the TB) then we end up generating a whole new TB and + * repeating the fault, which is horribly inefficient. + * Better would be to execute just this insn uncached, or generate a + * second new TB. + * + * cpu_loop_exit_noexc will longjmp back to cpu_exec where the + * tb_lock gets reset. 
+ */ cpu_loop_exit_noexc(cpu); } @@ -1711,6 +1879,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) TranslationBlock *tb; struct qht_stats hst; + tb_lock(); + target_code_size = 0; max_target_code_size = 0; cross_page = 0; @@ -1766,11 +1936,14 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) qht_statistics_destroy(&hst); cpu_fprintf(f, "\nStatistics:\n"); - cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count); + cpu_fprintf(f, "TB flush count %u\n", + atomic_read(&tcg_ctx.tb_ctx.tb_flush_count)); cpu_fprintf(f, "TB invalidate count %d\n", tcg_ctx.tb_ctx.tb_phys_invalidate_count); cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); tcg_dump_info(f, cpu_fprintf); + + tb_unlock(); } void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf) @@ -1856,16 +2029,16 @@ static int walk_memory_regions_1(struct walk_memory_regions_data *data, int walk_memory_regions(void *priv, walk_memory_regions_fn fn) { struct walk_memory_regions_data data; - uintptr_t i; + uintptr_t i, l1_sz = v_l1_size; data.fn = fn; data.priv = priv; data.start = -1u; data.prot = 0; - for (i = 0; i < V_L1_SIZE; i++) { - int rc = walk_memory_regions_1(&data, (target_ulong)i << (V_L1_SHIFT + TARGET_PAGE_BITS), - V_L1_SHIFT / V_L2_BITS - 1, l1_map + i); + for (i = 0; i < l1_sz; i++) { + target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS); + int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i); if (rc != 0) { return rc; } @@ -1923,6 +2096,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags) assert(end < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); #endif assert(start < end); + assert_memory_lock(); start = start & TARGET_PAGE_MASK; end = TARGET_PAGE_ALIGN(end); diff --git a/ui/cocoa.m b/ui/cocoa.m index 36c6bf0cb0..26d4a1c07f 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -33,6 +33,7 @@ #include "sysemu/sysemu.h" #include "qmp-commands.h" #include "sysemu/blockdev.h" +#include "qemu-version.h" #include #ifndef MAC_OS_X_VERSION_10_5 @@ -63,7 +64,7 @@ int bitsPerPixel; } QEMUScreen; -NSWindow *normalWindow; +NSWindow *normalWindow, *about_window; static DisplayChangeListener *dcl; static int last_buttons; @@ -670,7 +671,9 @@ - (void) handleEvent:(NSEvent *)event case NSLeftMouseUp: mouse_event = true; if (!isMouseGrabbed && [self screenContainsPoint:p]) { - [self grabMouse]; + if([[self window] isKeyWindow]) { + [self grabMouse]; + } } break; case NSRightMouseUp: @@ -811,7 +814,6 @@ - (void)startEmulationWithArgc:(int)argc argv:(char**)argv; - (void)doToggleFullScreen:(id)sender; - (void)toggleFullScreen:(id)sender; - (void)showQEMUDoc:(id)sender; -- (void)showQEMUTec:(id)sender; - (void)zoomToFit:(id) sender; - (void)displayConsole:(id)sender; - (void)pauseQEMU:(id)sender; @@ -824,6 +826,8 @@ - (void)ejectDeviceMedia:(id)sender; - (void)changeDeviceMedia:(id)sender; - (BOOL)verifyQuit; - (void)openDocumentation:(NSString *)filename; +- (IBAction) do_about_menu_item: (id) sender; +- (void)make_about_window; @end @implementation QemuCocoaAppController @@ -876,6 +880,7 @@ - (id) init supportedImageFileTypes = [NSArray arrayWithObjects: @"img", @"iso", @"dmg", @"qcow", @"qcow2", @"cloop", @"vmdk", @"cdr", nil]; + [self make_about_window]; } return self; } @@ -992,13 +997,6 @@ - (void)showQEMUDoc:(id)sender [self openDocumentation: @"qemu-doc.html"]; } -- (void)showQEMUTec:(id)sender -{ - COCOA_DEBUG("QemuCocoaAppController: showQEMUTec\n"); - - [self openDocumentation: @"qemu-tech.html"]; -} - /* Stretches video to fit host monitor size */ - 
(void)zoomToFit:(id) sender { @@ -1079,7 +1077,8 @@ - (void)ejectDeviceMedia:(id)sender } Error *err = NULL; - qmp_eject([drive cStringUsingEncoding: NSASCIIStringEncoding], false, false, &err); + qmp_eject(true, [drive cStringUsingEncoding: NSASCIIStringEncoding], + false, NULL, false, false, &err); handleAnyDeviceErrors(err); } @@ -1112,8 +1111,10 @@ - (void)changeDeviceMedia:(id)sender } Error *err = NULL; - qmp_blockdev_change_medium([drive cStringUsingEncoding: + qmp_blockdev_change_medium(true, + [drive cStringUsingEncoding: NSASCIIStringEncoding], + false, NULL, [file cStringUsingEncoding: NSASCIIStringEncoding], true, "raw", @@ -1138,6 +1139,101 @@ - (BOOL)verifyQuit } } +/* The action method for the About menu item */ +- (IBAction) do_about_menu_item: (id) sender +{ + [about_window makeKeyAndOrderFront: nil]; +} + +/* Create and display the about dialog */ +- (void)make_about_window +{ + /* Make the window */ + int x = 0, y = 0, about_width = 400, about_height = 200; + NSRect window_rect = NSMakeRect(x, y, about_width, about_height); + about_window = [[NSWindow alloc] initWithContentRect:window_rect + styleMask:NSTitledWindowMask | NSClosableWindowMask | + NSMiniaturizableWindowMask + backing:NSBackingStoreBuffered + defer:NO]; + [about_window setTitle: @"About"]; + [about_window setReleasedWhenClosed: NO]; + [about_window center]; + NSView *superView = [about_window contentView]; + + /* Create the dimensions of the picture */ + int picture_width = 80, picture_height = 80; + x = (about_width - picture_width)/2; + y = about_height - picture_height - 10; + NSRect picture_rect = NSMakeRect(x, y, picture_width, picture_height); + + /* Get the path to the QEMU binary */ + NSString *binary_name = [NSString stringWithCString: gArgv[0] + encoding: NSASCIIStringEncoding]; + binary_name = [binary_name lastPathComponent]; + NSString *program_path = [[NSString alloc] initWithFormat: @"%@/%@", + [[NSBundle mainBundle] bundlePath], binary_name]; + + /* Make the picture of QEMU */ + NSImageView *picture_view = [[NSImageView alloc] initWithFrame: + picture_rect]; + NSImage *qemu_image = [[NSWorkspace sharedWorkspace] iconForFile: + program_path]; + [picture_view setImage: qemu_image]; + [picture_view setImageScaling: NSImageScaleProportionallyUpOrDown]; + [superView addSubview: picture_view]; + + /* Make the name label */ + x = 0; + y = y - 25; + int name_width = about_width, name_height = 20; + NSRect name_rect = NSMakeRect(x, y, name_width, name_height); + NSTextField *name_label = [[NSTextField alloc] initWithFrame: name_rect]; + [name_label setEditable: NO]; + [name_label setBezeled: NO]; + [name_label setDrawsBackground: NO]; + [name_label setAlignment: NSCenterTextAlignment]; + NSString *qemu_name = [[NSString alloc] initWithCString: gArgv[0] + encoding: NSASCIIStringEncoding]; + qemu_name = [qemu_name lastPathComponent]; + [name_label setStringValue: qemu_name]; + [superView addSubview: name_label]; + + /* Set the version label's attributes */ + x = 0; + y = 50; + int version_width = about_width, version_height = 20; + NSRect version_rect = NSMakeRect(x, y, version_width, version_height); + NSTextField *version_label = [[NSTextField alloc] initWithFrame: + version_rect]; + [version_label setEditable: NO]; + [version_label setBezeled: NO]; + [version_label setAlignment: NSCenterTextAlignment]; + [version_label setDrawsBackground: NO]; + + /* Create the version string*/ + NSString *version_string; + version_string = [[NSString alloc] initWithFormat: + @"QEMU emulator version %s%s", 
QEMU_VERSION, QEMU_PKGVERSION]; + [version_label setStringValue: version_string]; + [superView addSubview: version_label]; + + /* Make copyright label */ + x = 0; + y = 35; + int copyright_width = about_width, copyright_height = 20; + NSRect copyright_rect = NSMakeRect(x, y, copyright_width, copyright_height); + NSTextField *copyright_label = [[NSTextField alloc] initWithFrame: + copyright_rect]; + [copyright_label setEditable: NO]; + [copyright_label setBezeled: NO]; + [copyright_label setDrawsBackground: NO]; + [copyright_label setAlignment: NSCenterTextAlignment]; + [copyright_label setStringValue: [NSString stringWithFormat: @"%s", + QEMU_COPYRIGHT]]; + [superView addSubview: copyright_label]; +} + @end @@ -1185,7 +1281,7 @@ int main (int argc, const char * argv[]) { // Application menu menu = [[NSMenu alloc] initWithTitle:@""]; - [menu addItemWithTitle:@"About QEMU" action:@selector(orderFrontStandardAboutPanel:) keyEquivalent:@""]; // About QEMU + [menu addItemWithTitle:@"About QEMU" action:@selector(do_about_menu_item:) keyEquivalent:@""]; // About QEMU [menu addItem:[NSMenuItem separatorItem]]; //Separator [menu addItemWithTitle:@"Hide QEMU" action:@selector(hide:) keyEquivalent:@"h"]; //Hide QEMU menuItem = (NSMenuItem *)[menu addItemWithTitle:@"Hide Others" action:@selector(hideOtherApplications:) keyEquivalent:@"h"]; // Hide Others @@ -1231,7 +1327,6 @@ int main (int argc, const char * argv[]) { // Help menu menu = [[NSMenu alloc] initWithTitle:@"Help"]; [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"QEMU Documentation" action:@selector(showQEMUDoc:) keyEquivalent:@"?"] autorelease]]; // QEMU Help - [menu addItem: [[[NSMenuItem alloc] initWithTitle:@"QEMU Technology" action:@selector(showQEMUTec:) keyEquivalent:@""] autorelease]]; // QEMU Help menuItem = [[[NSMenuItem alloc] initWithTitle:@"Window" action:nil keyEquivalent:@""] autorelease]; [menuItem setSubmenu:menu]; [[NSApp mainMenu] addItem:menuItem]; diff --git a/ui/console.c b/ui/console.c index c24bfe422d..ed888e55ea 100644 --- a/ui/console.c +++ b/ui/console.c @@ -123,6 +123,7 @@ struct QemuConsole { DisplaySurface *surface; int dcls; DisplayChangeListener *gl; + bool gl_block; /* Graphic console state. 
*/ Object *device; @@ -264,10 +265,10 @@ void graphic_hw_update(QemuConsole *con) void graphic_hw_gl_block(QemuConsole *con, bool block) { - if (!con) { - con = active_console; - } - if (con && con->hw_ops->gl_block) { + assert(con != NULL); + + con->gl_block = block; + if (con->hw_ops->gl_block) { con->hw_ops->gl_block(con->hw, block); } } @@ -1082,6 +1083,7 @@ static void kbd_send_chars(void *opaque) void kbd_put_keysym_console(QemuConsole *s, int keysym) { uint8_t buf[16], *q; + CharBackend *be; int c; if (!s || (s->console_type == GRAPHIC_CONSOLE)) @@ -1124,7 +1126,8 @@ void kbd_put_keysym_console(QemuConsole *s, int keysym) if (s->echo) { console_puts(s->chr, buf, q - buf); } - if (s->chr->chr_read) { + be = s->chr->be; + if (be && be->chr_read) { qemu_fifo_write(&s->out_fifo, buf, q - buf); kbd_send_chars(s); } @@ -1142,6 +1145,7 @@ static const int qcode_to_keysym[Q_KEY_CODE__MAX] = { [Q_KEY_CODE_PGUP] = QEMU_KEY_PAGEUP, [Q_KEY_CODE_PGDN] = QEMU_KEY_PAGEDOWN, [Q_KEY_CODE_DELETE] = QEMU_KEY_DELETE, + [Q_KEY_CODE_BACKSPACE] = QEMU_KEY_BACKSPACE, }; bool kbd_put_qcode_console(QemuConsole *s, int qcode) @@ -1878,6 +1882,12 @@ bool qemu_console_is_fixedsize(QemuConsole *con) return con && (con->console_type != TEXT_CONSOLE); } +bool qemu_console_is_gl_blocked(QemuConsole *con) +{ + assert(con != NULL); + return con->gl_block; +} + char *qemu_console_get_label(QemuConsole *con) { if (con->console_type == GRAPHIC_CONSOLE) { @@ -2025,8 +2035,6 @@ static void text_console_do_init(CharDriverState *chr, DisplayState *ds) } qemu_chr_be_generic_open(chr); - if (chr->init) - chr->init(chr); } static CharDriverState *text_console_init(ChardevVC *vc, Error **errp) @@ -2071,10 +2079,6 @@ static CharDriverState *text_console_init(ChardevVC *vc, Error **errp) s->chr = chr; chr->opaque = s; chr->chr_set_echo = text_console_set_echo; - /* console/chardev init sometimes completes elsewhere in a 2nd - * stage, so defer OPENED events until they are fully initialized - */ - chr->explicit_be_open = true; if (display_state) { text_console_do_init(chr, display_state); @@ -2085,8 +2089,13 @@ static CharDriverState *text_console_init(ChardevVC *vc, Error **errp) static VcHandler *vc_handler = text_console_init; static CharDriverState *vc_init(const char *id, ChardevBackend *backend, - ChardevReturn *ret, Error **errp) + ChardevReturn *ret, bool *be_opened, + Error **errp) { + /* console/chardev init sometimes completes elsewhere in a 2nd + * stage, so defer OPENED events until they are fully initialized + */ + *be_opened = false; return vc_handler(backend->u.vc.data, errp); } @@ -2100,6 +2109,13 @@ void qemu_console_resize(QemuConsole *s, int width, int height) DisplaySurface *surface; assert(s->console_type == GRAPHIC_CONSOLE); + + if (s->surface && + pixman_image_get_width(s->surface->image) == width && + pixman_image_get_height(s->surface->image) == height) { + return; + } + surface = qemu_create_displaysurface(width, height); dpy_gfx_replace_surface(s, surface); } diff --git a/ui/curses.c b/ui/curses.c index b47558956c..2e132a7bfa 100644 --- a/ui/curses.c +++ b/ui/curses.c @@ -181,7 +181,7 @@ static kbd_layout_t *kbd_layout = NULL; static void curses_refresh(DisplayChangeListener *dcl) { - int chr, nextchr, keysym, keycode, keycode_alt; + int chr, keysym, keycode, keycode_alt; curses_winch_check(); @@ -195,15 +195,9 @@ static void curses_refresh(DisplayChangeListener *dcl) graphic_hw_text_update(NULL, screen); - nextchr = ERR; while (1) { /* while there are any pending key strokes to process */ - if (nextchr 
== ERR) - chr = getch(); - else { - chr = nextchr; - nextchr = ERR; - } + chr = getch(); if (chr == ERR) break; @@ -224,13 +218,12 @@ static void curses_refresh(DisplayChangeListener *dcl) /* alt key */ if (keycode == 1) { - nextchr = getch(); + int nextchr = getch(); if (nextchr != ERR) { chr = nextchr; keycode_alt = ALT; - keycode = curses2keycode[nextchr]; - nextchr = ERR; + keycode = curses2keycode[chr]; if (keycode != -1) { keycode |= ALT; @@ -317,7 +310,10 @@ static void curses_refresh(DisplayChangeListener *dcl) qemu_input_event_send_key_delay(0); } } else { - keysym = curses2qemu[chr]; + keysym = -1; + if (chr < CURSES_KEYS) { + keysym = curses2qemu[chr]; + } if (keysym == -1) keysym = chr; @@ -373,10 +369,10 @@ static void curses_setup(void) /* ACS_* is not constant. So, we can't initialize statically. */ vga_to_curses['\0'] = ' '; vga_to_curses[0x04] = ACS_DIAMOND; - vga_to_curses[0x0a] = ACS_RARROW; - vga_to_curses[0x0b] = ACS_LARROW; vga_to_curses[0x18] = ACS_UARROW; vga_to_curses[0x19] = ACS_DARROW; + vga_to_curses[0x1a] = ACS_RARROW; + vga_to_curses[0x1b] = ACS_LARROW; vga_to_curses[0x9c] = ACS_STERLING; vga_to_curses[0xb0] = ACS_BOARD; vga_to_curses[0xb1] = ACS_CKBOARD; diff --git a/ui/gtk.c b/ui/gtk.c index 58d20eed62..a216216d8b 100644 --- a/ui/gtk.c +++ b/ui/gtk.c @@ -94,7 +94,7 @@ #define GDK_IS_WIN32_DISPLAY(dpy) (dpy == dpy) #endif -#ifndef GDK_KEY_0 +#if !GTK_CHECK_VERSION(2, 22, 0) #define GDK_KEY_0 GDK_0 #define GDK_KEY_1 GDK_1 #define GDK_KEY_2 GDK_2 @@ -104,6 +104,7 @@ #define GDK_KEY_plus GDK_plus #define GDK_KEY_minus GDK_minus #define GDK_KEY_Pause GDK_Pause +#define GDK_KEY_Delete GDK_Delete #endif /* Some older mingw versions lack this constant or have @@ -912,9 +913,28 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, if (!qemu_input_is_absolute() && s->ptr_owner == vc) { GdkScreen *screen = gtk_widget_get_screen(vc->gfx.drawing_area); + int screen_width, screen_height; + int x = (int)motion->x_root; int y = (int)motion->y_root; +#if GTK_CHECK_VERSION(3, 22, 0) + { + GdkDisplay *dpy = gtk_widget_get_display(widget); + GdkWindow *win = gtk_widget_get_window(widget); + GdkMonitor *monitor = gdk_display_get_monitor_at_window(dpy, win); + GdkRectangle geometry; + gdk_monitor_get_geometry(monitor, &geometry); + screen_width = geometry.width; + screen_height = geometry.height; + } +#else + { + screen_width = gdk_screen_get_width(screen); + screen_height = gdk_screen_get_height(screen); + } +#endif + /* In relative mode check to see if client pointer hit * one of the screen edges, and if so move it back by * 200 pixels. 
This is important because the pointer @@ -928,10 +948,10 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, if (y == 0) { y += 200; } - if (x == (gdk_screen_get_width(screen) - 1)) { + if (x == (screen_width - 1)) { x -= 200; } - if (y == (gdk_screen_get_height(screen) - 1)) { + if (y == (screen_height - 1)) { y -= 200; } @@ -1051,7 +1071,9 @@ static gboolean gd_text_key_down(GtkWidget *widget, VirtualConsole *vc = opaque; QemuConsole *con = vc->gfx.dcl.con; - if (key->length) { + if (key->keyval == GDK_KEY_Delete) { + kbd_put_qcode_console(con, Q_KEY_CODE_DELETE); + } else if (key->length) { kbd_put_string_console(con, key->string, key->length); } else { int num = gd_map_keycode(vc->s, gtk_widget_get_display(widget), @@ -1559,6 +1581,9 @@ static void gd_change_page(GtkNotebook *nb, gpointer arg1, guint arg2, TRUE); } gtk_widget_set_sensitive(s->grab_item, on_vga); +#ifdef CONFIG_VTE + gtk_widget_set_sensitive(s->copy_item, vc->type == GD_VC_VTE); +#endif gd_update_windowsize(vc); gd_update_cursor(vc); @@ -1685,9 +1710,6 @@ static CharDriverState *gd_vc_handler(ChardevVC *vc, Error **errp) /* Temporary, until gd_vc_vte_init runs. */ chr->opaque = g_new0(VirtualConsole, 1); - /* defer OPENED events until our vc is fully initialized */ - chr->explicit_be_open = true; - vcs[nb_vcs++] = chr; return chr; @@ -1789,9 +1811,6 @@ static GSList *gd_vc_vte_init(GtkDisplayState *s, VirtualConsole *vc, gtk_label_new(vc->label)); qemu_chr_be_generic_open(vc->vte.chr); - if (vc->vte.chr->init) { - vc->vte.chr->init(vc->vte.chr); - } return group; } @@ -2230,6 +2249,11 @@ void gtk_display_init(DisplayState *ds, bool full_screen, bool grab_on_hover) } #endif +#ifdef CONFIG_VTE + gtk_widget_set_sensitive(s->copy_item, + gd_vc_find_current(s)->type == GD_VC_VTE); +#endif + if (full_screen) { gtk_menu_item_activate(GTK_MENU_ITEM(s->full_screen_item)); } diff --git a/ui/input-linux.c b/ui/input-linux.c index 0e230ce699..f345317794 100644 --- a/ui/input-linux.c +++ b/ui/input-linux.c @@ -347,7 +347,8 @@ static void input_linux_event(void *opaque) static void input_linux_complete(UserCreatable *uc, Error **errp) { InputLinux *il = INPUT_LINUX(uc); - uint8_t evtmap, relmap, absmap, keymap[KEY_CNT / 8]; + uint8_t evtmap, relmap, absmap; + uint8_t keymap[KEY_CNT / 8], keystate[KEY_CNT / 8]; unsigned int i; int rc, ver; @@ -394,6 +395,7 @@ static void input_linux_complete(UserCreatable *uc, Error **errp) if (evtmap & (1 << EV_KEY)) { memset(keymap, 0, sizeof(keymap)); rc = ioctl(il->fd, EVIOCGBIT(EV_KEY, sizeof(keymap)), keymap); + rc = ioctl(il->fd, EVIOCGKEY(sizeof(keystate)), keystate); for (i = 0; i < KEY_CNT; i++) { if (keymap[i / 8] & (1 << (i % 8))) { if (linux_is_button(i)) { @@ -401,12 +403,21 @@ static void input_linux_complete(UserCreatable *uc, Error **errp) } else { il->num_keys++; } + if (keystate[i / 8] & (1 << (i % 8))) { + il->keydown[i] = true; + il->keycount++; + } } } } qemu_set_fd_handler(il->fd, input_linux_event, NULL, il); - input_linux_toggle_grab(il); + if (il->keycount) { + /* delay grab until all keys are released */ + il->grab_request = true; + } else { + input_linux_toggle_grab(il); + } QTAILQ_INSERT_TAIL(&inputs, il, next); il->initialized = true; return; diff --git a/ui/spice-core.c b/ui/spice-core.c index da0505434a..1452e77fd1 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -796,7 +796,7 @@ void qemu_spice_init(void) qemu_opt_foreach(opts, add_channel, &tls_port, NULL); spice_server_set_name(spice_server, qemu_name); - 
spice_server_set_uuid(spice_server, qemu_uuid); + spice_server_set_uuid(spice_server, (unsigned char *)&qemu_uuid); seamless_migration = qemu_opt_get_bool(opts, "seamless-migration", 0); spice_server_set_seamless_migration(spice_server, seamless_migration); diff --git a/ui/spice-display.c b/ui/spice-display.c index 99132b69b6..5e6f78a219 100644 --- a/ui/spice-display.c +++ b/ui/spice-display.c @@ -850,6 +850,74 @@ static void qemu_spice_gl_block_timer(void *opaque) fprintf(stderr, "WARNING: spice: no gl-draw-done within one second\n"); } +static void spice_gl_refresh(DisplayChangeListener *dcl) +{ + SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); + uint64_t cookie; + + if (!ssd->ds || qemu_console_is_gl_blocked(ssd->dcl.con)) { + return; + } + + graphic_hw_update(dcl->con); + if (ssd->gl_updates && ssd->have_surface) { + qemu_spice_gl_block(ssd, true); + cookie = (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_GL_DRAW_DONE, 0); + spice_qxl_gl_draw_async(&ssd->qxl, 0, 0, + surface_width(ssd->ds), + surface_height(ssd->ds), + cookie); + ssd->gl_updates = 0; + } +} + +static void spice_gl_update(DisplayChangeListener *dcl, + int x, int y, int w, int h) +{ + SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); + + surface_gl_update_texture(ssd->gls, ssd->ds, x, y, w, h); + ssd->gl_updates++; +} + +static void spice_gl_switch(DisplayChangeListener *dcl, + struct DisplaySurface *new_surface) +{ + SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); + EGLint stride, fourcc; + int fd; + + if (ssd->ds) { + surface_gl_destroy_texture(ssd->gls, ssd->ds); + } + ssd->ds = new_surface; + if (ssd->ds) { + surface_gl_create_texture(ssd->gls, ssd->ds); + fd = egl_get_fd_for_texture(ssd->ds->texture, + &stride, &fourcc); + if (fd < 0) { + surface_gl_destroy_texture(ssd->gls, ssd->ds); + return; + } + + dprint(1, "%s: %dx%d (stride %d/%d, fourcc 0x%x)\n", __func__, + surface_width(ssd->ds), surface_height(ssd->ds), + surface_stride(ssd->ds), stride, fourcc); + + /* note: spice server will close the fd */ + spice_qxl_gl_scanout(&ssd->qxl, fd, + surface_width(ssd->ds), + surface_height(ssd->ds), + stride, fourcc, false); + ssd->have_surface = true; + ssd->have_scanout = false; + + qemu_spice_gl_monitor_config(ssd, 0, 0, + surface_width(ssd->ds), + surface_height(ssd->ds)); + } +} + static QEMUGLContext qemu_spice_gl_create_context(DisplayChangeListener *dcl, QEMUGLParams *params) { @@ -887,6 +955,8 @@ static void qemu_spice_gl_scanout(DisplayChangeListener *dcl, /* note: spice server will close the fd */ spice_qxl_gl_scanout(&ssd->qxl, fd, backing_width, backing_height, stride, fourcc, y_0_top); + ssd->have_surface = false; + ssd->have_scanout = (tex_id != 0); qemu_spice_gl_monitor_config(ssd, x, y, w, h); } @@ -897,6 +967,10 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl, SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl); uint64_t cookie; + if (!ssd->have_scanout) { + return; + } + dprint(2, "%s: %dx%d+%d+%d\n", __func__, w, h, x, y); qemu_spice_gl_block(ssd, true); cookie = (uintptr_t)qxl_cookie_new(QXL_COOKIE_TYPE_GL_DRAW_DONE, 0); @@ -904,13 +978,13 @@ static void qemu_spice_gl_update(DisplayChangeListener *dcl, } static const DisplayChangeListenerOps display_listener_gl_ops = { - .dpy_name = "spice-egl", - .dpy_gfx_update = display_update, - .dpy_gfx_switch = display_switch, - .dpy_gfx_check_format = qemu_pixman_check_format, - .dpy_refresh = display_refresh, - .dpy_mouse_set = display_mouse_set, - .dpy_cursor_define = 
display_mouse_define, + .dpy_name = "spice-egl", + .dpy_gfx_update = spice_gl_update, + .dpy_gfx_switch = spice_gl_switch, + .dpy_gfx_check_format = console_gl_check_format, + .dpy_refresh = spice_gl_refresh, + .dpy_mouse_set = display_mouse_set, + .dpy_cursor_define = display_mouse_define, .dpy_gl_ctx_create = qemu_spice_gl_create_context, .dpy_gl_ctx_destroy = qemu_egl_destroy_context, @@ -933,10 +1007,12 @@ static void qemu_spice_display_init_one(QemuConsole *con) #ifdef HAVE_SPICE_GL if (display_opengl) { ssd->dcl.ops = &display_listener_gl_ops; - ssd->dmabuf_fd = -1; ssd->gl_unblock_bh = qemu_bh_new(qemu_spice_gl_unblock_bh, ssd); ssd->gl_unblock_timer = timer_new_ms(QEMU_CLOCK_REALTIME, qemu_spice_gl_block_timer, ssd); + ssd->gls = console_gl_init_context(); + ssd->have_surface = false; + ssd->have_scanout = false; } #endif ssd->dcl.con = con; diff --git a/ui/vnc-auth-vencrypt.c b/ui/vnc-auth-vencrypt.c index 11c8c9a819..c0c29a5119 100644 --- a/ui/vnc-auth-vencrypt.c +++ b/ui/vnc-auth-vencrypt.c @@ -116,6 +116,7 @@ static int protocol_client_vencrypt_auth(VncState *vs, uint8_t *data, size_t len return 0; } + qio_channel_set_name(QIO_CHANNEL(tls), "vnc-server-tls"); VNC_DEBUG("Start TLS VeNCrypt handshake process\n"); object_unref(OBJECT(vs->ioc)); vs->ioc = QIO_CHANNEL(tls); diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c index 49df85e763..1e53b1cf84 100644 --- a/ui/vnc-enc-tight.c +++ b/ui/vnc-enc-tight.c @@ -707,10 +707,8 @@ check_solid_tile32(VncState *vs, int x, int y, int w, int h, static bool check_solid_tile(VncState *vs, int x, int y, int w, int h, uint32_t* color, bool samecolor) { - switch (VNC_SERVER_FB_BYTES) { - case 4: - return check_solid_tile32(vs, x, y, w, h, color, samecolor); - } + QEMU_BUILD_BUG_ON(VNC_SERVER_FB_BYTES != 4); + return check_solid_tile32(vs, x, y, w, h, color, samecolor); } static void find_best_solid_area(VncState *vs, int x, int y, int w, int h, diff --git a/ui/vnc-ws.c b/ui/vnc-ws.c index 3bac46e774..bffb484a8d 100644 --- a/ui/vnc-ws.c +++ b/ui/vnc-ws.c @@ -67,6 +67,8 @@ gboolean vncws_tls_handshake_io(QIOChannel *ioc G_GNUC_UNUSED, return TRUE; } + qio_channel_set_name(QIO_CHANNEL(tls), "vnc-ws-server-tls"); + VNC_DEBUG("Start TLS WS handshake process\n"); object_unref(OBJECT(vs->ioc)); vs->ioc = QIO_CHANNEL(tls); @@ -92,7 +94,7 @@ static void vncws_handshake_done(Object *source, vnc_client_error(vs); } else { VNC_DEBUG("Websock handshake complete, starting VNC protocol\n"); - vnc_init_state(vs); + vnc_start_protocol(vs); vs->ioc_tag = qio_channel_add_watch( vs->ioc, G_IO_IN, vnc_client_io, vs, NULL); } @@ -113,6 +115,7 @@ gboolean vncws_handshake_io(QIOChannel *ioc G_GNUC_UNUSED, } wioc = qio_channel_websock_new_server(vs->ioc); + qio_channel_set_name(QIO_CHANNEL(wioc), "vnc-ws-server-websock"); object_unref(OBJECT(vs->ioc)); vs->ioc = QIO_CHANNEL(wioc); diff --git a/ui/vnc.c b/ui/vnc.c index d1087c93a5..2c28a59ff7 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -371,7 +371,7 @@ VncInfo *qmp_query_vnc(Error **errp) VncDisplay *vd = vnc_display_find(NULL); SocketAddress *addr = NULL; - if (vd == NULL || !vd->enabled) { + if (vd == NULL || !vd->lsock) { info->enabled = false; } else { info->enabled = true; @@ -911,6 +911,10 @@ static void vnc_dpy_copy(DisplayChangeListener *dcl, } } + if (!vd->server) { + /* no client connected */ + return; + } /* do bitblit op on the local surface too */ pitch = vnc_server_fb_stride(vd); src_row = vnc_server_fb_ptr(vd, src_x, src_y); @@ -1218,13 +1222,13 @@ void vnc_disconnect_finish(VncState *vs) audio_del(vs); 
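A note on the ui/vnc-enc-tight.c hunk above: check_solid_tile() used to dispatch on VNC_SERVER_FB_BYTES at run time even though the server surface is always 32 bits per pixel, leaving an unreachable path with no return value. The patch turns that assumption into a compile-time check with QEMU_BUILD_BUG_ON(). The same pattern in plain C11, as a stand-alone illustration only (the helper names here are invented, not from the patch):

    #include <stdint.h>

    #define FB_BYTES_PER_PIXEL 4   /* stands in for VNC_SERVER_FB_BYTES */

    static int solid_tile32(const uint32_t *tile, int npixels, uint32_t color)
    {
        for (int i = 0; i < npixels; i++) {
            if (tile[i] != color) {
                return 0;
            }
        }
        return 1;
    }

    static int solid_tile(const void *tile, int npixels, uint32_t color)
    {
        /* break the build, not the guest, if the assumption ever changes */
        _Static_assert(FB_BYTES_PER_PIXEL == 4,
                       "only 32-bit server surfaces are supported");
        return solid_tile32(tile, npixels, color);
    }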
vnc_release_modifiers(vs); - if (vs->initialized) { - QTAILQ_REMOVE(&vs->vd->clients, vs, next); + if (vs->mouse_mode_notifier.notify != NULL) { qemu_remove_mouse_mode_change_notifier(&vs->mouse_mode_notifier); - if (QTAILQ_EMPTY(&vs->vd->clients)) { - /* last client gone */ - vnc_update_server_surface(vs->vd); - } + } + QTAILQ_REMOVE(&vs->vd->clients, vs, next); + if (QTAILQ_EMPTY(&vs->vd->clients)) { + /* last client gone */ + vnc_update_server_surface(vs->vd); } if (vs->vd->lock_key_sync) @@ -2974,6 +2978,7 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, bool skipauth, bool websocket) { VncState *vs = g_new0(VncState, 1); + bool first_client = QTAILQ_EMPTY(&vd->clients); int i; vs->sioc = sioc; @@ -3025,7 +3030,7 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, qio_channel_set_blocking(vs->ioc, false, NULL); if (websocket) { vs->websocket = 1; - if (vd->ws_tls) { + if (vd->tlscreds) { vs->ioc_tag = qio_channel_add_watch( vs->ioc, G_IO_IN, vncws_tls_handshake_io, vs, NULL); } else { @@ -3041,26 +3046,6 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, vnc_qmp_event(vs, QAPI_EVENT_VNC_CONNECTED); vnc_set_share_mode(vs, VNC_SHARE_MODE_CONNECTING); - if (!vs->websocket) { - vnc_init_state(vs); - } - - if (vd->num_connecting > vd->connections_limit) { - QTAILQ_FOREACH(vs, &vd->clients, next) { - if (vs->share_mode == VNC_SHARE_MODE_CONNECTING) { - vnc_disconnect_start(vs); - return; - } - } - } -} - -void vnc_init_state(VncState *vs) -{ - vs->initialized = true; - VncDisplay *vd = vs->vd; - bool first_client = QTAILQ_EMPTY(&vd->clients); - vs->last_x = -1; vs->last_y = -1; @@ -3079,34 +3064,48 @@ void vnc_init_state(VncState *vs) graphic_hw_update(vd->dcl.con); + if (!vs->websocket) { + vnc_start_protocol(vs); + } + + if (vd->num_connecting > vd->connections_limit) { + QTAILQ_FOREACH(vs, &vd->clients, next) { + if (vs->share_mode == VNC_SHARE_MODE_CONNECTING) { + vnc_disconnect_start(vs); + return; + } + } + } +} + +void vnc_start_protocol(VncState *vs) +{ vnc_write(vs, "RFB 003.008\n", 12); vnc_flush(vs); vnc_read_when(vs, protocol_version, 12); - reset_keys(vs); if (vs->vd->lock_key_sync) vs->led = qemu_add_led_event_handler(kbd_leds, vs); vs->mouse_mode_notifier.notify = check_pointer_type_change; qemu_add_mouse_mode_change_notifier(&vs->mouse_mode_notifier); - - /* vs might be free()ed here */ } static gboolean vnc_listen_io(QIOChannel *ioc, GIOCondition condition, void *opaque) { - VncDisplay *vs = opaque; + VncDisplay *vd = opaque; QIOChannelSocket *sioc = NULL; Error *err = NULL; - /* Catch-up */ - graphic_hw_update(vs->dcl.con); sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), &err); if (sioc != NULL) { + qio_channel_set_name(QIO_CHANNEL(sioc), + ioc != QIO_CHANNEL(vd->lsock) ? 
+ "vnc-ws-server" : "vnc-server"); qio_channel_set_delay(QIO_CHANNEL(sioc), false); - vnc_connect(vs, sioc, false, - ioc != QIO_CHANNEL(vs->lsock)); + vnc_connect(vd, sioc, false, + ioc != QIO_CHANNEL(vd->lsock)); object_unref(OBJECT(sioc)); } else { /* client probably closed connection before we got there */ @@ -3129,108 +3128,108 @@ static const DisplayChangeListenerOps dcl_ops = { void vnc_display_init(const char *id) { - VncDisplay *vs; + VncDisplay *vd; if (vnc_display_find(id) != NULL) { return; } - vs = g_malloc0(sizeof(*vs)); + vd = g_malloc0(sizeof(*vd)); - vs->id = strdup(id); - QTAILQ_INSERT_TAIL(&vnc_displays, vs, next); + vd->id = strdup(id); + QTAILQ_INSERT_TAIL(&vnc_displays, vd, next); - QTAILQ_INIT(&vs->clients); - vs->expires = TIME_MAX; + QTAILQ_INIT(&vd->clients); + vd->expires = TIME_MAX; if (keyboard_layout) { trace_vnc_key_map_init(keyboard_layout); - vs->kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout); + vd->kbd_layout = init_keyboard_layout(name2keysym, keyboard_layout); } else { - vs->kbd_layout = init_keyboard_layout(name2keysym, "en-us"); + vd->kbd_layout = init_keyboard_layout(name2keysym, "en-us"); } - if (!vs->kbd_layout) + if (!vd->kbd_layout) { exit(1); + } - vs->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE; - vs->connections_limit = 32; + vd->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE; + vd->connections_limit = 32; - qemu_mutex_init(&vs->mutex); + qemu_mutex_init(&vd->mutex); vnc_start_worker_thread(); - vs->dcl.ops = &dcl_ops; - register_displaychangelistener(&vs->dcl); + vd->dcl.ops = &dcl_ops; + register_displaychangelistener(&vd->dcl); } -static void vnc_display_close(VncDisplay *vs) +static void vnc_display_close(VncDisplay *vd) { - if (!vs) + if (!vd) { return; - vs->enabled = false; - vs->is_unix = false; - if (vs->lsock != NULL) { - if (vs->lsock_tag) { - g_source_remove(vs->lsock_tag); + } + vd->is_unix = false; + if (vd->lsock != NULL) { + if (vd->lsock_tag) { + g_source_remove(vd->lsock_tag); } - object_unref(OBJECT(vs->lsock)); - vs->lsock = NULL; + object_unref(OBJECT(vd->lsock)); + vd->lsock = NULL; } - vs->ws_enabled = false; - if (vs->lwebsock != NULL) { - if (vs->lwebsock_tag) { - g_source_remove(vs->lwebsock_tag); + if (vd->lwebsock != NULL) { + if (vd->lwebsock_tag) { + g_source_remove(vd->lwebsock_tag); } - object_unref(OBJECT(vs->lwebsock)); - vs->lwebsock = NULL; + object_unref(OBJECT(vd->lwebsock)); + vd->lwebsock = NULL; } - vs->auth = VNC_AUTH_INVALID; - vs->subauth = VNC_AUTH_INVALID; - if (vs->tlscreds) { - object_unparent(OBJECT(vs->tlscreds)); - vs->tlscreds = NULL; + vd->auth = VNC_AUTH_INVALID; + vd->subauth = VNC_AUTH_INVALID; + if (vd->tlscreds) { + object_unparent(OBJECT(vd->tlscreds)); + vd->tlscreds = NULL; } - g_free(vs->tlsaclname); - vs->tlsaclname = NULL; + g_free(vd->tlsaclname); + vd->tlsaclname = NULL; } int vnc_display_password(const char *id, const char *password) { - VncDisplay *vs = vnc_display_find(id); + VncDisplay *vd = vnc_display_find(id); - if (!vs) { + if (!vd) { return -EINVAL; } - if (vs->auth == VNC_AUTH_NONE) { + if (vd->auth == VNC_AUTH_NONE) { error_printf_unless_qmp("If you want use passwords please enable " "password auth using '-vnc ${dpy},password'.\n"); return -EINVAL; } - g_free(vs->password); - vs->password = g_strdup(password); + g_free(vd->password); + vd->password = g_strdup(password); return 0; } int vnc_display_pw_expire(const char *id, time_t expires) { - VncDisplay *vs = vnc_display_find(id); + VncDisplay *vd = vnc_display_find(id); - if (!vs) { + if (!vd) { 
return -EINVAL; } - vs->expires = expires; + vd->expires = expires; return 0; } -static void vnc_display_print_local_addr(VncDisplay *vs) +static void vnc_display_print_local_addr(VncDisplay *vd) { SocketAddress *addr; Error *err = NULL; - addr = qio_channel_socket_get_local_address(vs->lsock, &err); + addr = qio_channel_socket_get_local_address(vd->lsock, &err); if (!addr) { return; } @@ -3323,7 +3322,9 @@ static QemuOptsList qemu_vnc_opts = { static int -vnc_display_setup_auth(VncDisplay *vs, +vnc_display_setup_auth(int *auth, + int *subauth, + QCryptoTLSCreds *tlscreds, bool password, bool sasl, bool websocket, @@ -3376,95 +3377,56 @@ vnc_display_setup_auth(VncDisplay *vs, * VNC auth mechs for plain VNC vs websockets VNC, the end * result has the same security characteristics. */ - if (password) { - if (vs->tlscreds) { - vs->auth = VNC_AUTH_VENCRYPT; - if (websocket) { - vs->ws_tls = true; - } - if (object_dynamic_cast(OBJECT(vs->tlscreds), - TYPE_QCRYPTO_TLS_CREDS_X509)) { - VNC_DEBUG("Initializing VNC server with x509 password auth\n"); - vs->subauth = VNC_AUTH_VENCRYPT_X509VNC; - } else if (object_dynamic_cast(OBJECT(vs->tlscreds), - TYPE_QCRYPTO_TLS_CREDS_ANON)) { - VNC_DEBUG("Initializing VNC server with TLS password auth\n"); - vs->subauth = VNC_AUTH_VENCRYPT_TLSVNC; - } else { - error_setg(errp, - "Unsupported TLS cred type %s", - object_get_typename(OBJECT(vs->tlscreds))); - return -1; - } - } else { + if (websocket || !tlscreds) { + if (password) { VNC_DEBUG("Initializing VNC server with password auth\n"); - vs->auth = VNC_AUTH_VNC; - vs->subauth = VNC_AUTH_INVALID; - } - if (websocket) { - vs->ws_auth = VNC_AUTH_VNC; + *auth = VNC_AUTH_VNC; + } else if (sasl) { + VNC_DEBUG("Initializing VNC server with SASL auth\n"); + *auth = VNC_AUTH_SASL; } else { - vs->ws_auth = VNC_AUTH_INVALID; + VNC_DEBUG("Initializing VNC server with no auth\n"); + *auth = VNC_AUTH_NONE; } - } else if (sasl) { - if (vs->tlscreds) { - vs->auth = VNC_AUTH_VENCRYPT; - if (websocket) { - vs->ws_tls = true; + *subauth = VNC_AUTH_INVALID; + } else { + bool is_x509 = object_dynamic_cast(OBJECT(tlscreds), + TYPE_QCRYPTO_TLS_CREDS_X509) != NULL; + bool is_anon = object_dynamic_cast(OBJECT(tlscreds), + TYPE_QCRYPTO_TLS_CREDS_ANON) != NULL; + + if (!is_x509 && !is_anon) { + error_setg(errp, + "Unsupported TLS cred type %s", + object_get_typename(OBJECT(tlscreds))); + return -1; + } + *auth = VNC_AUTH_VENCRYPT; + if (password) { + if (is_x509) { + VNC_DEBUG("Initializing VNC server with x509 password auth\n"); + *subauth = VNC_AUTH_VENCRYPT_X509VNC; + } else { + VNC_DEBUG("Initializing VNC server with TLS password auth\n"); + *subauth = VNC_AUTH_VENCRYPT_TLSVNC; } - if (object_dynamic_cast(OBJECT(vs->tlscreds), - TYPE_QCRYPTO_TLS_CREDS_X509)) { + + } else if (sasl) { + if (is_x509) { VNC_DEBUG("Initializing VNC server with x509 SASL auth\n"); - vs->subauth = VNC_AUTH_VENCRYPT_X509SASL; - } else if (object_dynamic_cast(OBJECT(vs->tlscreds), - TYPE_QCRYPTO_TLS_CREDS_ANON)) { - VNC_DEBUG("Initializing VNC server with TLS SASL auth\n"); - vs->subauth = VNC_AUTH_VENCRYPT_TLSSASL; + *subauth = VNC_AUTH_VENCRYPT_X509SASL; } else { - error_setg(errp, - "Unsupported TLS cred type %s", - object_get_typename(OBJECT(vs->tlscreds))); - return -1; + VNC_DEBUG("Initializing VNC server with TLS SASL auth\n"); + *subauth = VNC_AUTH_VENCRYPT_TLSSASL; } } else { - VNC_DEBUG("Initializing VNC server with SASL auth\n"); - vs->auth = VNC_AUTH_SASL; - vs->subauth = VNC_AUTH_INVALID; - } - if (websocket) { - vs->ws_auth = VNC_AUTH_SASL; 
- } else { - vs->ws_auth = VNC_AUTH_INVALID; - } - } else { - if (vs->tlscreds) { - vs->auth = VNC_AUTH_VENCRYPT; - if (websocket) { - vs->ws_tls = true; - } - if (object_dynamic_cast(OBJECT(vs->tlscreds), - TYPE_QCRYPTO_TLS_CREDS_X509)) { + if (is_x509) { VNC_DEBUG("Initializing VNC server with x509 no auth\n"); - vs->subauth = VNC_AUTH_VENCRYPT_X509NONE; - } else if (object_dynamic_cast(OBJECT(vs->tlscreds), - TYPE_QCRYPTO_TLS_CREDS_ANON)) { - VNC_DEBUG("Initializing VNC server with TLS no auth\n"); - vs->subauth = VNC_AUTH_VENCRYPT_TLSNONE; + *subauth = VNC_AUTH_VENCRYPT_X509NONE; } else { - error_setg(errp, - "Unsupported TLS cred type %s", - object_get_typename(OBJECT(vs->tlscreds))); - return -1; + VNC_DEBUG("Initializing VNC server with TLS no auth\n"); + *subauth = VNC_AUTH_VENCRYPT_TLSNONE; } - } else { - VNC_DEBUG("Initializing VNC server with no auth\n"); - vs->auth = VNC_AUTH_NONE; - vs->subauth = VNC_AUTH_INVALID; - } - if (websocket) { - vs->ws_auth = VNC_AUTH_NONE; - } else { - vs->ws_auth = VNC_AUTH_INVALID; } } return 0; @@ -3518,7 +3480,7 @@ vnc_display_create_creds(bool x509, void vnc_display_open(const char *id, Error **errp) { - VncDisplay *vs = vnc_display_find(id); + VncDisplay *vd = vnc_display_find(id); QemuOpts *opts = qemu_opts_find(&qemu_vnc_opts, id); SocketAddress *saddr = NULL, *wsaddr = NULL; const char *share, *device_id; @@ -3536,12 +3498,13 @@ void vnc_display_open(const char *id, Error **errp) int acl = 0; int lock_key_sync = 1; int key_delay_ms; + bool ws_enabled = false; - if (!vs) { + if (!vd) { error_setg(errp, "VNC display not active"); return; } - vnc_display_close(vs); + vnc_display_close(vd); if (!opts) { return; @@ -3571,7 +3534,7 @@ void vnc_display_open(const char *id, Error **errp) } wsaddr = g_new0(SocketAddress, 1); - vs->ws_enabled = true; + ws_enabled = true; } if (strncmp(vnc, "unix:", 5) == 0) { @@ -3579,7 +3542,7 @@ void vnc_display_open(const char *id, Error **errp) saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1); saddr->u.q_unix.data->path = g_strdup(vnc + 5); - if (vs->ws_enabled) { + if (ws_enabled) { error_setg(errp, "UNIX sockets not supported with websock"); goto fail; } @@ -3615,7 +3578,7 @@ void vnc_display_open(const char *id, Error **errp) inet->ipv6 = ipv6; inet->has_ipv6 = has_ipv6; - if (vs->ws_enabled) { + if (ws_enabled) { wsaddr->type = SOCKET_ADDRESS_KIND_INET; inet = wsaddr->u.inet.data = g_new0(InetSocketAddress, 1); inet->host = g_strdup(saddr->u.inet.data->host); @@ -3646,7 +3609,7 @@ void vnc_display_open(const char *id, Error **errp) goto fail; } if (!qcrypto_cipher_supports( - QCRYPTO_CIPHER_ALG_DES_RFB)) { + QCRYPTO_CIPHER_ALG_DES_RFB, QCRYPTO_CIPHER_MODE_ECB)) { error_setg(errp, "Cipher backend does not support DES RFB algorithm"); goto fail; @@ -3682,17 +3645,17 @@ void vnc_display_open(const char *id, Error **errp) credid); goto fail; } - vs->tlscreds = (QCryptoTLSCreds *) + vd->tlscreds = (QCryptoTLSCreds *) object_dynamic_cast(creds, TYPE_QCRYPTO_TLS_CREDS); - if (!vs->tlscreds) { + if (!vd->tlscreds) { error_setg(errp, "Object with id '%s' is not TLS credentials", credid); goto fail; } - object_ref(OBJECT(vs->tlscreds)); + object_ref(OBJECT(vd->tlscreds)); - if (vs->tlscreds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + if (vd->tlscreds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { error_setg(errp, "Expecting TLS credentials with a server endpoint"); goto fail; @@ -3713,12 +3676,12 @@ void vnc_display_open(const char *id, Error **errp) x509verify = true; } } - vs->tlscreds = 
vnc_display_create_creds(x509, + vd->tlscreds = vnc_display_create_creds(x509, x509verify, path, - vs->id, + vd->id, errp); - if (!vs->tlscreds) { + if (!vd->tlscreds) { goto fail; } } @@ -3728,54 +3691,62 @@ void vnc_display_open(const char *id, Error **errp) share = qemu_opt_get(opts, "share"); if (share) { if (strcmp(share, "ignore") == 0) { - vs->share_policy = VNC_SHARE_POLICY_IGNORE; + vd->share_policy = VNC_SHARE_POLICY_IGNORE; } else if (strcmp(share, "allow-exclusive") == 0) { - vs->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE; + vd->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE; } else if (strcmp(share, "force-shared") == 0) { - vs->share_policy = VNC_SHARE_POLICY_FORCE_SHARED; + vd->share_policy = VNC_SHARE_POLICY_FORCE_SHARED; } else { error_setg(errp, "unknown vnc share= option"); goto fail; } } else { - vs->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE; + vd->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE; } - vs->connections_limit = qemu_opt_get_number(opts, "connections", 32); + vd->connections_limit = qemu_opt_get_number(opts, "connections", 32); #ifdef CONFIG_VNC_JPEG - vs->lossy = qemu_opt_get_bool(opts, "lossy", false); + vd->lossy = qemu_opt_get_bool(opts, "lossy", false); #endif - vs->non_adaptive = qemu_opt_get_bool(opts, "non-adaptive", false); + vd->non_adaptive = qemu_opt_get_bool(opts, "non-adaptive", false); /* adaptive updates are only used with tight encoding and * if lossy updates are enabled so we can disable all the * calculations otherwise */ - if (!vs->lossy) { - vs->non_adaptive = true; + if (!vd->lossy) { + vd->non_adaptive = true; } if (acl) { - if (strcmp(vs->id, "default") == 0) { - vs->tlsaclname = g_strdup("vnc.x509dname"); + if (strcmp(vd->id, "default") == 0) { + vd->tlsaclname = g_strdup("vnc.x509dname"); } else { - vs->tlsaclname = g_strdup_printf("vnc.%s.x509dname", vs->id); + vd->tlsaclname = g_strdup_printf("vnc.%s.x509dname", vd->id); } - qemu_acl_init(vs->tlsaclname); + qemu_acl_init(vd->tlsaclname); } #ifdef CONFIG_VNC_SASL if (acl && sasl) { char *aclname; - if (strcmp(vs->id, "default") == 0) { + if (strcmp(vd->id, "default") == 0) { aclname = g_strdup("vnc.username"); } else { - aclname = g_strdup_printf("vnc.%s.username", vs->id); + aclname = g_strdup_printf("vnc.%s.username", vd->id); } - vs->sasl.acl = qemu_acl_init(aclname); + vd->sasl.acl = qemu_acl_init(aclname); g_free(aclname); } #endif - if (vnc_display_setup_auth(vs, password, sasl, vs->ws_enabled, errp) < 0) { + if (vnc_display_setup_auth(&vd->auth, &vd->subauth, + vd->tlscreds, password, + sasl, false, errp) < 0) { + goto fail; + } + + if (vnc_display_setup_auth(&vd->ws_auth, &vd->ws_subauth, + vd->tlscreds, password, + sasl, true, errp) < 0) { goto fail; } @@ -3786,8 +3757,8 @@ void vnc_display_open(const char *id, Error **errp) goto fail; } #endif - vs->lock_key_sync = lock_key_sync; - vs->key_delay_ms = key_delay_ms; + vd->lock_key_sync = lock_key_sync; + vd->key_delay_ms = key_delay_ms; device_id = qemu_opt_get(opts, "display"); if (device_id) { @@ -3803,58 +3774,60 @@ void vnc_display_open(const char *id, Error **errp) con = NULL; } - if (con != vs->dcl.con) { - unregister_displaychangelistener(&vs->dcl); - vs->dcl.con = con; - register_displaychangelistener(&vs->dcl); + if (con != vd->dcl.con) { + unregister_displaychangelistener(&vd->dcl); + vd->dcl.con = con; + register_displaychangelistener(&vd->dcl); } if (reverse) { /* connect to viewer */ QIOChannelSocket *sioc = NULL; - vs->lsock = NULL; - vs->lwebsock = NULL; - if (vs->ws_enabled) { + vd->lsock = NULL; 
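Because vnc_display_setup_auth() now writes through explicit auth/subauth pointers instead of poking VncDisplay fields, vnc_display_open() above simply calls it twice, once for the plain listener and once for the websocket listener, and the old ws_tls flag disappears. Condensed to its decision table, the rewritten helper behaves as in this sketch (illustrative only: it borrows the VNC_AUTH_* constants from ui/vnc.h and drops the unsupported-credentials error path):

    static void pick_auth(bool tlscreds, bool is_x509, bool password,
                          bool sasl, bool websocket,
                          int *auth, int *subauth)
    {
        if (websocket || !tlscreds) {
            /* websocket listeners layer TLS below the VNC protocol,
             * so VeNCrypt is never offered there */
            *auth = password ? VNC_AUTH_VNC
                  : sasl     ? VNC_AUTH_SASL
                  :            VNC_AUTH_NONE;
            *subauth = VNC_AUTH_INVALID;
        } else {
            *auth = VNC_AUTH_VENCRYPT;
            if (password) {
                *subauth = is_x509 ? VNC_AUTH_VENCRYPT_X509VNC
                                   : VNC_AUTH_VENCRYPT_TLSVNC;
            } else if (sasl) {
                *subauth = is_x509 ? VNC_AUTH_VENCRYPT_X509SASL
                                   : VNC_AUTH_VENCRYPT_TLSSASL;
            } else {
                *subauth = is_x509 ? VNC_AUTH_VENCRYPT_X509NONE
                                   : VNC_AUTH_VENCRYPT_TLSNONE;
            }
        }
    }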
+ vd->lwebsock = NULL; + if (ws_enabled) { error_setg(errp, "Cannot use websockets in reverse mode"); goto fail; } - vs->is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; + vd->is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; sioc = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(sioc), "vnc-reverse"); if (qio_channel_socket_connect_sync(sioc, saddr, errp) < 0) { goto fail; } - vnc_connect(vs, sioc, false, false); + vnc_connect(vd, sioc, false, false); object_unref(OBJECT(sioc)); } else { - vs->lsock = qio_channel_socket_new(); - if (qio_channel_socket_listen_sync(vs->lsock, saddr, errp) < 0) { + vd->lsock = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(vd->lsock), "vnc-listen"); + if (qio_channel_socket_listen_sync(vd->lsock, saddr, errp) < 0) { goto fail; } - vs->is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; - vs->enabled = true; + vd->is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; - if (vs->ws_enabled) { - vs->lwebsock = qio_channel_socket_new(); - if (qio_channel_socket_listen_sync(vs->lwebsock, + if (ws_enabled) { + vd->lwebsock = qio_channel_socket_new(); + qio_channel_set_name(QIO_CHANNEL(vd->lwebsock), "vnc-ws-listen"); + if (qio_channel_socket_listen_sync(vd->lwebsock, wsaddr, errp) < 0) { - object_unref(OBJECT(vs->lsock)); - vs->lsock = NULL; + object_unref(OBJECT(vd->lsock)); + vd->lsock = NULL; goto fail; } } - vs->lsock_tag = qio_channel_add_watch( - QIO_CHANNEL(vs->lsock), - G_IO_IN, vnc_listen_io, vs, NULL); - if (vs->ws_enabled) { - vs->lwebsock_tag = qio_channel_add_watch( - QIO_CHANNEL(vs->lwebsock), - G_IO_IN, vnc_listen_io, vs, NULL); + vd->lsock_tag = qio_channel_add_watch( + QIO_CHANNEL(vd->lsock), + G_IO_IN, vnc_listen_io, vd, NULL); + if (ws_enabled) { + vd->lwebsock_tag = qio_channel_add_watch( + QIO_CHANNEL(vd->lwebsock), + G_IO_IN, vnc_listen_io, vd, NULL); } } if (show_vnc_port) { - vnc_display_print_local_addr(vs); + vnc_display_print_local_addr(vd); } qapi_free_SocketAddress(saddr); @@ -3864,22 +3837,22 @@ void vnc_display_open(const char *id, Error **errp) fail: qapi_free_SocketAddress(saddr); qapi_free_SocketAddress(wsaddr); - vs->enabled = false; - vs->ws_enabled = false; + ws_enabled = false; } void vnc_display_add_client(const char *id, int csock, bool skipauth) { - VncDisplay *vs = vnc_display_find(id); + VncDisplay *vd = vnc_display_find(id); QIOChannelSocket *sioc; - if (!vs) { + if (!vd) { return; } sioc = qio_channel_socket_new_fd(csock, NULL); if (sioc) { - vnc_connect(vs, sioc, skipauth, false); + qio_channel_set_name(QIO_CHANNEL(sioc), "vnc-server"); + vnc_connect(vd, sioc, skipauth, false); object_unref(OBJECT(sioc)); } } diff --git a/ui/vnc.h b/ui/vnc.h index ab5f244116..d20b154a77 100644 --- a/ui/vnc.h +++ b/ui/vnc.h @@ -150,7 +150,6 @@ struct VncDisplay guint lsock_tag; QIOChannelSocket *lwebsock; guint lwebsock_tag; - bool ws_enabled; DisplaySurface *ds; DisplayChangeListener dcl; kbd_layout_t *kbd_layout; @@ -167,14 +166,13 @@ struct VncDisplay const char *id; QTAILQ_ENTRY(VncDisplay) next; - bool enabled; bool is_unix; char *password; time_t expires; int auth; int subauth; /* Used by VeNCrypt */ int ws_auth; /* Used by websockets */ - bool ws_tls; /* Used by websockets */ + int ws_subauth; /* Used by websockets */ bool lossy; bool non_adaptive; QCryptoTLSCreds *tlscreds; @@ -309,7 +307,6 @@ struct VncState QEMUPutLEDEntry *led; bool abort; - bool initialized; QemuMutex output_mutex; QEMUBH *bh; Buffer jobs_buffer; @@ -518,7 +515,7 @@ void vnc_write_u8(VncState *vs, uint8_t value); void 
vnc_flush(VncState *vs); void vnc_read_when(VncState *vs, VncReadEvent *func, size_t expecting); void vnc_disconnect_finish(VncState *vs); -void vnc_init_state(VncState *vs); +void vnc_start_protocol(VncState *vs); /* Buffer I/O functions */ diff --git a/user-exec.c b/user-exec.c index 95f9f97c5c..6db075884d 100644 --- a/user-exec.c +++ b/user-exec.c @@ -105,8 +105,11 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, if (ret == 0) { return 1; /* the MMU fault was handled without causing real CPU fault */ } - /* now we have a real cpu fault */ - cpu_restore_state(cpu, pc); + + /* Now we have a real cpu fault. Since this is the exact location of + * the exception, we must undo the adjustment done by cpu_restore_state + * for handling call return addresses. */ + cpu_restore_state(cpu, pc + GETPC_ADJ); sigprocmask(SIG_SETMASK, old_set, NULL); cpu_loop_exit(cpu); diff --git a/util/Makefile.objs b/util/Makefile.objs index 96cb1e0e58..ad0f9c7fe4 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -1,4 +1,5 @@ util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o +util-obj-y += bufferiszero.o util-obj-$(CONFIG_POSIX) += compatfd.o util-obj-$(CONFIG_POSIX) += event_notifier-posix.o util-obj-$(CONFIG_POSIX) += mmap-alloc.o @@ -20,10 +21,10 @@ util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o util-obj-y += qemu-option.o qemu-progress.o util-obj-y += hexdump.o util-obj-y += crc32c.o +util-obj-y += uuid.o util-obj-y += throttle.o util-obj-y += getauxval.o util-obj-y += readline.o -util-obj-y += rfifolock.o util-obj-y += rcu.o util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o util-obj-y += qemu-coroutine-sleep.o diff --git a/util/bitmap.c b/util/bitmap.c index 40aadfb4f3..43ed011720 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -157,8 +157,6 @@ int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, return result != 0; } -#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) - void bitmap_set(unsigned long *map, long start, long nr) { unsigned long *p = map + BIT_WORD(start); diff --git a/util/bufferiszero.c b/util/bufferiszero.c new file mode 100644 index 0000000000..eb974b7849 --- /dev/null +++ b/util/bufferiszero.c @@ -0,0 +1,311 @@ +/* + * Simple C functions to supplement the C library + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/cutils.h" +#include "qemu/bswap.h" + +static bool +buffer_zero_int(const void *buf, size_t len) +{ + if (unlikely(len < 8)) { + /* For a very small buffer, simply accumulate all the bytes. */ + const unsigned char *p = buf; + const unsigned char *e = buf + len; + unsigned char t = 0; + + do { + t |= *p++; + } while (p < e); + + return t == 0; + } else { + /* Otherwise, use the unaligned memory access functions to + handle the beginning and end of the buffer, with a couple + of loops handling the middle aligned section. */ + uint64_t t = ldq_he_p(buf); + const uint64_t *p = (uint64_t *)(((uintptr_t)buf + 8) & -8); + const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8); + + for (; p + 8 <= e; p += 8) { + __builtin_prefetch(p + 8); + if (t) { + return false; + } + t = p[0] | p[1] | p[2] | p[3] | p[4] | p[5] | p[6] | p[7]; + } + while (p < e) { + t |= *p++; + } + t |= ldq_he_p(buf + len - 8); + + return t == 0; + } +} + +#if defined(CONFIG_AVX2_OPT) || defined(__SSE2__) +/* Do not use push_options pragmas unnecessarily, because clang + * does not support them. + */ +#ifdef CONFIG_AVX2_OPT +#pragma GCC push_options +#pragma GCC target("sse2") +#endif +#include + +/* Note that each of these vectorized functions require len >= 64. */ + +static bool +buffer_zero_sse2(const void *buf, size_t len) +{ + __m128i t = _mm_loadu_si128(buf); + __m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16); + __m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16); + __m128i zero = _mm_setzero_si128(); + + /* Loop over 16-byte aligned blocks of 64. */ + while (likely(p <= e)) { + __builtin_prefetch(p); + t = _mm_cmpeq_epi8(t, zero); + if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) { + return false; + } + t = p[-4] | p[-3] | p[-2] | p[-1]; + p += 4; + } + + /* Finish the aligned tail. */ + t |= e[-3]; + t |= e[-2]; + t |= e[-1]; + + /* Finish the unaligned tail. */ + t |= _mm_loadu_si128(buf + len - 16); + + return _mm_movemask_epi8(_mm_cmpeq_epi8(t, zero)) == 0xFFFF; +} +#ifdef CONFIG_AVX2_OPT +#pragma GCC pop_options +#endif + +#ifdef CONFIG_AVX2_OPT +/* Note that due to restrictions/bugs wrt __builtin functions in gcc <= 4.8, + * the includes have to be within the corresponding push_options region, and + * therefore the regions themselves have to be ordered with increasing ISA. + */ +#pragma GCC push_options +#pragma GCC target("sse4") +#include + +static bool +buffer_zero_sse4(const void *buf, size_t len) +{ + __m128i t = _mm_loadu_si128(buf); + __m128i *p = (__m128i *)(((uintptr_t)buf + 5 * 16) & -16); + __m128i *e = (__m128i *)(((uintptr_t)buf + len) & -16); + + /* Loop over 16-byte aligned blocks of 64. */ + while (likely(p <= e)) { + __builtin_prefetch(p); + if (unlikely(!_mm_testz_si128(t, t))) { + return false; + } + t = p[-4] | p[-3] | p[-2] | p[-1]; + p += 4; + } + + /* Finish the aligned tail. */ + t |= e[-3]; + t |= e[-2]; + t |= e[-1]; + + /* Finish the unaligned tail. */ + t |= _mm_loadu_si128(buf + len - 16); + + return _mm_testz_si128(t, t); +} + +#pragma GCC pop_options +#pragma GCC push_options +#pragma GCC target("avx2") +#include + +static bool +buffer_zero_avx2(const void *buf, size_t len) +{ + /* Begin with an unaligned head of 32 bytes. */ + __m256i t = _mm256_loadu_si256(buf); + __m256i *p = (__m256i *)(((uintptr_t)buf + 5 * 32) & -32); + __m256i *e = (__m256i *)(((uintptr_t)buf + len) & -32); + + if (likely(p <= e)) { + /* Loop over 32-byte aligned blocks of 128. 
*/ + do { + __builtin_prefetch(p); + if (unlikely(!_mm256_testz_si256(t, t))) { + return false; + } + t = p[-4] | p[-3] | p[-2] | p[-1]; + p += 4; + } while (p <= e); + } else { + t |= _mm256_loadu_si256(buf + 32); + if (len <= 128) { + goto last2; + } + } + + /* Finish the last block of 128 unaligned. */ + t |= _mm256_loadu_si256(buf + len - 4 * 32); + t |= _mm256_loadu_si256(buf + len - 3 * 32); + last2: + t |= _mm256_loadu_si256(buf + len - 2 * 32); + t |= _mm256_loadu_si256(buf + len - 1 * 32); + + return _mm256_testz_si256(t, t); +} +#pragma GCC pop_options +#endif /* CONFIG_AVX2_OPT */ + +/* Note that for test_buffer_is_zero_next_accel, the most preferred + * ISA must have the least significant bit. + */ +#define CACHE_AVX2 1 +#define CACHE_SSE4 2 +#define CACHE_SSE2 4 + +/* Make sure that these variables are appropriately initialized when + * SSE2 is enabled on the compiler command-line, but the compiler is + * too old to support . + */ +#ifdef CONFIG_AVX2_OPT +# define INIT_CACHE 0 +# define INIT_ACCEL buffer_zero_int +#else +# ifndef __SSE2__ +# error "ISA selection confusion" +# endif +# define INIT_CACHE CACHE_SSE2 +# define INIT_ACCEL buffer_zero_sse2 +#endif + +static unsigned cpuid_cache = INIT_CACHE; +static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL; + +static void init_accel(unsigned cache) +{ + bool (*fn)(const void *, size_t) = buffer_zero_int; + if (cache & CACHE_SSE2) { + fn = buffer_zero_sse2; + } +#ifdef CONFIG_AVX2_OPT + if (cache & CACHE_SSE4) { + fn = buffer_zero_sse4; + } + if (cache & CACHE_AVX2) { + fn = buffer_zero_avx2; + } +#endif + buffer_accel = fn; +} + +#ifdef CONFIG_AVX2_OPT +#include +static void __attribute__((constructor)) init_cpuid_cache(void) +{ + int max = __get_cpuid_max(0, NULL); + int a, b, c, d; + unsigned cache = 0; + + if (max >= 1) { + __cpuid(1, a, b, c, d); + if (d & bit_SSE2) { + cache |= CACHE_SSE2; + } +#ifdef CONFIG_AVX2_OPT + if (c & bit_SSE4_1) { + cache |= CACHE_SSE4; + } + + /* We must check that AVX is not just available, but usable. */ + if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) { + int bv; + __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0)); + __cpuid_count(7, 0, a, b, c, d); + if ((bv & 6) == 6 && (b & bit_AVX2)) { + cache |= CACHE_AVX2; + } + } +#endif + } + cpuid_cache = cache; + init_accel(cache); +} +#endif /* CONFIG_AVX2_OPT */ + +bool test_buffer_is_zero_next_accel(void) +{ + /* If no bits set, we just tested buffer_zero_int, and there + are no more acceleration options to test. */ + if (cpuid_cache == 0) { + return false; + } + /* Disable the accelerator we used before and select a new one. */ + cpuid_cache &= cpuid_cache - 1; + init_accel(cpuid_cache); + return true; +} + +static bool select_accel_fn(const void *buf, size_t len) +{ + if (likely(len >= 64)) { + return buffer_accel(buf, len); + } + return buffer_zero_int(buf, len); +} + +#else +#define select_accel_fn buffer_zero_int +bool test_buffer_is_zero_next_accel(void) +{ + return false; +} +#endif + +/* + * Checks if a buffer is all zeroes + */ +bool buffer_is_zero(const void *buf, size_t len) +{ + if (unlikely(len == 0)) { + return true; + } + + /* Fetch the beginning of the buffer while we select the accelerator. */ + __builtin_prefetch(buf); + + /* Use an optimized zero check if possible. Note that this also + includes a check for an unrolled loop over 64-bit integers. 
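In the util/bufferiszero.c code above, cpuid_cache keeps one bit per usable implementation, with the most preferred option in the least significant bit, and test_buffer_is_zero_next_accel() moves to the next choice by clearing that bit. The idiom on its own, as a runnable toy program that is not part of the patch:

    #include <stdio.h>

    int main(void)
    {
        /* same encoding as the patch: AVX2 = 1, SSE4 = 2, SSE2 = 4 */
        unsigned cache = 1 | 2 | 4;

        while (cache) {
            printf("testing accelerator bit %#x\n", cache & -cache);
            cache &= cache - 1;   /* retire the currently preferred one */
        }
        printf("only the portable integer version is left\n");
        return 0;
    }

This is what lets a caller, such as a unit test, exercise every implementation the host supports before falling back to buffer_zero_int().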
*/ + return select_accel_fn(buf, len); +} diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index a7c3366553..f6fc49a0e5 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -33,8 +33,9 @@ typedef struct { Coroutine base; void *stack; + size_t stack_size; sigjmp_buf env; -} CoroutineUContext; +} CoroutineSigAltStack; /** * Per-thread coroutine bookkeeping @@ -44,7 +45,7 @@ typedef struct { Coroutine *current; /** The default coroutine */ - CoroutineUContext leader; + CoroutineSigAltStack leader; /** Information for the signal handler (trampoline) */ sigjmp_buf tr_reenter; @@ -89,7 +90,7 @@ static void __attribute__((constructor)) coroutine_init(void) * (from the signal handler when it is not signal handling, read ahead * for more information). */ -static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +static void coroutine_bootstrap(CoroutineSigAltStack *self, Coroutine *co) { /* Initialize longjmp environment and switch back the caller */ if (!sigsetjmp(self->env, 0)) { @@ -109,7 +110,7 @@ static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) */ static void coroutine_trampoline(int signal) { - CoroutineUContext *self; + CoroutineSigAltStack *self; Coroutine *co; CoroutineThreadState *coTS; @@ -143,8 +144,7 @@ static void coroutine_trampoline(int signal) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; - CoroutineUContext *co; + CoroutineSigAltStack *co; CoroutineThreadState *coTS; struct sigaction sa; struct sigaction osa; @@ -164,7 +164,8 @@ Coroutine *qemu_coroutine_new(void) */ co = g_malloc0(sizeof(*co)); - co->stack = g_malloc(stack_size); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); co->base.entry_arg = &old_env; /* stash away our jmp_buf */ coTS = coroutine_get_thread_state(); @@ -189,7 +190,7 @@ Coroutine *qemu_coroutine_new(void) * Set the new stack. 
*/ ss.ss_sp = co->stack; - ss.ss_size = stack_size; + ss.ss_size = co->stack_size; ss.ss_flags = 0; if (sigaltstack(&ss, &oss) < 0) { abort(); @@ -251,17 +252,17 @@ Coroutine *qemu_coroutine_new(void) void qemu_coroutine_delete(Coroutine *co_) { - CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_); - g_free(co->stack); + qemu_free_stack(co->stack, co->stack_size); g_free(co); } CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineAction action) { - CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); - CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineSigAltStack *from = DO_UPCAST(CoroutineSigAltStack, base, from_); + CoroutineSigAltStack *to = DO_UPCAST(CoroutineSigAltStack, base, to_); CoroutineThreadState *s = coroutine_get_thread_state(); int ret; diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 2bb7e10d4b..6621f3f692 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -34,6 +34,7 @@ typedef struct { Coroutine base; void *stack; + size_t stack_size; sigjmp_buf env; #ifdef CONFIG_VALGRIND_H @@ -82,7 +83,6 @@ static void coroutine_trampoline(int i0, int i1) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; CoroutineUContext *co; ucontext_t old_uc, uc; sigjmp_buf old_env; @@ -101,17 +101,18 @@ Coroutine *qemu_coroutine_new(void) } co = g_malloc0(sizeof(*co)); - co->stack = g_malloc(stack_size); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); co->base.entry_arg = &old_env; /* stash away our jmp_buf */ uc.uc_link = &old_uc; uc.uc_stack.ss_sp = co->stack; - uc.uc_stack.ss_size = stack_size; + uc.uc_stack.ss_size = co->stack_size; uc.uc_stack.ss_flags = 0; #ifdef CONFIG_VALGRIND_H co->valgrind_stack_id = - VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size); + VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size); #endif arg.p = co; @@ -149,7 +150,7 @@ void qemu_coroutine_delete(Coroutine *co_) valgrind_stack_deregister(co); #endif - g_free(co->stack); + qemu_free_stack(co->stack, co->stack_size); g_free(co); } diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c index 02e28e825f..de6bd4fd3e 100644 --- a/util/coroutine-win32.c +++ b/util/coroutine-win32.c @@ -71,7 +71,7 @@ static void CALLBACK coroutine_trampoline(void *co_) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineWin32 *co; co = g_malloc0(sizeof(*co)); diff --git a/util/cutils.c b/util/cutils.c index 7505fdaa81..4fefcf3be3 100644 --- a/util/cutils.c +++ b/util/cutils.c @@ -161,250 +161,6 @@ int qemu_fdatasync(int fd) #endif } -/* vector definitions */ -#ifdef __ALTIVEC__ -#include -/* The altivec.h header says we're allowed to undef these for - * C++ compatibility. Here we don't care about C++, but we - * undef them anyway to avoid namespace pollution. - */ -#undef vector -#undef pixel -#undef bool -#define VECTYPE __vector unsigned char -#define SPLAT(p) vec_splat(vec_ld(0, p), 0) -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2) -#define VEC_OR(v1, v2) ((v1) | (v2)) -/* altivec.h may redefine the bool macro as vector type. - * Reset it to POSIX semantics. 
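The three coroutine backends above stop hard-coding a 1 MiB g_malloc() stack and instead use COROUTINE_STACK_SIZE together with qemu_alloc_stack()/qemu_free_stack(), which are introduced elsewhere in this series and are not shown here. As a rough sketch of what such an allocator can look like on a POSIX host, under the assumption that it maps the stack with an inaccessible guard page at the low end (the stack grows downward on the hosts QEMU supports), and explicitly not QEMU's actual implementation:

    #include <sys/mman.h>
    #include <unistd.h>
    #include <stdlib.h>

    static void *alloc_stack(size_t *sz)
    {
        size_t pagesz = sysconf(_SC_PAGESIZE);
        /* round up to whole pages and reserve one extra page as a guard */
        size_t total = ((*sz + pagesz - 1) & ~(pagesz - 1)) + pagesz;
        void *ptr = mmap(NULL, total, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (ptr == MAP_FAILED) {
            abort();
        }
        /* an overflowing coroutine now faults instead of corrupting memory */
        if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
            abort();
        }
        *sz = total - pagesz;
        return (char *)ptr + pagesz;
    }

    static void free_stack(void *stack, size_t sz)
    {
        size_t pagesz = sysconf(_SC_PAGESIZE);
        munmap((char *)stack - pagesz, sz + pagesz);
    }

Passing the size by pointer matches the callers above, which store the possibly rounded-up value in co->stack_size and hand it to sigaltstack() or makecontext().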
*/ -#define bool _Bool -#elif defined __SSE2__ -#include -#define VECTYPE __m128i -#define SPLAT(p) _mm_set1_epi8(*(p)) -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF) -#define VEC_OR(v1, v2) (_mm_or_si128(v1, v2)) -#elif defined(__aarch64__) -#include "arm_neon.h" -#define VECTYPE uint64x2_t -#define ALL_EQ(v1, v2) \ - ((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \ - (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1))) -#define VEC_OR(v1, v2) ((v1) | (v2)) -#else -#define VECTYPE unsigned long -#define SPLAT(p) (*(p) * (~0UL / 255)) -#define ALL_EQ(v1, v2) ((v1) == (v2)) -#define VEC_OR(v1, v2) ((v1) | (v2)) -#endif - -#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8 - -static bool -can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len) -{ - return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR - * sizeof(VECTYPE)) == 0 - && ((uintptr_t) buf) % sizeof(VECTYPE) == 0); -} - -/* - * Searches for an area with non-zero content in a buffer - * - * Attention! The len must be a multiple of - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) - * and addr must be a multiple of sizeof(VECTYPE) due to - * restriction of optimizations in this function. - * - * can_use_buffer_find_nonzero_offset_inner() can be used to - * check these requirements. - * - * The return value is the offset of the non-zero area rounded - * down to a multiple of sizeof(VECTYPE) for the first - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to - * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) - * afterwards. - * - * If the buffer is all zero the return value is equal to len. - */ - -static size_t buffer_find_nonzero_offset_inner(const void *buf, size_t len) -{ - const VECTYPE *p = buf; - const VECTYPE zero = (VECTYPE){0}; - size_t i; - - assert(can_use_buffer_find_nonzero_offset_inner(buf, len)); - - if (!len) { - return 0; - } - - for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { - if (!ALL_EQ(p[i], zero)) { - return i * sizeof(VECTYPE); - } - } - - for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; - i < len / sizeof(VECTYPE); - i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { - VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]); - VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]); - VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]); - VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]); - VECTYPE tmp01 = VEC_OR(tmp0, tmp1); - VECTYPE tmp23 = VEC_OR(tmp2, tmp3); - if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) { - break; - } - } - - return i * sizeof(VECTYPE); -} - -#if defined CONFIG_AVX2_OPT -#pragma GCC push_options -#pragma GCC target("avx2") -#include -#include - -#define AVX2_VECTYPE __m256i -#define AVX2_SPLAT(p) _mm256_set1_epi8(*(p)) -#define AVX2_ALL_EQ(v1, v2) \ - (_mm256_movemask_epi8(_mm256_cmpeq_epi8(v1, v2)) == 0xFFFFFFFF) -#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256(v1, v2)) - -static bool -can_use_buffer_find_nonzero_offset_avx2(const void *buf, size_t len) -{ - return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR - * sizeof(AVX2_VECTYPE)) == 0 - && ((uintptr_t) buf) % sizeof(AVX2_VECTYPE) == 0); -} - -static size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len) -{ - const AVX2_VECTYPE *p = buf; - const AVX2_VECTYPE zero = (AVX2_VECTYPE){0}; - size_t i; - - assert(can_use_buffer_find_nonzero_offset_avx2(buf, len)); - - if (!len) { - return 0; - } - - for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { - if (!AVX2_ALL_EQ(p[i], zero)) { - return i * sizeof(AVX2_VECTYPE); - } - } - - for (i = 
BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; - i < len / sizeof(AVX2_VECTYPE); - i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { - AVX2_VECTYPE tmp0 = AVX2_VEC_OR(p[i + 0], p[i + 1]); - AVX2_VECTYPE tmp1 = AVX2_VEC_OR(p[i + 2], p[i + 3]); - AVX2_VECTYPE tmp2 = AVX2_VEC_OR(p[i + 4], p[i + 5]); - AVX2_VECTYPE tmp3 = AVX2_VEC_OR(p[i + 6], p[i + 7]); - AVX2_VECTYPE tmp01 = AVX2_VEC_OR(tmp0, tmp1); - AVX2_VECTYPE tmp23 = AVX2_VEC_OR(tmp2, tmp3); - if (!AVX2_ALL_EQ(AVX2_VEC_OR(tmp01, tmp23), zero)) { - break; - } - } - - return i * sizeof(AVX2_VECTYPE); -} - -static bool avx2_support(void) -{ - int a, b, c, d; - - if (__get_cpuid_max(0, NULL) < 7) { - return false; - } - - __cpuid_count(7, 0, a, b, c, d); - - return b & bit_AVX2; -} - -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) \ - __attribute__ ((ifunc("can_use_buffer_find_nonzero_offset_ifunc"))); -size_t buffer_find_nonzero_offset(const void *buf, size_t len) \ - __attribute__ ((ifunc("buffer_find_nonzero_offset_ifunc"))); - -static void *buffer_find_nonzero_offset_ifunc(void) -{ - typeof(buffer_find_nonzero_offset) *func = (avx2_support()) ? - buffer_find_nonzero_offset_avx2 : buffer_find_nonzero_offset_inner; - - return func; -} - -static void *can_use_buffer_find_nonzero_offset_ifunc(void) -{ - typeof(can_use_buffer_find_nonzero_offset) *func = (avx2_support()) ? - can_use_buffer_find_nonzero_offset_avx2 : - can_use_buffer_find_nonzero_offset_inner; - - return func; -} -#pragma GCC pop_options -#else -bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len) -{ - return can_use_buffer_find_nonzero_offset_inner(buf, len); -} - -size_t buffer_find_nonzero_offset(const void *buf, size_t len) -{ - return buffer_find_nonzero_offset_inner(buf, len); -} -#endif - -/* - * Checks if a buffer is all zeroes - * - * Attention! The len must be a multiple of 4 * sizeof(long) due to - * restriction of optimizations in this function. - */ -bool buffer_is_zero(const void *buf, size_t len) -{ - /* - * Use long as the biggest available internal data type that fits into the - * CPU register and unroll the loop to smooth out the effect of memory - * latency. - */ - - size_t i; - long d0, d1, d2, d3; - const long * const data = buf; - - /* use vector optimized zero check if possible */ - if (can_use_buffer_find_nonzero_offset(buf, len)) { - return buffer_find_nonzero_offset(buf, len) == len; - } - - assert(len % (4 * sizeof(long)) == 0); - len /= sizeof(long); - - for (i = 0; i < len; i += 4) { - d0 = data[i + 0]; - d1 = data[i + 1]; - d2 = data[i + 2]; - d3 = data[i + 3]; - - if (d0 || d1 || d2 || d3) { - return false; - } - } - - return true; -} - #ifndef _WIN32 /* Sets a specific flag */ int fcntl_setfl(int fd, int flag) diff --git a/util/hbitmap.c b/util/hbitmap.c index 99fd2ba37b..9f691b76bd 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -78,6 +78,9 @@ struct HBitmap { */ int granularity; + /* A meta dirty bitmap to track the dirtiness of bits in this HBitmap. */ + HBitmap *meta; + /* A number of progressively less coarse bitmaps (i.e. level 0 is the * coarsest). Each bit in level N represents a word in level N+1 that * has a set bit, except the last level where each bit represents the @@ -209,25 +212,27 @@ static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last) } /* Setting starts at the last layer and propagates up if an element - * changes from zero to non-zero. + * changes. 
*/ static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last) { unsigned long mask; - bool changed; + unsigned long old; assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); assert(start <= last); mask = 2UL << (last & (BITS_PER_LONG - 1)); mask -= 1UL << (start & (BITS_PER_LONG - 1)); - changed = (*elem == 0); + old = *elem; *elem |= mask; - return changed; + return old != *elem; } -/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ -static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... + * Returns true if at least one bit is changed. */ +static bool hb_set_between(HBitmap *hb, int level, uint64_t start, + uint64_t last) { size_t pos = start >> BITS_PER_LEVEL; size_t lastpos = last >> BITS_PER_LEVEL; @@ -256,23 +261,28 @@ static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last if (level > 0 && changed) { hb_set_between(hb, level - 1, pos, lastpos); } + return changed; } void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count) { /* Compute range in the last layer. */ + uint64_t first, n; uint64_t last = start + count - 1; trace_hbitmap_set(hb, start, count, start >> hb->granularity, last >> hb->granularity); - start >>= hb->granularity; + first = start >> hb->granularity; last >>= hb->granularity; - count = last - start + 1; assert(last < hb->size); + n = last - first + 1; - hb->count += count - hb_count_between(hb, start, last); - hb_set_between(hb, HBITMAP_LEVELS - 1, start, last); + hb->count += n - hb_count_between(hb, first, last); + if (hb_set_between(hb, HBITMAP_LEVELS - 1, first, last) && + hb->meta) { + hbitmap_set(hb->meta, start, count); + } } /* Resetting works the other way round: propagate up if the new @@ -293,8 +303,10 @@ static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t l return blanked; } -/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ -static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... + * Returns true if at least one bit is changed. */ +static bool hb_reset_between(HBitmap *hb, int level, uint64_t start, + uint64_t last) { size_t pos = start >> BITS_PER_LEVEL; size_t lastpos = last >> BITS_PER_LEVEL; @@ -337,22 +349,29 @@ static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t la if (level > 0 && changed) { hb_reset_between(hb, level - 1, pos, lastpos); } + + return changed; + } void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) { /* Compute range in the last layer. 
*/ + uint64_t first; uint64_t last = start + count - 1; trace_hbitmap_reset(hb, start, count, start >> hb->granularity, last >> hb->granularity); - start >>= hb->granularity; + first = start >> hb->granularity; last >>= hb->granularity; assert(last < hb->size); - hb->count -= hb_count_between(hb, start, last); - hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); + hb->count -= hb_count_between(hb, first, last); + if (hb_reset_between(hb, HBITMAP_LEVELS - 1, first, last) && + hb->meta) { + hbitmap_set(hb->meta, start, count); + } } void hbitmap_reset_all(HBitmap *hb) @@ -378,9 +397,151 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item) return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; } +uint64_t hbitmap_serialization_granularity(const HBitmap *hb) +{ + /* Must hold true so that the shift below is defined + * (ld(64) == 6, i.e. 1 << 6 == 64) */ + assert(hb->granularity < 64 - 6); + + /* Require at least 64 bit granularity to be safe on both 64 bit and 32 bit + * hosts. */ + return UINT64_C(64) << hb->granularity; +} + +/* Start should be aligned to serialization granularity, chunk size should be + * aligned to serialization granularity too, except for last chunk. + */ +static void serialization_chunk(const HBitmap *hb, + uint64_t start, uint64_t count, + unsigned long **first_el, uint64_t *el_count) +{ + uint64_t last = start + count - 1; + uint64_t gran = hbitmap_serialization_granularity(hb); + + assert((start & (gran - 1)) == 0); + assert((last >> hb->granularity) < hb->size); + if ((last >> hb->granularity) != hb->size - 1) { + assert((count & (gran - 1)) == 0); + } + + start = (start >> hb->granularity) >> BITS_PER_LEVEL; + last = (last >> hb->granularity) >> BITS_PER_LEVEL; + + *first_el = &hb->levels[HBITMAP_LEVELS - 1][start]; + *el_count = last - start + 1; +} + +uint64_t hbitmap_serialization_size(const HBitmap *hb, + uint64_t start, uint64_t count) +{ + uint64_t el_count; + unsigned long *cur; + + if (!count) { + return 0; + } + serialization_chunk(hb, start, count, &cur, &el_count); + + return el_count * sizeof(unsigned long); +} + +void hbitmap_serialize_part(const HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count) +{ + uint64_t el_count; + unsigned long *cur, *end; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &cur, &el_count); + end = cur + el_count; + + while (cur != end) { + unsigned long el = + (BITS_PER_LONG == 32 ? 
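[Not part of the patch: a worked example of the hbitmap_serialization_granularity() computation above. The serialized unit is 64 bitmap items scaled by the bitmap's own granularity, so a chunk always covers whole unsigned longs on 32-bit and 64-bit hosts alike.]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t serialization_granularity(int granularity)
    {
        return UINT64_C(64) << granularity;
    }

    int main(void)
    {
        /* granularity 0 -> 64 items per chunk; granularity 16 -> 4194304 */
        printf("%" PRIu64 "\n", serialization_granularity(0));
        printf("%" PRIu64 "\n", serialization_granularity(16));
        return 0;
    }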
cpu_to_le32(*cur) : cpu_to_le64(*cur)); + + memcpy(buf, &el, sizeof(el)); + buf += sizeof(el); + cur++; + } +} + +void hbitmap_deserialize_part(HBitmap *hb, uint8_t *buf, + uint64_t start, uint64_t count, + bool finish) +{ + uint64_t el_count; + unsigned long *cur, *end; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &cur, &el_count); + end = cur + el_count; + + while (cur != end) { + memcpy(cur, buf, sizeof(*cur)); + + if (BITS_PER_LONG == 32) { + le32_to_cpus((uint32_t *)cur); + } else { + le64_to_cpus((uint64_t *)cur); + } + + buf += sizeof(unsigned long); + cur++; + } + if (finish) { + hbitmap_deserialize_finish(hb); + } +} + +void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t start, uint64_t count, + bool finish) +{ + uint64_t el_count; + unsigned long *first; + + if (!count) { + return; + } + serialization_chunk(hb, start, count, &first, &el_count); + + memset(first, 0, el_count * sizeof(unsigned long)); + if (finish) { + hbitmap_deserialize_finish(hb); + } +} + +void hbitmap_deserialize_finish(HBitmap *bitmap) +{ + int64_t i, size, prev_size; + int lev; + + /* restore levels starting from penultimate to zero level, assuming + * that the last level is ok */ + size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) { + prev_size = size; + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + memset(bitmap->levels[lev], 0, size * sizeof(unsigned long)); + + for (i = 0; i < prev_size; ++i) { + if (bitmap->levels[lev + 1][i]) { + bitmap->levels[lev][i >> BITS_PER_LEVEL] |= + 1UL << (i & (BITS_PER_LONG - 1)); + } + } + } + + bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); +} + void hbitmap_free(HBitmap *hb) { unsigned i; + assert(!hb->meta); for (i = HBITMAP_LEVELS; i-- > 0; ) { g_free(hb->levels[i]); } @@ -437,7 +598,7 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size) if (shrink) { /* Don't clear partial granularity groups; * start at the first full one. */ - uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity); + uint64_t start = ROUND_UP(num_elements, UINT64_C(1) << hb->granularity); uint64_t fix_count = (hb->size << hb->granularity) - start; assert(fix_count); @@ -458,6 +619,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size) (size - old) * sizeof(*hb->levels[i])); } } + if (hb->meta) { + hbitmap_truncate(hb->meta, hb->size << hb->granularity); + } } @@ -493,3 +657,19 @@ bool hbitmap_merge(HBitmap *a, const HBitmap *b) return true; } + +HBitmap *hbitmap_create_meta(HBitmap *hb, int chunk_size) +{ + assert(!(chunk_size & (chunk_size - 1))); + assert(!hb->meta); + hb->meta = hbitmap_alloc(hb->size << hb->granularity, + hb->granularity + ctz32(chunk_size)); + return hb->meta; +} + +void hbitmap_free_meta(HBitmap *hb) +{ + assert(hb->meta); + hbitmap_free(hb->meta); + hb->meta = NULL; +} diff --git a/util/log.c b/util/log.c index f4866be97d..d1c201f910 100644 --- a/util/log.c +++ b/util/log.c @@ -285,53 +285,42 @@ const QEMULogItem qemu_log_items[] = { { 0, NULL, NULL }, }; -static int cmp1(const char *s1, int n, const char *s2) -{ - if (strlen(s2) != n) { - return 0; - } - return memcmp(s1, s2, n) == 0; -} - /* takes a comma separated list of log masks. Return 0 if error. 
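[Not part of the patch: a hedged round-trip sketch of the new hbitmap serialization API. It assumes the QEMU tree ("qemu/hbitmap.h") and that the existing hbitmap_alloc()/hbitmap_set()/hbitmap_get() helpers keep their usual signatures; it is an illustration, not code from this series.]

    #include "qemu/osdep.h"
    #include "qemu/hbitmap.h"

    static void hbitmap_roundtrip_example(void)
    {
        uint64_t size = 1 << 20;                 /* arbitrary bitmap size */
        HBitmap *src = hbitmap_alloc(size, 0);
        HBitmap *dst = hbitmap_alloc(size, 0);
        uint64_t len;
        uint8_t *buf;

        hbitmap_set(src, 1000, 64);

        /* size of the serialized chunk covering the whole bitmap */
        len = hbitmap_serialization_size(src, 0, size);
        buf = g_malloc(len);

        hbitmap_serialize_part(src, buf, 0, size);
        /* finish=true rebuilds the upper levels from the copied last level */
        hbitmap_deserialize_part(dst, buf, 0, size, true);

        assert(hbitmap_get(dst, 1000));

        g_free(buf);
        hbitmap_free(src);
        hbitmap_free(dst);
    }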
*/ int qemu_str_to_log_mask(const char *str) { const QEMULogItem *item; - int mask; - const char *p, *p1; + int mask = 0; + char **parts = g_strsplit(str, ",", 0); + char **tmp; - p = str; - mask = 0; - for (;;) { - p1 = strchr(p, ','); - if (!p1) { - p1 = p + strlen(p); - } - if (cmp1(p,p1-p,"all")) { + for (tmp = parts; tmp && *tmp; tmp++) { + if (g_str_equal(*tmp, "all")) { for (item = qemu_log_items; item->mask != 0; item++) { mask |= item->mask; } #ifdef CONFIG_TRACE_LOG - } else if (strncmp(p, "trace:", 6) == 0 && p + 6 != p1) { - trace_enable_events(p + 6); + } else if (g_str_has_prefix(*tmp, "trace:") && (*tmp)[6] != '\0') { + trace_enable_events((*tmp) + 6); mask |= LOG_TRACE; #endif } else { for (item = qemu_log_items; item->mask != 0; item++) { - if (cmp1(p, p1 - p, item->name)) { + if (g_str_equal(*tmp, item->name)) { goto found; } } - return 0; + goto error; found: mask |= item->mask; } - if (*p1 != ',') { - break; - } - p = p1 + 1; } + + g_strfreev(parts); return mask; + + error: + g_strfreev(parts); + return 0; } void qemu_print_log_usage(FILE *f) diff --git a/util/module.c b/util/module.c index 86e3f7aba0..c90973721f 100644 --- a/util/module.c +++ b/util/module.c @@ -87,14 +87,11 @@ void register_dso_module_init(void (*fn)(void), module_init_type type) QTAILQ_INSERT_TAIL(&dso_init_list, e, node); } -static void module_load(module_init_type type); - void module_call_init(module_init_type type) { ModuleTypeList *l; ModuleEntry *e; - module_load(type); l = find_type(type); QTAILQ_FOREACH(e, l, node) { @@ -145,6 +142,7 @@ static int module_load_file(const char *fname) ret = -EINVAL; } else { QTAILQ_FOREACH(e, &dso_init_list, node) { + e->init(); register_module_init(e->init, e->type); } ret = 0; @@ -159,32 +157,33 @@ static int module_load_file(const char *fname) } #endif -static void module_load(module_init_type type) +void module_load_one(const char *prefix, const char *lib_name) { #ifdef CONFIG_MODULES char *fname = NULL; - const char **mp; - static const char *block_modules[] = { - CONFIG_BLOCK_MODULES - }; char *exec_dir; char *dirs[3]; + char *module_name; int i = 0; int ret; + static GHashTable *loaded_modules; if (!g_module_supported()) { fprintf(stderr, "Module is not supported by system.\n"); return; } - switch (type) { - case MODULE_INIT_BLOCK: - mp = block_modules; - break; - default: - /* no other types have dynamic modules for now*/ + if (!loaded_modules) { + loaded_modules = g_hash_table_new(g_str_hash, g_str_equal); + } + + module_name = g_strdup_printf("%s%s", prefix, lib_name); + + if (g_hash_table_lookup(loaded_modules, module_name)) { + g_free(module_name); return; } + g_hash_table_insert(loaded_modules, module_name, module_name); exec_dir = qemu_get_exec_dir(); dirs[i++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR); @@ -194,16 +193,15 @@ static void module_load(module_init_type type) g_free(exec_dir); exec_dir = NULL; - for ( ; *mp; mp++) { - for (i = 0; i < ARRAY_SIZE(dirs); i++) { - fname = g_strdup_printf("%s/%s%s", dirs[i], *mp, HOST_DSOSUF); - ret = module_load_file(fname); - g_free(fname); - fname = NULL; - /* Try loading until loaded a module file */ - if (!ret) { - break; - } + for (i = 0; i < ARRAY_SIZE(dirs); i++) { + fname = g_strdup_printf("%s/%s%s", + dirs[i], module_name, HOST_DSOSUF); + ret = module_load_file(fname); + g_free(fname); + fname = NULL; + /* Try loading until loaded a module file */ + if (!ret) { + break; } } diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f2d4e9e592..f63146407f 100644 --- a/util/oslib-posix.c +++ 
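[Not part of the patch: a standalone mirror of the g_strsplit() pattern the rewritten qemu_str_to_log_mask() uses, with the same "all" and "trace:" special cases; the input string is arbitrary. Build against glib-2.0.]

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        const char *str = "exec,trace:memory_region_ops_read,int";
        char **parts = g_strsplit(str, ",", 0);
        char **tmp;

        for (tmp = parts; tmp && *tmp; tmp++) {
            if (g_str_equal(*tmp, "all")) {
                printf("enable everything\n");
            } else if (g_str_has_prefix(*tmp, "trace:") && (*tmp)[6] != '\0') {
                printf("trace pattern: %s\n", *tmp + 6);
            } else {
                printf("log item: %s\n", *tmp);
            }
        }

        g_strfreev(parts);
        return 0;
    }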
b/util/oslib-posix.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include -#include #include @@ -46,10 +45,16 @@ #ifdef __FreeBSD__ #include +#include +#include #endif #include "qemu/mmap-alloc.h" +#ifdef CONFIG_DEBUG_STACK_USAGE +#include "qemu/error-report.h" +#endif + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -430,6 +435,32 @@ int qemu_read_password(char *buf, int buf_size) } +char *qemu_get_pid_name(pid_t pid) +{ + char *name = NULL; + +#if defined(__FreeBSD__) + /* BSDs don't have /proc, but they provide a nice substitute */ + struct kinfo_proc *proc = kinfo_getproc(pid); + + if (proc) { + name = g_strdup(proc->ki_comm); + free(proc); + } +#else + /* Assume a system with reasonable procfs */ + char *pid_path; + size_t len; + + pid_path = g_strdup_printf("/proc/%d/cmdline", pid); + g_file_get_contents(pid_path, &name, &len, NULL); + g_free(pid_path); +#endif + + return name; +} + + pid_t qemu_fork(Error **errp) { sigset_t oldmask, newmask; @@ -499,3 +530,76 @@ pid_t qemu_fork(Error **errp) } return pid; } + +void *qemu_alloc_stack(size_t *sz) +{ + void *ptr, *guardpage; +#ifdef CONFIG_DEBUG_STACK_USAGE + void *ptr2; +#endif + size_t pagesz = getpagesize(); +#ifdef _SC_THREAD_STACK_MIN + /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ + long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN); + *sz = MAX(MAX(min_stack_sz, 0), *sz); +#endif + /* adjust stack size to a multiple of the page size */ + *sz = ROUND_UP(*sz, pagesz); + /* allocate one extra page for the guard page */ + *sz += pagesz; + + ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + abort(); + } + +#if defined(HOST_IA64) + /* separate register stack */ + guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz); +#elif defined(HOST_HPPA) + /* stack grows up */ + guardpage = ptr + *sz - pagesz; +#else + /* stack grows down */ + guardpage = ptr; +#endif + if (mprotect(guardpage, pagesz, PROT_NONE) != 0) { + abort(); + } + +#ifdef CONFIG_DEBUG_STACK_USAGE + for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) { + *(uint32_t *)ptr2 = 0xdeadbeaf; + } +#endif + + return ptr; +} + +#ifdef CONFIG_DEBUG_STACK_USAGE +static __thread unsigned int max_stack_usage; +#endif + +void qemu_free_stack(void *stack, size_t sz) +{ +#ifdef CONFIG_DEBUG_STACK_USAGE + unsigned int usage; + void *ptr; + + for (ptr = stack + getpagesize(); ptr < stack + sz; + ptr += sizeof(uint32_t)) { + if (*(uint32_t *)ptr != 0xdeadbeaf) { + break; + } + } + usage = sz - (uintptr_t) (ptr - stack); + if (usage > max_stack_usage) { + error_report("thread %d max stack usage increased from %u to %u", + qemu_get_thread_id(), max_stack_usage, usage); + max_stack_usage = usage; + } +#endif + + munmap(stack, sz); +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 56bff72da8..6f22287754 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -584,6 +584,13 @@ int qemu_read_password(char *buf, int buf_size) } +char *qemu_get_pid_name(pid_t pid) +{ + /* XXX Implement me */ + abort(); +} + + pid_t qemu_fork(Error **errp) { errno = ENOSYS; diff --git a/util/qemu-config.c b/util/qemu-config.c index fb973074d3..5527100a01 100644 --- a/util/qemu-config.c +++ b/util/qemu-config.c @@ -6,7 +6,7 @@ #include "qmp-commands.h" static QemuOptsList *vm_config_groups[48]; -static QemuOptsList *drive_config_groups[4]; +static QemuOptsList *drive_config_groups[5]; static QemuOptsList *find_list(QemuOptsList **lists, const char *group, Error **errp) diff --git a/util/qemu-coroutine-lock.c 
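[Not part of the patch: a minimal standalone version of the guard-page idea behind the new qemu_alloc_stack(): map one extra page and make the page at the bottom of the stack inaccessible so an overflow faults instead of corrupting memory. Linux/POSIX only, downward-growing stacks only.]

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static void *alloc_stack(size_t *sz)
    {
        size_t pagesz = getpagesize();
        void *ptr;

        /* round up to whole pages, then add one page for the guard */
        *sz = (*sz + pagesz - 1) & ~(pagesz - 1);
        *sz += pagesz;

        ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (ptr == MAP_FAILED) {
            abort();
        }
        /* stack grows down: the lowest page becomes the guard page */
        if (mprotect(ptr, pagesz, PROT_NONE) != 0) {
            abort();
        }
        return ptr;
    }

    int main(void)
    {
        size_t sz = 64 * 1024;
        void *stack = alloc_stack(&sz);

        printf("stack %p, %zu bytes (including guard page)\n", stack, sz);
        munmap(stack, sz);
        return 0;
    }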
b/util/qemu-coroutine-lock.c index 22aa9abb30..14cf9ce458 100644 --- a/util/qemu-coroutine-lock.c +++ b/util/qemu-coroutine-lock.c @@ -129,6 +129,8 @@ void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex) } mutex->locked = true; + mutex->holder = self; + self->locks_held++; trace_qemu_co_mutex_lock_return(mutex, self); } @@ -140,9 +142,12 @@ void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) trace_qemu_co_mutex_unlock_entry(mutex, self); assert(mutex->locked == true); + assert(mutex->holder == self); assert(qemu_in_coroutine()); mutex->locked = false; + mutex->holder = NULL; + self->locks_held--; qemu_co_queue_next(&mutex->queue); trace_qemu_co_mutex_unlock_return(mutex, self); @@ -156,14 +161,19 @@ void qemu_co_rwlock_init(CoRwlock *lock) void qemu_co_rwlock_rdlock(CoRwlock *lock) { + Coroutine *self = qemu_coroutine_self(); + while (lock->writer) { qemu_co_queue_wait(&lock->queue); } lock->reader++; + self->locks_held++; } void qemu_co_rwlock_unlock(CoRwlock *lock) { + Coroutine *self = qemu_coroutine_self(); + assert(qemu_in_coroutine()); if (lock->writer) { lock->writer = false; @@ -176,12 +186,16 @@ void qemu_co_rwlock_unlock(CoRwlock *lock) qemu_co_queue_next(&lock->queue); } } + self->locks_held--; } void qemu_co_rwlock_wrlock(CoRwlock *lock) { + Coroutine *self = qemu_coroutine_self(); + while (lock->writer || lock->reader) { qemu_co_queue_wait(&lock->queue); } lock->writer = true; + self->locks_held++; } diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c index 89f21a9cec..737bffa984 100644 --- a/util/qemu-coroutine.c +++ b/util/qemu-coroutine.c @@ -122,6 +122,7 @@ void qemu_coroutine_enter(Coroutine *co) case COROUTINE_YIELD: return; case COROUTINE_TERMINATE: + assert(!co->locks_held); trace_qemu_coroutine_terminate(co); coroutine_delete(co); return; @@ -145,3 +146,8 @@ void coroutine_fn qemu_coroutine_yield(void) self->caller = NULL; qemu_coroutine_switch(self, to, COROUTINE_YIELD); } + +bool qemu_coroutine_entered(Coroutine *co) +{ + return co->caller; +} diff --git a/util/qemu-error.c b/util/qemu-error.c index 1ef35664af..b331f8f4a4 100644 --- a/util/qemu-error.c +++ b/util/qemu-error.c @@ -14,24 +14,6 @@ #include "monitor/monitor.h" #include "qemu/error-report.h" -/* - * Print to current monitor if we have one, else to stderr. - * TODO should return int, so callers can calculate width, but that - * requires surgery to monitor_vprintf(). Left for another day. - */ -void error_vprintf(const char *fmt, va_list ap) -{ - if (cur_mon && !monitor_cur_is_qmp()) { - monitor_vprintf(cur_mon, fmt, ap); - } else { - vfprintf(stderr, fmt, ap); - } -} - -/* - * Print to current monitor if we have one, else to stderr. - * TODO just like error_vprintf() - */ void error_printf(const char *fmt, ...) { va_list ap; @@ -45,11 +27,9 @@ void error_printf_unless_qmp(const char *fmt, ...) 
{ va_list ap; - if (!monitor_cur_is_qmp()) { - va_start(ap, fmt); - error_vprintf(fmt, ap); - va_end(ap); - } + va_start(ap, fmt); + error_vprintf_unless_qmp(fmt, ap); + va_end(ap); } static Location std_loc = { diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c index 114df226f6..e37feb02b2 100644 --- a/util/qemu-sockets.c +++ b/util/qemu-sockets.c @@ -20,12 +20,16 @@ #include "qemu/osdep.h" +#ifdef CONFIG_AF_VSOCK +#include +#endif /* CONFIG_AF_VSOCK */ + #include "monitor/monitor.h" #include "qapi/error.h" #include "qemu/sockets.h" #include "qemu/main-loop.h" -#include "qapi/qmp-input-visitor.h" -#include "qapi/qmp-output-visitor.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qobject-output-visitor.h" #include "qapi-visit.h" #include "qemu/cutils.h" @@ -78,6 +82,9 @@ NetworkAddressFamily inet_netfamily(int family) case PF_INET6: return NETWORK_ADDRESS_FAMILY_IPV6; case PF_INET: return NETWORK_ADDRESS_FAMILY_IPV4; case PF_UNIX: return NETWORK_ADDRESS_FAMILY_UNIX; +#ifdef CONFIG_AF_VSOCK + case PF_VSOCK: return NETWORK_ADDRESS_FAMILY_VSOCK; +#endif /* CONFIG_AF_VSOCK */ } return NETWORK_ADDRESS_FAMILY_UNKNOWN; } @@ -415,8 +422,8 @@ static struct addrinfo *inet_parse_connect_saddr(InetSocketAddress *saddr, * function succeeds, callback will be called when the connection * completes, with the file descriptor on success, or -1 on error. */ -static int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, - NonBlockingConnectHandler *callback, void *opaque) +int inet_connect_saddr(InetSocketAddress *saddr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) { Error *local_err = NULL; struct addrinfo *res, *e; @@ -494,10 +501,10 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, goto err; } - if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) { + if ((rc = getaddrinfo(addr, port, &ai, &peer)) != 0) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, gai_strerror(rc)); - goto err; + goto err; } /* lookup local addr */ @@ -520,7 +527,7 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, port = "0"; } - if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) { + if ((rc = getaddrinfo(addr, port, &ai, &local)) != 0) { error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, gai_strerror(rc)); goto err; @@ -551,12 +558,16 @@ static int inet_dgram_saddr(InetSocketAddress *sraddr, return sock; err: - if (-1 != sock) + if (sock != -1) { closesocket(sock); - if (local) + } + if (local) { freeaddrinfo(local); - if (peer) + } + if (peer) { freeaddrinfo(peer); + } + return -1; } @@ -576,20 +587,20 @@ InetSocketAddress *inet_parse(const char *str, Error **errp) if (str[0] == ':') { /* no host given */ host[0] = '\0'; - if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) { + if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) { error_setg(errp, "error parsing port in address '%s'", str); goto fail; } } else if (str[0] == '[') { /* IPv6 addr */ - if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) { + if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) { error_setg(errp, "error parsing IPv6 address '%s'", str); goto fail; } addr->ipv6 = addr->has_ipv6 = true; } else { /* hostname or IPv4 addr */ - if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) { + if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) { error_setg(errp, "error parsing address '%s'", str); goto fail; } @@ -649,6 +660,181 @@ int inet_connect(const char *str, Error **errp) return sock; } +#ifdef CONFIG_AF_VSOCK +static bool 
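[Not part of the patch: a standalone walk through the three sscanf() patterns inet_parse() uses for ":port", "[ipv6]:port" and "host:port" strings, matching the formats in the hunk above; illustration only.]

    #include <stdio.h>

    static void parse(const char *str)
    {
        char host[65] = "";
        char port[33] = "";
        int pos = 0;

        if (str[0] == ':') {                                  /* no host given */
            if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
                goto bad;
            }
        } else if (str[0] == '[') {                           /* IPv6 address */
            if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
                goto bad;
            }
        } else {                                              /* host or IPv4 */
            if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
                goto bad;
            }
        }
        printf("'%s' -> host='%s' port='%s' (consumed %d)\n",
               str, host, port, pos);
        return;
    bad:
        printf("'%s' -> parse error\n", str);
    }

    int main(void)
    {
        parse(":4444");
        parse("[::1]:443");
        parse("localhost:80");
        return 0;
    }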
vsock_parse_vaddr_to_sockaddr(const VsockSocketAddress *vaddr, + struct sockaddr_vm *svm, + Error **errp) +{ + unsigned long long val; + + memset(svm, 0, sizeof(*svm)); + svm->svm_family = AF_VSOCK; + + if (parse_uint_full(vaddr->cid, &val, 10) < 0 || + val > UINT32_MAX) { + error_setg(errp, "Failed to parse cid '%s'", vaddr->cid); + return false; + } + svm->svm_cid = val; + + if (parse_uint_full(vaddr->port, &val, 10) < 0 || + val > UINT32_MAX) { + error_setg(errp, "Failed to parse port '%s'", vaddr->port); + return false; + } + svm->svm_port = val; + + return true; +} + +static int vsock_connect_addr(const struct sockaddr_vm *svm, bool *in_progress, + ConnectState *connect_state, Error **errp) +{ + int sock, rc; + + *in_progress = false; + + sock = qemu_socket(AF_VSOCK, SOCK_STREAM, 0); + if (sock < 0) { + error_setg_errno(errp, errno, "Failed to create socket"); + return -1; + } + if (connect_state != NULL) { + qemu_set_nonblock(sock); + } + /* connect to peer */ + do { + rc = 0; + if (connect(sock, (const struct sockaddr *)svm, sizeof(*svm)) < 0) { + rc = -errno; + } + } while (rc == -EINTR); + + if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) { + connect_state->fd = sock; + qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state); + *in_progress = true; + } else if (rc < 0) { + error_setg_errno(errp, errno, "Failed to connect socket"); + closesocket(sock); + return -1; + } + return sock; +} + +static int vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp, + NonBlockingConnectHandler *callback, + void *opaque) +{ + struct sockaddr_vm svm; + int sock = -1; + bool in_progress; + ConnectState *connect_state = NULL; + + if (!vsock_parse_vaddr_to_sockaddr(vaddr, &svm, errp)) { + return -1; + } + + if (callback != NULL) { + connect_state = g_malloc0(sizeof(*connect_state)); + connect_state->callback = callback; + connect_state->opaque = opaque; + } + + sock = vsock_connect_addr(&svm, &in_progress, connect_state, errp); + if (sock < 0) { + /* do nothing */ + } else if (in_progress) { + /* wait_for_connect() will do the rest */ + return sock; + } else { + if (callback) { + callback(sock, NULL, opaque); + } + } + g_free(connect_state); + return sock; +} + +static int vsock_listen_saddr(VsockSocketAddress *vaddr, + Error **errp) +{ + struct sockaddr_vm svm; + int slisten; + + if (!vsock_parse_vaddr_to_sockaddr(vaddr, &svm, errp)) { + return -1; + } + + slisten = qemu_socket(AF_VSOCK, SOCK_STREAM, 0); + if (slisten < 0) { + error_setg_errno(errp, errno, "Failed to create socket"); + return -1; + } + + if (bind(slisten, (const struct sockaddr *)&svm, sizeof(svm)) != 0) { + error_setg_errno(errp, errno, "Failed to bind socket"); + closesocket(slisten); + return -1; + } + + if (listen(slisten, 1) != 0) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + closesocket(slisten); + return -1; + } + return slisten; +} + +static VsockSocketAddress *vsock_parse(const char *str, Error **errp) +{ + VsockSocketAddress *addr = NULL; + char cid[33]; + char port[33]; + int n; + + if (sscanf(str, "%32[^:]:%32[^,]%n", cid, port, &n) != 2) { + error_setg(errp, "error parsing address '%s'", str); + return NULL; + } + if (str[n] != '\0') { + error_setg(errp, "trailing characters in address '%s'", str); + return NULL; + } + + addr = g_new0(VsockSocketAddress, 1); + addr->cid = g_strdup(cid); + addr->port = g_strdup(port); + return addr; +} +#else +static void vsock_unsupported(Error **errp) +{ + error_setg(errp, "socket family AF_VSOCK unsupported"); +} + +static int 
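[Not part of the patch: a minimal AF_VSOCK listener along the lines of the new vsock_listen_saddr(). Linux only; assumes <linux/vm_sockets.h> provides struct sockaddr_vm and VMADDR_CID_ANY. Port 1234 is arbitrary.]

    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/vm_sockets.h>

    int main(void)
    {
        struct sockaddr_vm svm;
        int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

        if (fd < 0) {
            perror("socket");
            return 1;
        }

        memset(&svm, 0, sizeof(svm));
        svm.svm_family = AF_VSOCK;
        svm.svm_cid = VMADDR_CID_ANY;        /* accept from any context ID */
        svm.svm_port = 1234;

        if (bind(fd, (struct sockaddr *)&svm, sizeof(svm)) != 0 ||
            listen(fd, 1) != 0) {
            perror("bind/listen");
            close(fd);
            return 1;
        }

        printf("listening on vsock port %u\n", svm.svm_port);
        close(fd);
        return 0;
    }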
vsock_connect_saddr(VsockSocketAddress *vaddr, Error **errp, + NonBlockingConnectHandler *callback, + void *opaque) +{ + vsock_unsupported(errp); + return -1; +} + +static int vsock_listen_saddr(VsockSocketAddress *vaddr, + Error **errp) +{ + vsock_unsupported(errp); + return -1; +} + +static VsockSocketAddress *vsock_parse(const char *str, Error **errp) +{ + vsock_unsupported(errp); + return NULL; +} +#endif /* CONFIG_AF_VSOCK */ + #ifndef _WIN32 static int unix_listen_saddr(UnixSocketAddress *saddr, @@ -819,8 +1005,10 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp) sock = unix_listen_saddr(saddr, true, errp); - if (sock != -1 && ostr) + if (sock != -1 && ostr) { snprintf(ostr, olen, "%s%s", saddr->path, optstr ? optstr : ""); + } + qapi_free_UnixSocketAddress(saddr); return sock; } @@ -861,6 +1049,12 @@ SocketAddress *socket_parse(const char *str, Error **errp) addr->u.fd.data = g_new(String, 1); addr->u.fd.data->str = g_strdup(str + 3); } + } else if (strstart(str, "vsock:", NULL)) { + addr->type = SOCKET_ADDRESS_KIND_VSOCK; + addr->u.vsock.data = vsock_parse(str + strlen("vsock:"), errp); + if (addr->u.vsock.data == NULL) { + goto fail; + } } else { addr->type = SOCKET_ADDRESS_KIND_INET; addr->u.inet.data = inet_parse(str, errp); @@ -897,6 +1091,10 @@ int socket_connect(SocketAddress *addr, Error **errp, } break; + case SOCKET_ADDRESS_KIND_VSOCK: + fd = vsock_connect_saddr(addr->u.vsock.data, errp, callback, opaque); + break; + default: abort(); } @@ -920,6 +1118,10 @@ int socket_listen(SocketAddress *addr, Error **errp) fd = monitor_get_fd(cur_mon, addr->u.fd.data->str, errp); break; + case SOCKET_ADDRESS_KIND_VSOCK: + fd = vsock_listen_saddr(addr->u.vsock.data, errp); + break; + default: abort(); } @@ -1019,6 +1221,26 @@ socket_sockaddr_to_address_unix(struct sockaddr_storage *sa, } #endif /* WIN32 */ +#ifdef CONFIG_AF_VSOCK +static SocketAddress * +socket_sockaddr_to_address_vsock(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + SocketAddress *addr; + VsockSocketAddress *vaddr; + struct sockaddr_vm *svm = (struct sockaddr_vm *)sa; + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_KIND_VSOCK; + addr->u.vsock.data = vaddr = g_new0(VsockSocketAddress, 1); + vaddr->cid = g_strdup_printf("%u", svm->svm_cid); + vaddr->port = g_strdup_printf("%u", svm->svm_port); + + return addr; +} +#endif /* CONFIG_AF_VSOCK */ + SocketAddress * socket_sockaddr_to_address(struct sockaddr_storage *sa, socklen_t salen, @@ -1034,6 +1256,11 @@ socket_sockaddr_to_address(struct sockaddr_storage *sa, return socket_sockaddr_to_address_unix(sa, salen, errp); #endif /* WIN32 */ +#ifdef CONFIG_AF_VSOCK + case AF_VSOCK: + return socket_sockaddr_to_address_vsock(sa, salen, errp); +#endif + default: error_setg(errp, "socket family %d unsupported", sa->ss_family); @@ -1100,6 +1327,12 @@ char *socket_address_to_string(struct SocketAddress *addr, Error **errp) buf = g_strdup(addr->u.fd.data->str); break; + case SOCKET_ADDRESS_KIND_VSOCK: + buf = g_strdup_printf("%s:%s", + addr->u.vsock.data->cid, + addr->u.vsock.data->port); + break; + default: error_setg(errp, "socket family %d unsupported", addr->type); diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index 27adfd9776..5724b2723d 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -88,6 +88,20 @@ void qemu_mutex_unlock(QemuMutex *mutex) error_exit(err, __func__); } +void qemu_rec_mutex_init(QemuRecMutex *mutex) +{ + int err; + pthread_mutexattr_t attr; + + 
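[Not part of the patch: socket_parse() now accepts "vsock:cid:port" strings and hands the remainder to vsock_parse(). A standalone check of the "cid:port" scan that function performs, including the trailing-character rejection:]

    #include <stdio.h>

    int main(void)
    {
        const char *str = "3:1234";
        char cid[33];
        char port[33];
        int n;

        if (sscanf(str, "%32[^:]:%32[^,]%n", cid, port, &n) != 2) {
            fprintf(stderr, "parse error\n");
            return 1;
        }
        if (str[n] != '\0') {
            fprintf(stderr, "trailing characters in '%s'\n", str);
            return 1;
        }
        printf("cid=%s port=%s\n", cid, port);
        return 0;
    }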
pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + err = pthread_mutex_init(&mutex->lock, &attr); + pthread_mutexattr_destroy(&attr); + if (err) { + error_exit(err, __func__); + } +} + void qemu_cond_init(QemuCond *cond) { int err; @@ -368,7 +382,11 @@ void qemu_event_destroy(QemuEvent *ev) void qemu_event_set(QemuEvent *ev) { - if (atomic_mb_read(&ev->value) != EV_SET) { + /* qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (atomic_read(&ev->value) != EV_SET) { if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { /* There were waiters, wake them up. */ futex_wake(ev, INT_MAX); @@ -378,7 +396,11 @@ void qemu_event_set(QemuEvent *ev) void qemu_event_reset(QemuEvent *ev) { - if (atomic_mb_read(&ev->value) == EV_SET) { + unsigned value; + + value = atomic_read(&ev->value); + smp_mb_acquire(); + if (value == EV_SET) { /* * If there was a concurrent reset (or even reset+wait), * do nothing. Otherwise change EV_SET->EV_FREE. @@ -391,7 +413,8 @@ void qemu_event_wait(QemuEvent *ev) { unsigned value; - value = atomic_mb_read(&ev->value); + value = atomic_read(&ev->value); + smp_mb_acquire(); if (value != EV_SET) { if (value == EV_FREE) { /* diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index 98a5ddff82..728e76b5b2 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -79,6 +79,31 @@ void qemu_mutex_unlock(QemuMutex *mutex) LeaveCriticalSection(&mutex->lock); } +void qemu_rec_mutex_init(QemuRecMutex *mutex) +{ + InitializeCriticalSection(&mutex->lock); +} + +void qemu_rec_mutex_destroy(QemuRecMutex *mutex) +{ + DeleteCriticalSection(&mutex->lock); +} + +void qemu_rec_mutex_lock(QemuRecMutex *mutex) +{ + EnterCriticalSection(&mutex->lock); +} + +int qemu_rec_mutex_trylock(QemuRecMutex *mutex) +{ + return !TryEnterCriticalSection(&mutex->lock); +} + +void qemu_rec_mutex_unlock(QemuRecMutex *mutex) +{ + LeaveCriticalSection(&mutex->lock); +} + void qemu_cond_init(QemuCond *cond) { memset(cond, 0, sizeof(*cond)); @@ -274,7 +299,11 @@ void qemu_event_destroy(QemuEvent *ev) void qemu_event_set(QemuEvent *ev) { - if (atomic_mb_read(&ev->value) != EV_SET) { + /* qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (atomic_read(&ev->value) != EV_SET) { if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { /* There were waiters, wake them up. */ SetEvent(ev->event); @@ -284,7 +313,11 @@ void qemu_event_set(QemuEvent *ev) void qemu_event_reset(QemuEvent *ev) { - if (atomic_mb_read(&ev->value) == EV_SET) { + unsigned value; + + value = atomic_read(&ev->value); + smp_mb_acquire(); + if (value == EV_SET) { /* If there was a concurrent reset (or even reset+wait), * do nothing. Otherwise change EV_SET->EV_FREE. 
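[Not part of the patch: a standalone demo of the recursive mutex that the POSIX qemu_rec_mutex_init() above sets up via PTHREAD_MUTEX_RECURSIVE; the same thread may re-acquire the lock without deadlocking. Build with -pthread.]

    #include <pthread.h>
    #include <stdio.h>

    int main(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutex_t lock;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
        pthread_mutex_init(&lock, &attr);
        pthread_mutexattr_destroy(&attr);

        pthread_mutex_lock(&lock);
        pthread_mutex_lock(&lock);      /* recursive: does not deadlock */
        printf("nested lock acquired\n");
        pthread_mutex_unlock(&lock);
        pthread_mutex_unlock(&lock);

        pthread_mutex_destroy(&lock);
        return 0;
    }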
*/ @@ -296,7 +329,8 @@ void qemu_event_wait(QemuEvent *ev) { unsigned value; - value = atomic_mb_read(&ev->value); + value = atomic_read(&ev->value); + smp_mb_acquire(); if (value != EV_SET) { if (value == EV_FREE) { /* qemu_event_set is not yet going to call SetEvent, but we are diff --git a/util/qht.c b/util/qht.c index 16a8d7950e..ff4d2e6974 100644 --- a/util/qht.c +++ b/util/qht.c @@ -133,7 +133,8 @@ struct qht_map { /* trigger a resize when n_added_buckets > n_buckets / div */ #define QHT_NR_ADDED_BUCKETS_THRESHOLD_DIV 8 -static void qht_do_resize(struct qht *ht, struct qht_map *new); +static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, + bool reset); static void qht_grow_maybe(struct qht *ht); #ifdef QHT_DEBUG @@ -379,7 +380,7 @@ static void qht_bucket_reset__locked(struct qht_bucket *head) if (b->pointers[i] == NULL) { goto done; } - b->hashes[i] = 0; + atomic_set(&b->hashes[i], 0); atomic_set(&b->pointers[i], NULL); } b = b->next; @@ -408,12 +409,21 @@ void qht_reset(struct qht *ht) qht_map_unlock_buckets(map); } +static inline void qht_do_resize(struct qht *ht, struct qht_map *new) +{ + qht_do_resize_reset(ht, new, false); +} + +static inline void qht_do_resize_and_reset(struct qht *ht, struct qht_map *new) +{ + qht_do_resize_reset(ht, new, true); +} + bool qht_reset_size(struct qht *ht, size_t n_elems) { - struct qht_map *new; + struct qht_map *new = NULL; struct qht_map *map; size_t n_buckets; - bool resize = false; n_buckets = qht_elems_to_buckets(n_elems); @@ -421,18 +431,11 @@ bool qht_reset_size(struct qht *ht, size_t n_elems) map = ht->map; if (n_buckets != map->n_buckets) { new = qht_map_create(n_buckets); - resize = true; - } - - qht_map_lock_buckets(map); - qht_map_reset__all_locked(map); - if (resize) { - qht_do_resize(ht, new); } - qht_map_unlock_buckets(map); + qht_do_resize_and_reset(ht, new); qemu_mutex_unlock(&ht->lock); - return resize; + return !!new; } static inline @@ -444,7 +447,7 @@ void *qht_do_lookup(struct qht_bucket *head, qht_lookup_func_t func, do { for (i = 0; i < QHT_BUCKET_ENTRIES; i++) { - if (b->hashes[i] == hash) { + if (atomic_read(&b->hashes[i]) == hash) { /* The pointer is dereferenced before seqlock_read_retry, * so (unlike qht_insert__locked) we need to use * atomic_rcu_read here. @@ -538,8 +541,8 @@ static bool qht_insert__locked(struct qht *ht, struct qht_map *map, if (new) { atomic_rcu_set(&prev->next, b); } - b->hashes[i] = hash; /* smp_wmb() implicit in seqlock_write_begin. */ + atomic_set(&b->hashes[i], hash); atomic_set(&b->pointers[i], p); seqlock_write_end(&head->sequence); return true; @@ -561,9 +564,7 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht) if (qht_map_needs_resize(map)) { struct qht_map *new = qht_map_create(map->n_buckets * 2); - qht_map_lock_buckets(map); qht_do_resize(ht, new); - qht_map_unlock_buckets(map); } qemu_mutex_unlock(&ht->lock); } @@ -607,10 +608,10 @@ qht_entry_move(struct qht_bucket *to, int i, struct qht_bucket *from, int j) qht_debug_assert(to->pointers[i]); qht_debug_assert(from->pointers[j]); - to->hashes[i] = from->hashes[j]; + atomic_set(&to->hashes[i], from->hashes[j]); atomic_set(&to->pointers[i], from->pointers[j]); - from->hashes[j] = 0; + atomic_set(&from->hashes[j], 0); atomic_set(&from->pointers[j], NULL); } @@ -739,24 +740,31 @@ static void qht_map_copy(struct qht *ht, void *p, uint32_t hash, void *userp) } /* - * Call with ht->lock and all bucket locks held. 
- * - * Creating the @new map here would add unnecessary delay while all the locks - * are held--holding up the bucket locks is particularly bad, since no writes - * can occur while these are held. Thus, we let callers create the new map, - * hopefully without the bucket locks held. + * Atomically perform a resize and/or reset. + * Call with ht->lock held. */ -static void qht_do_resize(struct qht *ht, struct qht_map *new) +static void qht_do_resize_reset(struct qht *ht, struct qht_map *new, bool reset) { struct qht_map *old; old = ht->map; - g_assert_cmpuint(new->n_buckets, !=, old->n_buckets); + qht_map_lock_buckets(old); + if (reset) { + qht_map_reset__all_locked(old); + } + + if (new == NULL) { + qht_map_unlock_buckets(old); + return; + } + + g_assert_cmpuint(new->n_buckets, !=, old->n_buckets); qht_map_iter__all_locked(ht, old, qht_map_copy, new); qht_map_debug__all_locked(new); atomic_rcu_set(&ht->map, new); + qht_map_unlock_buckets(old); call_rcu(old, qht_map_destroy, rcu); } @@ -768,12 +776,9 @@ bool qht_resize(struct qht *ht, size_t n_elems) qemu_mutex_lock(&ht->lock); if (n_buckets != ht->map->n_buckets) { struct qht_map *new; - struct qht_map *old = ht->map; new = qht_map_create(n_buckets); - qht_map_lock_buckets(old); qht_do_resize(ht, new); - qht_map_unlock_buckets(old); ret = true; } qemu_mutex_unlock(&ht->lock); diff --git a/util/rcu.c b/util/rcu.c index bceb3e4720..9adc5e4a36 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -82,14 +82,16 @@ static void wait_for_readers(void) /* Instead of using atomic_mb_set for index->waiting, and * atomic_mb_read for index->ctr, memory barriers are placed * manually since writes to different threads are independent. - * atomic_mb_set has a smp_wmb before... + * qemu_event_reset has acquire semantics, so no memory barrier + * is needed here. */ - smp_wmb(); QLIST_FOREACH(index, ®istry, node) { atomic_set(&index->waiting, true); } - /* ... and a smp_mb after. */ + /* Here, order the stores to index->waiting before the + * loads of index->ctr. + */ smp_mb(); QLIST_FOREACH_SAFE(index, ®istry, node, tmp) { @@ -104,9 +106,6 @@ static void wait_for_readers(void) } } - /* atomic_mb_read has smp_rmb after. */ - smp_rmb(); - if (QLIST_EMPTY(®istry)) { break; } diff --git a/util/rfifolock.c b/util/rfifolock.c deleted file mode 100644 index 084c2f0ea1..0000000000 --- a/util/rfifolock.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Recursive FIFO lock - * - * Copyright Red Hat, Inc. 2013 - * - * Authors: - * Stefan Hajnoczi - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "qemu/rfifolock.h" - -void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque) -{ - qemu_mutex_init(&r->lock); - r->head = 0; - r->tail = 0; - qemu_cond_init(&r->cond); - r->nesting = 0; - r->cb = cb; - r->cb_opaque = opaque; -} - -void rfifolock_destroy(RFifoLock *r) -{ - qemu_cond_destroy(&r->cond); - qemu_mutex_destroy(&r->lock); -} - -/* - * Theory of operation: - * - * In order to ensure FIFO ordering, implement a ticketlock. Threads acquiring - * the lock enqueue themselves by incrementing the tail index. When the lock - * is unlocked, the head is incremented and waiting threads are notified. - * - * Recursive locking does not take a ticket since the head is only incremented - * when the outermost recursive caller unlocks. 
- */ -void rfifolock_lock(RFifoLock *r) -{ - qemu_mutex_lock(&r->lock); - - /* Take a ticket */ - unsigned int ticket = r->tail++; - - if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) { - r->tail--; /* put ticket back, we're nesting */ - } else { - while (ticket != r->head) { - /* Invoke optional contention callback */ - if (r->cb) { - r->cb(r->cb_opaque); - } - qemu_cond_wait(&r->cond, &r->lock); - } - qemu_thread_get_self(&r->owner_thread); - } - - r->nesting++; - qemu_mutex_unlock(&r->lock); -} - -void rfifolock_unlock(RFifoLock *r) -{ - qemu_mutex_lock(&r->lock); - assert(r->nesting > 0); - assert(qemu_thread_is_self(&r->owner_thread)); - if (--r->nesting == 0) { - r->head++; - qemu_cond_broadcast(&r->cond); - } - qemu_mutex_unlock(&r->lock); -} diff --git a/util/trace-events b/util/trace-events index 747e6baf75..ed06aee2ec 100644 --- a/util/trace-events +++ b/util/trace-events @@ -1,5 +1,24 @@ # See docs/tracing.txt for syntax documentation. +# util/buffer.c +buffer_resize(const char *buf, size_t olen, size_t len) "%s: old %zd, new %zd" +buffer_move_empty(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" +buffer_move(const char *buf, size_t len, const char *from) "%s: %zd bytes from %s" +buffer_free(const char *buf, size_t len) "%s: capacity %zd" + +# util/qemu-coroutine.c +qemu_coroutine_enter(void *from, void *to, void *opaque) "from %p to %p opaque %p" +qemu_coroutine_yield(void *from, void *to) "from %p to %p" +qemu_coroutine_terminate(void *co) "self %p" + +# util/qemu-coroutine-lock.c +qemu_co_queue_run_restart(void *co) "co %p" +qemu_co_queue_next(void *nxt) "next %p" +qemu_co_mutex_lock_entry(void *mutex, void *self) "mutex %p self %p" +qemu_co_mutex_lock_return(void *mutex, void *self) "mutex %p self %p" +qemu_co_mutex_unlock_entry(void *mutex, void *self) "mutex %p self %p" +qemu_co_mutex_unlock_return(void *mutex, void *self) "mutex %p self %p" + # util/oslib-win32.c # util/oslib-posix.c qemu_memalign(size_t alignment, size_t size, void *ptr) "alignment %zu size %zu ptr %p" diff --git a/util/uuid.c b/util/uuid.c new file mode 100644 index 0000000000..dd6b5fdf05 --- /dev/null +++ b/util/uuid.c @@ -0,0 +1,114 @@ +/* + * QEMU UUID functions + * + * Copyright 2016 Red Hat, Inc. + * + * Authors: + * Fam Zheng + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/uuid.h" +#include "qemu/bswap.h" + +void qemu_uuid_generate(QemuUUID *uuid) +{ + int i; + uint32_t tmp[4]; + + QEMU_BUILD_BUG_ON(sizeof(QemuUUID) != 16); + + for (i = 0; i < 4; ++i) { + tmp[i] = g_random_int(); + } + memcpy(uuid, tmp, sizeof(tmp)); + /* Set the two most significant bits (bits 6 and 7) of the + clock_seq_hi_and_reserved to zero and one, respectively. */ + uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; + /* Set the four most significant bits (bits 12 through 15) of the + time_hi_and_version field to the 4-bit version number. 
+ */ + uuid->data[6] = (uuid->data[6] & 0xf) | 0x40; +} + +int qemu_uuid_is_null(const QemuUUID *uu) +{ + static QemuUUID null_uuid; + return memcmp(uu, &null_uuid, sizeof(QemuUUID)) == 0; +} + +void qemu_uuid_unparse(const QemuUUID *uuid, char *out) +{ + const unsigned char *uu = &uuid->data[0]; + snprintf(out, UUID_FMT_LEN + 1, UUID_FMT, + uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7], + uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]); +} + +char *qemu_uuid_unparse_strdup(const QemuUUID *uuid) +{ + const unsigned char *uu = &uuid->data[0]; + return g_strdup_printf(UUID_FMT, + uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], + uu[7], uu[8], uu[9], uu[10], uu[11], uu[12], + uu[13], uu[14], uu[15]); +} + +static bool qemu_uuid_is_valid(const char *str) +{ + int i; + + for (i = 0; i < strlen(str); i++) { + const char c = str[i]; + if (i == 8 || i == 13 || i == 18 || i == 23) { + if (str[i] != '-') { + return false; + } + } else { + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f')) { + continue; + } + return false; + } + } + return i == 36; +} + +int qemu_uuid_parse(const char *str, QemuUUID *uuid) +{ + unsigned char *uu = &uuid->data[0]; + int ret; + + if (!qemu_uuid_is_valid(str)) { + return -1; + } + + ret = sscanf(str, UUID_FMT, &uu[0], &uu[1], &uu[2], &uu[3], + &uu[4], &uu[5], &uu[6], &uu[7], &uu[8], &uu[9], + &uu[10], &uu[11], &uu[12], &uu[13], &uu[14], + &uu[15]); + + if (ret != 16) { + return -1; + } + return 0; +} + +/* Swap from UUID format endian (BE) to the opposite or vice versa. + */ +void qemu_uuid_bswap(QemuUUID *uuid) +{ + assert(QEMU_PTR_IS_ALIGNED(uuid, sizeof(uint32_t))); + bswap32s(&uuid->fields.time_low); + bswap16s(&uuid->fields.time_mid); + bswap16s(&uuid->fields.time_high_and_version); +} diff --git a/vl.c b/vl.c index 2974fc73cd..63d53cfa7d 100644 --- a/vl.c +++ b/vl.c @@ -25,6 +25,7 @@ #include "qemu-version.h" #include "qemu/cutils.h" #include "qemu/help_option.h" +#include "qemu/uuid.h" #ifdef CONFIG_SECCOMP #include "sysemu/seccomp.h" @@ -166,6 +167,7 @@ int main(int argc, char **argv) #include "audio/audio.h" #include "migration/migration.h" #include "sysemu/cpus.h" +#include "migration/colo.h" #include "sysemu/kvm.h" #include "qapi/qmp/qjson.h" #include "qemu/option.h" @@ -185,7 +187,6 @@ int main(int argc, char **argv) #include "trace.h" #include "trace/control.h" #include "qemu/queue.h" -#include "sysemu/cpus.h" #include "sysemu/arch_init.h" #include "ui/qemu-spice.h" @@ -197,6 +198,7 @@ int main(int argc, char **argv) #include "crypto/init.h" #include "sysemu/replay.h" #include "qapi/qmp/qerror.h" +#include "sysemu/iothread.h" #if defined(CONFIG_GNU_ARM_ECLIPSE) #include @@ -285,10 +287,10 @@ uint8_t qemu_extra_params_fw[2]; int icount_align_option; -/* The bytes in qemu_uuid[] are in the order specified by RFC4122, _not_ in the +/* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the * little-endian "wire format" described in the SMBIOS 2.6 specification. 
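[Not part of the patch: a hedged usage sketch for the new QemuUUID helpers in util/uuid.c. It assumes the declarations and UUID_FMT_LEN from "qemu/uuid.h" and a QEMU build environment rather than a standalone program.]

    #include "qemu/osdep.h"
    #include "qemu/uuid.h"

    static void uuid_example(void)
    {
        QemuUUID uuid;
        QemuUUID parsed;
        char str[UUID_FMT_LEN + 1];

        qemu_uuid_generate(&uuid);            /* random RFC4122 v4 UUID */
        qemu_uuid_unparse(&uuid, str);        /* canonical 36-char form */

        if (qemu_uuid_parse(str, &parsed) == 0) {
            assert(!qemu_uuid_is_null(&parsed));
            assert(memcmp(&uuid, &parsed, sizeof(uuid)) == 0);
        }
    }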
*/ -uint8_t qemu_uuid[16]; +QemuUUID qemu_uuid; bool qemu_uuid_set; static NotifierList exit_notifiers = @@ -320,6 +322,7 @@ static struct { { .driver = "isa-serial", .flag = &default_serial }, { .driver = "isa-parallel", .flag = &default_parallel }, { .driver = "isa-fdc", .flag = &default_floppy }, + { .driver = "floppy", .flag = &default_floppy }, { .driver = "ide-cd", .flag = &default_cdrom }, { .driver = "ide-hd", .flag = &default_cdrom }, { .driver = "ide-drive", .flag = &default_cdrom }, @@ -610,6 +613,43 @@ static QemuOptsList qemu_fw_cfg_opts = { }, }; +#ifdef CONFIG_LIBISCSI +static QemuOptsList qemu_iscsi_opts = { + .name = "iscsi", + .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head), + .desc = { + { + .name = "user", + .type = QEMU_OPT_STRING, + .help = "username for CHAP authentication to target", + },{ + .name = "password", + .type = QEMU_OPT_STRING, + .help = "password for CHAP authentication to target", + },{ + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of the secret providing password for CHAP " + "authentication to target", + },{ + .name = "header-digest", + .type = QEMU_OPT_STRING, + .help = "HeaderDigest setting. " + "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}", + },{ + .name = "initiator-name", + .type = QEMU_OPT_STRING, + .help = "Initiator iqn name to use when connecting", + },{ + .name = "timeout", + .type = QEMU_OPT_NUMBER, + .help = "Request timeout in seconds (default 0 = no timeout)", + }, + { /* end of list */ } + }, +}; +#endif + /** * Get machine options * @@ -677,6 +717,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH }, { RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE }, + { RUN_STATE_INMIGRATE, RUN_STATE_COLO }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, @@ -689,6 +730,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_PAUSED, RUN_STATE_RUNNING }, { RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH }, + { RUN_STATE_PAUSED, RUN_STATE_COLO}, { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, @@ -701,10 +743,13 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE }, { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO}, { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, + { RUN_STATE_COLO, RUN_STATE_RUNNING }, + { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, { RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR }, { RUN_STATE_RUNNING, RUN_STATE_IO_ERROR }, @@ -715,6 +760,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN }, { RUN_STATE_RUNNING, RUN_STATE_WATCHDOG }, { RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED }, + { RUN_STATE_RUNNING, RUN_STATE_COLO}, { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING }, @@ -727,10 +773,12 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_SUSPENDED, RUN_STATE_RUNNING }, { RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH }, + { RUN_STATE_SUSPENDED, RUN_STATE_COLO}, { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH }, + { RUN_STATE_WATCHDOG, 
RUN_STATE_COLO}, { RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING }, { RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE }, @@ -849,6 +897,7 @@ void vm_start(void) if (runstate_is_running()) { qapi_event_send_stop(&error_abort); } else { + replay_enable_events(); cpu_enable_ticks(); runstate_set(RUN_STATE_RUNNING); vm_state_notify(1, RUN_STATE_RUNNING); @@ -1797,8 +1846,12 @@ static void qemu_kill_report(void) */ error_report("terminating on signal %d", shutdown_signal); } else { - error_report("terminating on signal %d from pid " FMT_pid, - shutdown_signal, shutdown_pid); + char *shutdown_cmd = qemu_get_pid_name(shutdown_pid); + + error_report("terminating on signal %d from pid " FMT_pid " (%s)", + shutdown_signal, shutdown_pid, + shutdown_cmd ? shutdown_cmd : ""); + g_free(shutdown_cmd); } shutdown_signal = -1; } @@ -1901,6 +1954,11 @@ void qemu_system_guest_panicked(void) } qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort); vm_stop(RUN_STATE_GUEST_PANICKED); + if (!no_shutdown) { + qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, + &error_abort); + qemu_system_shutdown_request(); + } } void qemu_system_reset_request(void) @@ -2156,13 +2214,14 @@ static void version(void) #endif QEMU_WORDSIZE "QEMU emulator version " - QEMU_VERSION QEMU_PKGVERSION - "\nCopyright (c) 2003-2008 Fabrice Bellard\n"); + QEMU_VERSION QEMU_PKGVERSION "\n" + QEMU_COPYRIGHT "\n"); #else /* !defined(CONFIG_GNU_ARM_ECLIPSE) */ - printf("QEMU emulator version " QEMU_VERSION QEMU_PKGVERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"); - + printf("QEMU emulator version " QEMU_VERSION QEMU_PKGVERSION "\n" + QEMU_COPYRIGHT "\n"); + #endif /* defined(CONFIG_GNU_ARM_ECLIPSE) */ } @@ -2611,7 +2670,7 @@ static int chardev_init_func(void *opaque, QemuOpts *opts, Error **errp) { Error *local_err = NULL; - qemu_chr_new_from_opts(opts, NULL, &local_err); + qemu_chr_new_from_opts(opts, &local_err); if (local_err) { error_report_err(local_err); return -1; @@ -2649,8 +2708,9 @@ static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp) if (qemu_opt_get_bool(opts, "pretty", 0)) flags |= MONITOR_USE_PRETTY; - if (qemu_opt_get_bool(opts, "default", 0)) - flags |= MONITOR_IS_DEFAULT; + if (qemu_opt_get_bool(opts, "default", 0)) { + error_report("option 'default' does nothing and is deprecated"); + } chardev = qemu_opt_get(opts, "chardev"); chr = qemu_chr_find(chardev); @@ -2659,7 +2719,6 @@ static int mon_init_func(void *opaque, QemuOpts *opts, Error **errp) exit(1); } - qemu_chr_fe_claim_no_fail(chr); monitor_init(chr, flags); return 0; } @@ -2670,16 +2729,12 @@ static void monitor_parse(const char *optarg, const char *mode, bool pretty) QemuOpts *opts; const char *p; char label[32]; - int def = 0; if (strstart(optarg, "chardev:", &p)) { snprintf(label, sizeof(label), "%s", p); } else { snprintf(label, sizeof(label), "compat_monitor%d", monitor_device_index); - if (monitor_device_index == 0) { - def = 1; - } opts = qemu_chr_parse_compat(label, optarg); if (!opts) { error_report("parse error: %s", optarg); @@ -2691,8 +2746,6 @@ static void monitor_parse(const char *optarg, const char *mode, bool pretty) qemu_opt_set(opts, "mode", mode, &error_abort); qemu_opt_set(opts, "chardev", label, &error_abort); qemu_opt_set_bool(opts, "pretty", pretty, &error_abort); - if (def) - qemu_opt_set(opts, "default", "on", &error_abort); monitor_device_index++; } @@ -2756,7 +2809,7 @@ static int serial_parse(const char *devname) exit(1); } snprintf(label, sizeof(label), "serial%d", index); - serial_hds[index] 
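[Not part of the patch: qemu_kill_report() now resolves the sender's name via the new qemu_get_pid_name(). A standalone GLib sketch of the procfs branch it takes on Linux, reading /proc/<pid>/cmdline; build against glib-2.0.]

    #include <glib.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        pid_t pid = getpid();
        char *pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
        char *name = NULL;
        gsize len;

        if (g_file_get_contents(pid_path, &name, &len, NULL)) {
            /* cmdline is NUL-separated; the first element is the program name */
            printf("pid %d: %s (%zu bytes read)\n", pid, name, (size_t)len);
        }

        g_free(name);
        g_free(pid_path);
        return 0;
    }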
= qemu_chr_new(label, devname, NULL); + serial_hds[index] = qemu_chr_new(label, devname); if (!serial_hds[index]) { error_report("could not connect serial device" " to character backend '%s'", devname); @@ -2778,7 +2831,7 @@ static int parallel_parse(const char *devname) exit(1); } snprintf(label, sizeof(label), "parallel%d", index); - parallel_hds[index] = qemu_chr_new(label, devname, NULL); + parallel_hds[index] = qemu_chr_new(label, devname); if (!parallel_hds[index]) { error_report("could not connect parallel device" " to character backend '%s'", devname); @@ -2809,7 +2862,7 @@ static int virtcon_parse(const char *devname) qemu_opt_set(dev_opts, "driver", "virtconsole", &error_abort); snprintf(label, sizeof(label), "virtcon%d", index); - virtcon_hds[index] = qemu_chr_new(label, devname, NULL); + virtcon_hds[index] = qemu_chr_new(label, devname); if (!virtcon_hds[index]) { error_report("could not connect virtio console" " to character backend '%s'", devname); @@ -2842,7 +2895,7 @@ static int sclp_parse(const char *devname) qemu_opt_set(dev_opts, "driver", "sclpconsole", &error_abort); snprintf(label, sizeof(label), "sclpcon%d", index); - sclp_hds[index] = qemu_chr_new(label, devname, NULL); + sclp_hds[index] = qemu_chr_new(label, devname); if (!sclp_hds[index]) { error_report("could not connect sclp console" " to character backend '%s'", devname); @@ -2858,7 +2911,7 @@ static int debugcon_parse(const char *devname) { QemuOpts *opts; - if (!qemu_chr_new("debugcon", devname, NULL)) { + if (!qemu_chr_new("debugcon", devname)) { exit(1); } opts = qemu_opts_create(qemu_find_opts("device"), "debugcon", 1, NULL); @@ -3089,17 +3142,16 @@ static int machine_set_property(void *opaque, { Object *obj = OBJECT(opaque); Error *local_err = NULL; - char *c, *qom_name; + char *p, *qom_name; if (strcmp(name, "type") == 0) { return 0; } qom_name = g_strdup(name); - c = qom_name; - while (*c++) { - if (*c == '_') { - *c = '-'; + for (p = qom_name; *p; p++) { + if (*p == '_') { + *p = '-'; } } @@ -3135,7 +3187,22 @@ static bool object_create_initial(const char *type) if (g_str_equal(type, "filter-buffer") || g_str_equal(type, "filter-dump") || g_str_equal(type, "filter-mirror") || - g_str_equal(type, "filter-redirector")) { + g_str_equal(type, "filter-redirector") || + g_str_equal(type, "colo-compare") || + g_str_equal(type, "filter-rewriter")) { + return false; + } + + /* Memory allocation by backends needs to be done + * after configure_accelerator() (due to the tcg_enabled() + * checks at memory_region_init_*()). + * + * Also, allocation of large amounts of memory may delay + * chardev initialization for too long, and trigger timeouts + * on software that waits for a monitor socket to be created + * (e.g. libvirt). 
+ */ + if (g_str_has_prefix(type, "memory-backend-")) { return false; } @@ -3312,7 +3379,10 @@ int main(int argc, char **argv, char **envp) semihosting.target = SEMIHOSTING_TARGET_NATIVE; #endif /* defined(CONFIG_GNU_ARM_ECLIPSE) */ - qemu_init_cpu_loop(); + module_call_init(MODULE_INIT_TRACE); + + qemu_init_cpu_list(); + qemu_init_cpu_loop(); qemu_mutex_lock_iothread(); atexit(qemu_run_exit_notifiers); @@ -3336,11 +3406,13 @@ int main(int argc, char **argv, char **envp) #endif /* defined(CONFIG_VERBOSE) */ module_call_init(MODULE_INIT_QOM); + module_call_init(MODULE_INIT_QAPI); qemu_add_opts(&qemu_drive_opts); qemu_add_drive_opts(&qemu_legacy_drive_opts); qemu_add_drive_opts(&qemu_common_drive_opts); qemu_add_drive_opts(&qemu_drive_opts); + qemu_add_drive_opts(&bdrv_runtime_opts); qemu_add_opts(&qemu_chardev_opts); qemu_add_opts(&qemu_device_opts); qemu_add_opts(&qemu_netdev_opts); @@ -3365,6 +3437,9 @@ int main(int argc, char **argv, char **envp) qemu_add_opts(&qemu_icount_opts); qemu_add_opts(&qemu_semihosting_config_opts); qemu_add_opts(&qemu_fw_cfg_opts); +#ifdef CONFIG_LIBISCSI + qemu_add_opts(&qemu_iscsi_opts); +#endif module_call_init(MODULE_INIT_OPTS); runstate_init(); @@ -4148,7 +4223,7 @@ int main(int argc, char **argv, char **envp) cursor_hide = 0; break; case QEMU_OPTION_uuid: - if(qemu_uuid_parse(optarg, qemu_uuid) < 0) { + if (qemu_uuid_parse(optarg, &qemu_uuid) < 0) { error_report("failed to parse UUID string: wrong format"); exit(1); } @@ -4443,6 +4518,11 @@ int main(int argc, char **argv, char **envp) os_daemonize(); + if (pid_file && qemu_create_pidfile(pid_file) != 0) { + error_report("could not acquire pid file: %s", strerror(errno)); + exit(1); + } + if (qemu_init_main_loop(&main_loop_err)) { error_report_err(main_loop_err); exit(1); @@ -4489,6 +4569,16 @@ int main(int argc, char **argv, char **envp) object_property_add_child(object_get_root(), "machine", OBJECT(current_machine), &error_abort); + + if (machine_class->minimum_page_bits) { + if (!set_preferred_target_page_bits(machine_class->minimum_page_bits)) { + /* This would be a board error: specifying a minimum smaller than + * a target's compile-time fixed setting. + */ + g_assert_not_reached(); + } + } + cpu_exec_init_all(); if (machine_class->hw_version) { @@ -4736,11 +4826,6 @@ int main(int argc, char **argv, char **envp) } #endif - if (pid_file && qemu_create_pidfile(pid_file) != 0) { - error_report("could not acquire pid file: %s", strerror(errno)); - exit(1); - } - if (qemu_opts_foreach(qemu_find_opts("device"), device_help_func, NULL, NULL)) { exit(0); @@ -4806,11 +4891,6 @@ int main(int argc, char **argv, char **envp) exit(1); } - if (!linux_boot && qemu_opt_get(machine_opts, "dtb")) { - error_report("-dtb only allowed with -kernel option"); - exit(1); - } - if (semihosting_enabled() && !semihosting_get_argc() && kernel_filename) { /* fall back to the -kernel/-append */ semihosting_arg_fallback(kernel_filename, kernel_cmdline); @@ -4839,6 +4919,8 @@ int main(int argc, char **argv, char **envp) #endif } + colo_info_init(); + if (net_init_clients() < 0) { exit(1); } @@ -5103,16 +5185,13 @@ int main(int argc, char **argv, char **envp) os_setup_post(); - trace_init_vcpu_events(); main_loop(); replay_disable_events(); + iothread_stop_all(); bdrv_close_all(); pause_all_vcpus(); res_free(); -#ifdef CONFIG_TPM - tpm_cleanup(); -#endif /* vhost-user must be cleaned up before chardevs. 
*/ net_cleanup(); diff --git a/xen-common.c b/xen-common.c index e641ad1aef..909976071c 100644 --- a/xen-common.c +++ b/xen-common.c @@ -116,12 +116,12 @@ static int xen_init(MachineState *ms) { xen_xc = xc_interface_open(0, 0, 0); if (xen_xc == NULL) { - xen_be_printf(NULL, 0, "can't open xen interface\n"); + xen_pv_printf(NULL, 0, "can't open xen interface\n"); return -1; } xen_fmem = xenforeignmemory_open(0, 0); if (xen_fmem == NULL) { - xen_be_printf(NULL, 0, "can't open xen fmem interface\n"); + xen_pv_printf(NULL, 0, "can't open xen fmem interface\n"); xc_interface_close(xen_xc); return -1; } diff --git a/xen-hvm.c b/xen-hvm.c index 2f348edf86..0892361cc2 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -810,6 +810,10 @@ static void cpu_ioreq_pio(ioreq_t *req) trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr, req->data, req->count, req->size); + if (req->size > sizeof(uint32_t)) { + hw_error("PIO: bad size (%u)", req->size); + } + if (req->dir == IOREQ_READ) { if (!req->data_is_ptr) { req->data = do_inp(req->addr, req->size); @@ -846,6 +850,10 @@ static void cpu_ioreq_move(ioreq_t *req) trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr, req->data, req->count, req->size); + if (req->size > sizeof(req->data)) { + hw_error("MMIO: bad size (%u)", req->size); + } + if (!req->data_is_ptr) { if (req->dir == IOREQ_READ) { for (i = 0; i < req->count; i++) { @@ -987,6 +995,9 @@ static int handle_buffered_iopage(XenIOState *state) } memset(&req, 0x00, sizeof(req)); + req.state = STATE_IOREQ_READY; + req.count = 1; + req.dir = IOREQ_WRITE; for (;;) { uint32_t rdptr = buf_page->read_pointer, wrptr; @@ -1001,24 +1012,33 @@ static int handle_buffered_iopage(XenIOState *state) break; } buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM]; - req.size = 1UL << buf_req->size; - req.count = 1; + req.size = 1U << buf_req->size; req.addr = buf_req->addr; req.data = buf_req->data; - req.state = STATE_IOREQ_READY; - req.dir = buf_req->dir; - req.df = 1; req.type = buf_req->type; - req.data_is_ptr = 0; + xen_rmb(); qw = (req.size == 8); if (qw) { + if (rdptr + 1 == wrptr) { + hw_error("Incomplete quad word buffered ioreq"); + } buf_req = &buf_page->buf_ioreq[(rdptr + 1) % IOREQ_BUFFER_SLOT_NUM]; req.data |= ((uint64_t)buf_req->data) << 32; + xen_rmb(); } handle_ioreq(state, &req); + /* Only req.data may get updated by handle_ioreq(), albeit even that + * should not happen as such data would never make it to the guest (we + * can only usefully see writes here after all). + */ + assert(req.state == STATE_IOREQ_READY); + assert(req.count == 1); + assert(req.dir == IOREQ_WRITE); + assert(!req.data_is_ptr); + atomic_add(&buf_page->read_pointer, qw + 1); } @@ -1045,7 +1065,11 @@ static void cpu_handle_ioreq(void *opaque) handle_buffered_iopage(state); if (req) { - handle_ioreq(state, req); + ioreq_t copy = *req; + + xen_rmb(); + handle_ioreq(state, ©); + req->data = copy.data; if (req->state != STATE_IOREQ_INPROCESS) { fprintf(stderr, "Badness in I/O request ... not in service?!: " @@ -1316,6 +1340,10 @@ void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory) } xen_be_register_common(); xen_read_physmap(state); + + /* Disable ACPI build because Xen handles it */ + pcms->acpi_build_enabled = false; + return; err:
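[Not part of the patch: the quad-word path in handle_buffered_iopage() above merges two 32-bit buffered slots into one 64-bit value; the cast to uint64_t before the shift is what keeps the high half from being lost. A trivial standalone check:]

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t lo = 0x89abcdef, hi = 0x01234567;
        uint64_t data = lo;

        data |= ((uint64_t)hi) << 32;
        printf("0x%016" PRIx64 "\n", data);   /* 0x0123456789abcdef */
        return 0;
    }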