diff --git a/src/conio/scale.c b/src/conio/scale.c
index 6e41edc72735307030678ed6672f53ea80ad1e30..3391ac06211d9efde8f91d24f44f9fc5deac5482 100644
--- a/src/conio/scale.c
+++ b/src/conio/scale.c
@@ -120,41 +120,44 @@ do_scale(struct rectlist* rect, int xscale, int yscale, double ratio)
 	xscale = 1;
 	total_yscaling = yscale;
 	yscale = 1;
-	if (!(cio_api.options & CONIO_OPT_BLOCKY_SCALING)) {
-		if ((total_xscaling & 1) == 1 && (total_xscaling == total_yscaling || (total_yscaling % total_xscaling == 0))) {
-			pointymult = total_xscaling;
-			total_xscaling /= pointymult;
-			xscale *= pointymult;
-			total_yscaling /= pointymult;
-			yscale *= pointymult;
-		}
-		while (total_xscaling > 1 && ((total_xscaling % 5) == 0) && ((total_yscaling % 5) == 0)) {
-			pointy5++;
-			total_xscaling /= 5;
-			xscale *= 5;
-			total_yscaling /= 5;
-			yscale *= 5;
-		}
-		while (total_xscaling > 1 && ((total_xscaling % 3) == 0) && ((total_yscaling % 3) == 0)) {
-			pointy3++;
-			total_xscaling /= 3;
-			xscale *= 3;
-			total_yscaling /= 3;
-			yscale *= 3;
-		}
-		while (total_xscaling > 1 && ((total_xscaling % 4) == 0) && ((total_yscaling % 4) == 0)) {
-			xbr4++;
-			total_xscaling /= 4;
-			xscale *= 4;
-			total_yscaling /= 4;
-			yscale *= 4;
-		}
-		while (total_xscaling > 1 && ((total_xscaling % 2) == 0) && ((total_yscaling % 2) == 0)) {
-			xbr2++;
-			total_xscaling /= 2;
-			xscale *= 2;
-			total_yscaling /= 2;
-			yscale *= 2;
+	// If the y scale isn't a whole multiple of the x scale, block scale everything...
+	if ((total_yscaling % total_xscaling) == 0) {
+		if (!(cio_api.options & CONIO_OPT_BLOCKY_SCALING)) {
+			if ((total_xscaling & 1) == 1 && total_xscaling > 5) {
+				pointymult = total_xscaling;
+				total_xscaling /= pointymult;
+				xscale *= pointymult;
+				total_yscaling /= pointymult;
+				yscale *= pointymult;
+			}
+			while (total_xscaling > 1 && ((total_xscaling % 5) == 0) && ((total_yscaling % 5) == 0)) {
+				pointy5++;
+				total_xscaling /= 5;
+				xscale *= 5;
+				total_yscaling /= 5;
+				yscale *= 5;
+			}
+			while (total_xscaling > 1 && ((total_xscaling % 3) == 0) && ((total_yscaling % 3) == 0)) {
+				pointy3++;
+				total_xscaling /= 3;
+				xscale *= 3;
+				total_yscaling /= 3;
+				yscale *= 3;
+			}
+			while (total_xscaling > 1 && ((total_xscaling % 4) == 0) && ((total_yscaling % 4) == 0)) {
+				xbr4++;
+				total_xscaling /= 4;
+				xscale *= 4;
+				total_yscaling /= 4;
+				yscale *= 4;
+			}
+			while (total_xscaling > 1 && ((total_xscaling % 2) == 0) && ((total_yscaling % 2) == 0)) {
+				xbr2++;
+				total_xscaling /= 2;
+				xscale *= 2;
+				total_yscaling /= 2;
+				yscale *= 2;
+			}
 		}
 	}
 
@@ -215,6 +218,18 @@ do_scale(struct rectlist* rect, int xscale, int yscale, double ratio)
 	csrc->w = rect->rect.width;
 	csrc->h = rect->rect.height;
 
+#if 0
+fprintf(stderr, "Plan:\n"
+"pointymulti: %d\n"
+"pointy5:     %d\n"
+"pointy3:     %d\n"
+"xBR4:        %d\n"
+"xBR2:        %d\n"
+"Multiply:    %dx%d\n"
+"hinterp:     %zu -> %zu\n"
+"winterp:     %zu -> %zu\n",
+pointymult, pointy5, pointy3, xbr4, xbr2, xmult, ymult, csrc->h * yscale, ratio < 1 ? fheight : csrc->h * yscale, csrc->w * xscale, ratio > 1 ? fwidth : csrc->w * xscale);
+#endif
 	// And scale...
 	if (ymult != 1 || xmult != 1) {
 		multiply_scale(csrc->data, ctarget->data, csrc->w, csrc->h, xmult, ymult);
@@ -683,33 +698,29 @@ static void
 interpolate_height(uint32_t* src, uint32_t* dst, int width, int height, int newheight)
 {
 	int x, y;
+	bool em = false;
 	const double mult = (double)height / newheight;
 
 	for (y = 0; y < newheight; y++) {
-		for (x = 0; x < width; x++) {
-			// First, calculate which two pixels this is between.
-			const double ypos = mult * y;
-			const int yposi = ypos;
-			if (y == ypos) {
-				// Exact match!
-				*dst = src[width * yposi + x];
-			}
-			else {
-				const double weight = ypos - yposi;
+		const double ypos = mult * y;
+		const int yposi = ypos;
+		em = (y == ypos || yposi >= height - 1);
+		if (em) {
+			memcpy(dst, &src[yposi * width], width * sizeof(dst[0]));
+			dst += width;
+		}
+		else {
+			const double weight = ypos - yposi;
+			for (x = 0; x < width; x++) {
 				// Now pick the two pixels
 				const uint32_t pix1 = src[yposi * width + x] & 0xffffff;
-				uint32_t pix2;
-				if (yposi < height - 1)
-					pix2 = src[(yposi + 1) * width + x] & 0xffffff;
-				else
-					pix2 = src[yposi * width + x] & 0xffffff;
+				const uint32_t pix2 = src[(yposi + 1) * width + x] & 0xffffff;
 				if (pix1 == pix2)
 					*dst = pix1;
-				else {
+				else
 					*dst = blend(pix1, pix2, weight);
-				}
+				dst++;
 			}
-			dst++;
 		}
 	}
 }
@@ -717,8 +728,6 @@ interpolate_height(uint32_t* src, uint32_t* dst, int width, int height, int newh
 static void
 multiply_scale(uint32_t* src, uint32_t* dst, int width, int height, int xmult, int ymult)
 {
-	int nheight = height * ymult;
-	int nwidth = width * xmult;
 	int x, y;
 	int mx, my;
 	uint32_t* slstart;
diff --git a/src/conio/sdl_con.c b/src/conio/sdl_con.c
index 7f830966b1f40b884af654a5815da5345eeeb860..ba1842de34ca9d66cc781028cc2cc8e6283f6f6d 100644
--- a/src/conio/sdl_con.c
+++ b/src/conio/sdl_con.c
@@ -24,6 +24,7 @@
 #include "vidmodes.h"
 #define BITMAP_CIOLIB_DRIVER
 #include "bitmap_con.h"
+#include "scale.h"
 
 #include "SDL.h"
 
@@ -38,6 +39,8 @@ unsigned char		sdl_keynext=0;			/* Index into keybuf for next free position */
 
 int sdl_exitcode=0;
 
+bool internal_scaling = true;	// Protected by the win mutex
+
 SDL_Window	*win=NULL;
 SDL_Cursor	*curs=NULL;
 SDL_Renderer	*renderer=NULL;
@@ -565,12 +568,14 @@ static void setup_surfaces_locked(void)
 	vmultiplier = cvstat.vmultiplier;
 	idealh = roundl((long double)cvstat.winwidth * cvstat.scale_denominator / cvstat.scale_numerator * cvstat.scrnheight / cvstat.scrnwidth);
 	idealmh = roundl((long double)cvstat.scrnwidth * cvstat.scale_denominator / cvstat.scale_numerator * cvstat.scrnheight / cvstat.scrnwidth);
+	internal_scaling = true;
+	sdl.SetHint(SDL_HINT_RENDER_SCALE_QUALITY, "0");
 
 	if (win == NULL) {
 		// SDL2: This is slow sometimes... not sure why.
 		if (sdl.CreateWindowAndRenderer(cvstat.winwidth, idealh, flags, &win, &renderer) == 0) {
 			sdl.RenderClear(renderer);
-			newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.scrnwidth, cvstat.scrnheight);
+			newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.winwidth, idealh);
 
 			if (texture) 
 				sdl.DestroyTexture(texture);
@@ -582,9 +587,9 @@ static void setup_surfaces_locked(void)
 		}
 	}
 	else {
-		sdl.SetWindowMinimumSize(win, cvstat.scrnwidth, idealh);
+		sdl.SetWindowMinimumSize(win, cvstat.scrnwidth, idealmh);
 		sdl.SetWindowSize(win, cvstat.winwidth, idealh);
-		newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.scrnwidth, cvstat.scrnheight);
+		newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.winwidth, idealh);
 		sdl.RenderClear(renderer);
 		if (texture)
 			sdl.DestroyTexture(texture);
@@ -969,16 +974,23 @@ void sdl_video_event_thread(void *data)
 							const char *newh;
 
 							pthread_mutex_lock(&vstatlock);
-							if ((ev.window.data1 % cvstat.scrnwidth) || (ev.window.data2 % cvstat.scrnheight))
+							pthread_mutex_lock(&win_mutex);
+							if ((ev.window.data1 % cvstat.scrnwidth) && (ev.window.data2 % cvstat.scrnheight)) {
 								newh = "2";
-							else
+								internal_scaling = false;
+							}
+							else {
 								newh = "0";
-							pthread_mutex_lock(&win_mutex);
+								internal_scaling = true;
+							}
 							sdl.GetWindowSize(win, &cvstat.winwidth, &cvstat.winheight);
 							if (strcmp(newh, sdl.GetHint(SDL_HINT_RENDER_SCALE_QUALITY))) {
 								SDL_Texture *newtexture;
 								sdl.SetHint(SDL_HINT_RENDER_SCALE_QUALITY, newh);
-								newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.scrnwidth, cvstat.scrnheight);
+								if (internal_scaling)
+									newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.winwidth, cvstat.winheight);
+								else
+									newtexture = sdl.CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, cvstat.scrnwidth, cvstat.scrnheight);
 								sdl.RenderClear(renderer);
 								if (texture)
 									sdl.DestroyTexture(texture);
@@ -1025,43 +1037,78 @@ void sdl_video_event_thread(void *data)
 									int row;
 									int tw, th;
 
-									src.x = 0;
-									src.y = 0;
-									src.w = list->rect.width;
-									src.h = list->rect.height;
-									sdl.QueryTexture(texture, NULL, NULL, &tw, &th);
-									sdl.LockTexture(texture, &src, &pixels, &pitch);
-									if (pitch != list->rect.width * sizeof(list->data[0])) {
-										// If this happens, we need to copy a row at a time...
-										for (row = 0; row < list->rect.height && row < th; row++) {
-											if (pitch < list->rect.width * sizeof(list->data[0]))
-												memcpy(pixels, &list->data[list->rect.width * row], pitch);
-											else
-												memcpy(pixels, &list->data[list->rect.width * row], list->rect.width * sizeof(list->data[0]));
-											pixels = (void *)((char*)pixels + pitch);
+									if (internal_scaling) {
+										struct graphics_buffer *gb;
+										gb = do_scale(list, cvstat.winwidth / cvstat.scrnwidth, cvstat.winheight / cvstat.scrnheight,
+										    (double)cvstat.scale_numerator / cvstat.scale_denominator);
+										src.x = 0;
+										src.y = 0;
+										src.w = gb->w;
+										src.h = gb->h;
+										sdl.QueryTexture(texture, NULL, NULL, &tw, &th);
+										sdl.LockTexture(texture, &src, &pixels, &pitch);
+										if (pitch != gb->w * sizeof(gb->data[0])) {
+											// If this happens, we need to copy a row at a time...
+											for (row = 0; row < gb->h && row < th; row++) {
+												if (pitch < gb->w * sizeof(gb->data[0]))
+													memcpy(pixels, &gb->data[gb->w * row], pitch);
+												else
+													memcpy(pixels, &gb->data[gb->w * row], gb->w * sizeof(gb->data[0]));
+												pixels = (void *)((char*)pixels + pitch);
+											}
 										}
+										else {
+											int ch = gb->h;
+											if (ch > th)
+												ch = th;
+											memcpy(pixels, gb->data, gb->w * ch * sizeof(gb->data[0]));
+										}
+										sdl.UnlockTexture(texture);
+										dst.x = 0;
+										dst.y = 0;
+										dst.w = gb->w;
+										dst.h = gb->h;
+										release_buffer(gb);
 									}
 									else {
-										int ch = list->rect.height;
-										if (ch > th)
-											ch = th;
-										memcpy(pixels, list->data, list->rect.width * ch * sizeof(list->data[0]));
-									}
-									sdl.UnlockTexture(texture);
-									dst.x = 0;
-									dst.y = 0;
-									dst.w = cvstat.winwidth;
-									dst.h = cvstat.winheight;
-									// Get correct aspect ratio for dst...
-									idealw = roundl((long double)dst.h * cvstat.scale_numerator / cvstat.scale_denominator * cvstat.scrnwidth / cvstat.scrnheight);
-									idealh = roundl((long double)dst.w * cvstat.scale_denominator / cvstat.scale_numerator * cvstat.scrnheight / cvstat.scrnwidth);
-									if (idealw < cvstat.winwidth) {
-										dst.x = (cvstat.winwidth - idealw) / 2;
-										dst.w = idealw;
-									}
-									else if(idealh < cvstat.winheight) {
-										dst.y = (cvstat.winheight - idealh) / 2;
-										dst.h = idealh;
+										src.x = 0;
+										src.y = 0;
+										src.w = list->rect.width;
+										src.h = list->rect.height;
+										sdl.QueryTexture(texture, NULL, NULL, &tw, &th);
+										sdl.LockTexture(texture, &src, &pixels, &pitch);
+										if (pitch != list->rect.width * sizeof(list->data[0])) {
+											// If this happens, we need to copy a row at a time...
+											for (row = 0; row < list->rect.height && row < th; row++) {
+												if (pitch < list->rect.width * sizeof(list->data[0]))
+													memcpy(pixels, &list->data[list->rect.width * row], pitch);
+												else
+													memcpy(pixels, &list->data[list->rect.width * row], list->rect.width * sizeof(list->data[0]));
+												pixels = (void *)((char*)pixels + pitch);
+											}
+										}
+										else {
+											int ch = list->rect.height;
+											if (ch > th)
+												ch = th;
+											memcpy(pixels, list->data, list->rect.width * ch * sizeof(list->data[0]));
+										}
+										sdl.UnlockTexture(texture);
+										dst.x = 0;
+										dst.y = 0;
+										dst.w = cvstat.winwidth;
+										dst.h = cvstat.winheight;
+										// Get correct aspect ratio for dst...
+										idealw = roundl((long double)dst.h * cvstat.scale_numerator / cvstat.scale_denominator * cvstat.scrnwidth / cvstat.scrnheight);
+										idealh = roundl((long double)dst.w * cvstat.scale_denominator / cvstat.scale_numerator * cvstat.scrnheight / cvstat.scrnwidth);
+										if (idealw < cvstat.winwidth) {
+											dst.x = (cvstat.winwidth - idealw) / 2;
+											dst.w = idealw;
+										}
+										else if(idealh < cvstat.winheight) {
+											dst.y = (cvstat.winheight - idealh) / 2;
+											dst.h = idealh;
+										}
 									}
 									sdl.RenderCopy(renderer, texture, &src, &dst);
 								}
diff --git a/src/syncterm/syncterm.c b/src/syncterm/syncterm.c
index 0d64d598613d7f8bf6cbc895926f918f68872dbd..45cf98f97ee339660781e35f3599e5574eed7cf8 100644
--- a/src/syncterm/syncterm.c
+++ b/src/syncterm/syncterm.c
@@ -1816,7 +1816,7 @@ int main(int argc, char **argv)
 	// Save changed settings
 	gettextinfo(&txtinfo);
 	// Only save window info if we're in the startup mode...
-	if (txtinfo.currmode == settings.startup_mode || (settings.startup_mode == SCREEN_MODE_CURRENT && txtinfo.currmode == C80)) {
+	if (txtinfo.currmode == screen_to_ciolib(settings.startup_mode) || (settings.startup_mode == SCREEN_MODE_CURRENT && txtinfo.currmode == C80)) {
 		ww = wh = sf = -1;
 		get_window_info(&ww, &wh, NULL, NULL);
 		sf = getscaling();